1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>; 10def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>; 11def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>; 12 13def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>; 14def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>; 15def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>; 16 17//===----------------------------------------------------------------------===// 18// FLAT classes 19//===----------------------------------------------------------------------===// 20 21class FLAT_Pseudo<string opName, dag outs, dag ins, 22 string asmOps, list<dag> pattern=[]> : 23 InstSI<outs, ins, "", pattern>, 24 SIMCInstr<opName, SIEncodingFamily.NONE> { 25 26 let isPseudo = 1; 27 let isCodeGenOnly = 1; 28 29 let FLAT = 1; 30 31 let UseNamedOperandTable = 1; 32 let hasSideEffects = 0; 33 let SchedRW = [WriteVMEM]; 34 35 string Mnemonic = opName; 36 string AsmOperands = asmOps; 37 38 bits<1> is_flat_global = 0; 39 bits<1> is_flat_scratch = 0; 40 41 bits<1> has_vdst = 1; 42 43 // We need to distinguish having saddr and enabling saddr because 44 // saddr is only valid for scratch and global instructions. Pre-gfx9 45 // these bits were reserved, so we also don't necessarily want to 46 // set these bits to the disabled value for the original flat 47 // segment instructions. 48 bits<1> has_saddr = 0; 49 bits<1> enabled_saddr = 0; 50 bits<7> saddr_value = 0; 51 bits<1> has_vaddr = 1; 52 53 bits<1> has_data = 1; 54 bits<1> has_glc = 1; 55 bits<1> glcValue = 0; 56 bits<1> has_dlc = 1; 57 bits<1> dlcValue = 0; 58 bits<1> has_sccb = 1; 59 bits<1> sccbValue = 0; 60 bits<1> has_sve = 0; // Scratch VGPR Enable 61 bits<1> lds = 0; 62 bits<1> sve = 0; 63 64 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, 65 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); 66 67 // TODO: M0 if it could possibly access LDS (before gfx9? only)? 68 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); 69 70 // Internally, FLAT instruction are executed as both an LDS and a 71 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT 72 // and are not considered done until both have been decremented. 73 let VM_CNT = 1; 74 let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch)); 75 76 let FlatGlobal = is_flat_global; 77 78 let FlatScratch = is_flat_scratch; 79} 80 81class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 82 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 83 Enc64 { 84 85 let isPseudo = 0; 86 let isCodeGenOnly = 0; 87 88 let FLAT = 1; 89 90 // copy relevant pseudo op flags 91 let SubtargetPredicate = ps.SubtargetPredicate; 92 let AsmMatchConverter = ps.AsmMatchConverter; 93 let OtherPredicates = ps.OtherPredicates; 94 let TSFlags = ps.TSFlags; 95 let UseNamedOperandTable = ps.UseNamedOperandTable; 96 let SchedRW = ps.SchedRW; 97 let mayLoad = ps.mayLoad; 98 let mayStore = ps.mayStore; 99 let IsAtomicRet = ps.IsAtomicRet; 100 let IsAtomicNoRet = ps.IsAtomicNoRet; 101 let VM_CNT = ps.VM_CNT; 102 let LGKM_CNT = ps.LGKM_CNT; 103 let VALU = ps.VALU; 104 105 // encoding fields 106 bits<8> vaddr; 107 bits<10> vdata; 108 bits<7> saddr; 109 bits<10> vdst; 110 111 bits<5> cpol; 112 113 // Only valid on gfx9 114 bits<1> lds = ps.lds; // LDS DMA for global and scratch 115 116 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved 117 bits<2> seg = !if(ps.is_flat_global, 0b10, 118 !if(ps.is_flat_scratch, 0b01, 0)); 119 120 // Signed offset. Highest bit ignored for flat and treated as 12-bit 121 // unsigned for flat accesses. 122 bits<13> offset; 123 // GFX90A+ only: instruction uses AccVGPR for data 124 bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0)); 125 126 // We don't use tfe right now, and it was removed in gfx9. 127 bits<1> tfe = 0; 128 129 // Only valid on GFX9+ 130 let Inst{12-0} = offset; 131 let Inst{13} = !if(ps.has_sve, ps.sve, lds); 132 let Inst{15-14} = seg; 133 134 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 135 let Inst{17} = cpol{CPolBit.SLC}; 136 let Inst{24-18} = op; 137 let Inst{31-26} = 0x37; // Encoding. 138 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 139 let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?); 140 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 141 142 // 54-48 is reserved. 143 let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A. 144 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?); 145} 146 147class GlobalSaddrTable <bit is_saddr, string Name = ""> { 148 bit IsSaddr = is_saddr; 149 string SaddrOp = Name; 150} 151 152// TODO: Is exec allowed for saddr? The disabled value 0x7f is the 153// same encoding value as exec_hi, so it isn't possible to use that if 154// saddr is 32-bit (which isn't handled here yet). 155class FLAT_Load_Pseudo <string opName, RegisterClass regClass, 156 bit HasTiedOutput = 0, 157 bit HasSaddr = 0, bit EnableSaddr = 0, 158 RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo< 159 opName, 160 (outs vdata_op:$vdst), 161 !con( 162 !con( 163 !if(EnableSaddr, 164 (ins SReg_64:$saddr, VGPR_32:$vaddr), 165 (ins VReg_64:$vaddr)), 166 (ins flat_offset:$offset)), 167 // FIXME: Operands with default values do not work with following non-optional operands. 168 !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), 169 (ins CPol_0:$cpol))), 170 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 171 let has_data = 0; 172 let mayLoad = 1; 173 let has_saddr = HasSaddr; 174 let enabled_saddr = EnableSaddr; 175 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 176 let maybeAtomic = 1; 177 178 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 179 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 180} 181 182class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, 183 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 184 opName, 185 (outs), 186 !con( 187 !if(EnableSaddr, 188 (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr), 189 (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), 190 (ins flat_offset:$offset, CPol_0:$cpol)), 191 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 192 let mayLoad = 0; 193 let mayStore = 1; 194 let has_vdst = 0; 195 let has_saddr = HasSaddr; 196 let enabled_saddr = EnableSaddr; 197 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 198 let maybeAtomic = 1; 199} 200 201multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { 202 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 203 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>, 204 GlobalSaddrTable<0, opName>; 205 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>, 206 GlobalSaddrTable<1, opName>; 207 } 208} 209 210class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, 211 bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 212 opName, 213 (outs regClass:$vdst), 214 !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), 215 (ins flat_offset:$offset, CPol_0:$cpol), 216 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), 217 " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 218 let is_flat_global = 1; 219 let has_data = 0; 220 let mayLoad = 1; 221 let has_vaddr = 0; 222 let has_saddr = 1; 223 let enabled_saddr = EnableSaddr; 224 let maybeAtomic = 1; 225 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 226 227 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 228 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 229} 230 231multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass, 232 bit HasTiedOutput = 0> { 233 def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>, 234 GlobalSaddrTable<0, opName>; 235 def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>, 236 GlobalSaddrTable<1, opName>; 237} 238 239multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> { 240 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 241 def "" : FLAT_Store_Pseudo<opName, regClass, 1>, 242 GlobalSaddrTable<0, opName>; 243 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>, 244 GlobalSaddrTable<1, opName>; 245 } 246} 247 248class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< 249 opName, 250 (outs ), 251 !con( 252 !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), 253 (ins flat_offset:$offset, CPol_0:$cpol)), 254 " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { 255 let LGKM_CNT = 1; 256 let is_flat_global = 1; 257 let lds = 1; 258 let has_data = 0; 259 let has_vdst = 0; 260 let mayLoad = 1; 261 let mayStore = 1; 262 let has_saddr = 1; 263 let enabled_saddr = EnableSaddr; 264 let VALU = 1; 265 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 266 let Uses = [M0, EXEC]; 267 let SchedRW = [WriteVMEM, WriteLDS]; 268} 269 270multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { 271 def "" : FLAT_Global_Load_LDS_Pseudo<opName>, 272 GlobalSaddrTable<0, opName>; 273 def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, 274 GlobalSaddrTable<1, opName>; 275} 276 277class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, 278 bit EnableSaddr = 0> : FLAT_Pseudo< 279 opName, 280 (outs), 281 !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), 282 (ins flat_offset:$offset, CPol:$cpol)), 283 " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 284 let is_flat_global = 1; 285 let mayLoad = 0; 286 let mayStore = 1; 287 let has_vdst = 0; 288 let has_vaddr = 0; 289 let has_saddr = 1; 290 let enabled_saddr = EnableSaddr; 291 let maybeAtomic = 1; 292 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 293} 294 295multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> { 296 def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>, 297 GlobalSaddrTable<0, opName>; 298 def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>, 299 GlobalSaddrTable<1, opName>; 300} 301 302class FlatScratchInst <string sv_op, string mode> { 303 string SVOp = sv_op; 304 string Mode = mode; 305} 306 307class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, 308 bit HasTiedOutput = 0, 309 bit EnableSaddr = 0, 310 bit EnableSVE = 0, 311 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> 312 : FLAT_Pseudo< 313 opName, 314 (outs getLdStRegisterOperand<regClass>.ret:$vdst), 315 !con( 316 !if(EnableSVE, 317 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 318 !if(EnableSaddr, 319 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 320 !if(EnableVaddr, 321 (ins VGPR_32:$vaddr, flat_offset:$offset), 322 (ins flat_offset:$offset)))), 323 !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), 324 (ins CPol_0:$cpol))), 325 " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 326 let has_data = 0; 327 let mayLoad = 1; 328 let has_saddr = 1; 329 let enabled_saddr = EnableSaddr; 330 let has_vaddr = EnableVaddr; 331 let has_sve = EnableSVE; 332 let sve = EnableVaddr; 333 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 334 let maybeAtomic = 1; 335 336 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 337 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 338} 339 340class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0, 341 bit EnableSVE = 0, 342 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)), 343 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo< 344 opName, 345 (outs), 346 !if(EnableSVE, 347 (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 348 !if(EnableSaddr, 349 (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 350 !if(EnableVaddr, 351 (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), 352 (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))), 353 " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 354 let mayLoad = 0; 355 let mayStore = 1; 356 let has_vdst = 0; 357 let has_saddr = 1; 358 let enabled_saddr = EnableSaddr; 359 let has_vaddr = EnableVaddr; 360 let has_sve = EnableSVE; 361 let sve = EnableVaddr; 362 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 363 let maybeAtomic = 1; 364} 365 366multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { 367 let is_flat_scratch = 1 in { 368 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>, 369 FlatScratchInst<opName, "SV">; 370 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>, 371 FlatScratchInst<opName, "SS">; 372 373 let SubtargetPredicate = HasFlatScratchSVSMode in 374 def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>, 375 FlatScratchInst<opName, "SVS">; 376 377 let SubtargetPredicate = HasFlatScratchSTMode in 378 def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>, 379 FlatScratchInst<opName, "ST">; 380 } 381} 382 383multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> { 384 let is_flat_scratch = 1 in { 385 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>, 386 FlatScratchInst<opName, "SV">; 387 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>, 388 FlatScratchInst<opName, "SS">; 389 390 let SubtargetPredicate = HasFlatScratchSVSMode in 391 def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>, 392 FlatScratchInst<opName, "SVS">; 393 394 let SubtargetPredicate = HasFlatScratchSTMode in 395 def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>, 396 FlatScratchInst<opName, "ST">; 397 } 398} 399 400class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, 401 bit EnableSVE = 0, 402 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< 403 opName, 404 (outs ), 405 !if(EnableSVE, 406 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 407 !if(EnableSaddr, 408 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 409 !if(EnableVaddr, 410 (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), 411 (ins flat_offset:$offset, CPol:$cpol)))), 412 " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 413 414 let LGKM_CNT = 1; 415 let is_flat_scratch = 1; 416 let lds = 1; 417 let has_data = 0; 418 let has_vdst = 0; 419 let mayLoad = 1; 420 let mayStore = 1; 421 let has_saddr = 1; 422 let enabled_saddr = EnableSaddr; 423 let has_vaddr = EnableVaddr; 424 let has_sve = EnableSVE; 425 let sve = EnableVaddr; 426 let VALU = 1; 427 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 428 let Uses = [M0, EXEC]; 429 let SchedRW = [WriteVMEM, WriteLDS]; 430} 431 432multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { 433 def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>, 434 FlatScratchInst<opName, "SV">; 435 def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, 436 FlatScratchInst<opName, "SS">; 437 def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, 438 FlatScratchInst<opName, "SVS">; 439 def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, 440 FlatScratchInst<opName, "ST">; 441} 442 443class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, 444 string asm, list<dag> pattern = []> : 445 FLAT_Pseudo<opName, outs, ins, asm, pattern> { 446 let mayLoad = 1; 447 let mayStore = 1; 448 let has_glc = 0; 449 let glcValue = 0; 450 let has_vdst = 0; 451 let has_sccb = 1; 452 let sccbValue = 0; 453 let maybeAtomic = 1; 454 let IsAtomicNoRet = 1; 455} 456 457class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, 458 string asm, list<dag> pattern = []> 459 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> { 460 let hasPostISelHook = 1; 461 let has_vdst = 1; 462 let glcValue = 1; 463 let sccbValue = 0; 464 let IsAtomicNoRet = 0; 465 let IsAtomicRet = 1; 466 let PseudoInstr = NAME # "_RTN"; 467} 468 469multiclass FLAT_Atomic_Pseudo< 470 string opName, 471 RegisterClass vdst_rc, 472 ValueType vt, 473 ValueType data_vt = vt, 474 RegisterClass data_rc = vdst_rc, 475 bit isFP = isFloatType<data_vt>.ret, 476 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 477 def "" : FLAT_AtomicNoRet_Pseudo <opName, 478 (outs), 479 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 480 " $vaddr, $vdata$offset$cpol">, 481 GlobalSaddrTable<0, opName>, 482 AtomicNoRet <opName, 0> { 483 let PseudoInstr = NAME; 484 let FPAtomic = isFP; 485 let AddedComplexity = -1; // Prefer global atomics if available 486 } 487 488 def _RTN : FLAT_AtomicRet_Pseudo <opName, 489 (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), 490 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 491 " $vdst, $vaddr, $vdata$offset$cpol">, 492 GlobalSaddrTable<0, opName#"_rtn">, 493 AtomicNoRet <opName, 1> { 494 let FPAtomic = isFP; 495 let AddedComplexity = -1; // Prefer global atomics if available 496 } 497} 498 499multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< 500 string opName, 501 RegisterClass vdst_rc, 502 ValueType vt, 503 ValueType data_vt = vt, 504 RegisterClass data_rc = vdst_rc, 505 bit isFP = isFloatType<data_vt>.ret, 506 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 507 508 def "" : FLAT_AtomicNoRet_Pseudo <opName, 509 (outs), 510 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 511 " $vaddr, $vdata, off$offset$cpol">, 512 GlobalSaddrTable<0, opName>, 513 AtomicNoRet <opName, 0> { 514 let has_saddr = 1; 515 let PseudoInstr = NAME; 516 let FPAtomic = isFP; 517 } 518 519 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, 520 (outs), 521 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol), 522 " $vaddr, $vdata, $saddr$offset$cpol">, 523 GlobalSaddrTable<1, opName>, 524 AtomicNoRet <opName#"_saddr", 0> { 525 let has_saddr = 1; 526 let enabled_saddr = 1; 527 let PseudoInstr = NAME#"_SADDR"; 528 let FPAtomic = isFP; 529 } 530} 531 532multiclass FLAT_Global_Atomic_Pseudo_RTN< 533 string opName, 534 RegisterClass vdst_rc, 535 ValueType vt, 536 ValueType data_vt = vt, 537 RegisterClass data_rc = vdst_rc, 538 bit isFP = isFloatType<data_vt>.ret, 539 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret, 540 RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> { 541 542 def _RTN : FLAT_AtomicRet_Pseudo <opName, 543 (outs vdst_op:$vdst), 544 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 545 " $vdst, $vaddr, $vdata, off$offset$cpol">, 546 GlobalSaddrTable<0, opName#"_rtn">, 547 AtomicNoRet <opName, 1> { 548 let has_saddr = 1; 549 let FPAtomic = isFP; 550 } 551 552 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, 553 (outs vdst_op:$vdst), 554 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), 555 " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, 556 GlobalSaddrTable<1, opName#"_rtn">, 557 AtomicNoRet <opName#"_saddr", 1> { 558 let has_saddr = 1; 559 let enabled_saddr = 1; 560 let PseudoInstr = NAME#"_SADDR_RTN"; 561 let FPAtomic = isFP; 562 } 563} 564 565multiclass FLAT_Global_Atomic_Pseudo< 566 string opName, 567 RegisterClass vdst_rc, 568 ValueType vt, 569 ValueType data_vt = vt, 570 RegisterClass data_rc = vdst_rc> { 571 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 572 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 573 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 574 } 575} 576 577//===----------------------------------------------------------------------===// 578// Flat Instructions 579//===----------------------------------------------------------------------===// 580 581def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 582def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 583def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 584def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 585def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 586def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 587def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 588def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 589 590def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 591def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 592def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 593def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 594def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 595def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 596 597let SubtargetPredicate = HasD16LoadStore in { 598let TiedSourceNotRead = 1 in { 599def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 600def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 601def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 602def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 603def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 604def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 605} 606 607def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 608def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 609} 610 611defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 612 VGPR_32, i32, v2i32, VReg_64>; 613 614defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 615 VReg_64, i64, v2i64, VReg_128>; 616 617defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 618 VGPR_32, i32>; 619 620defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 621 VReg_64, i64>; 622 623defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 624 VGPR_32, i32>; 625 626defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 627 VGPR_32, i32>; 628 629defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 630 VGPR_32, i32>; 631 632defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 633 VGPR_32, i32>; 634 635defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 636 VGPR_32, i32>; 637 638defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 639 VGPR_32, i32>; 640 641defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 642 VGPR_32, i32>; 643 644defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 645 VGPR_32, i32>; 646 647defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 648 VGPR_32, i32>; 649 650defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 651 VGPR_32, i32>; 652 653defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 654 VGPR_32, i32>; 655 656defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 657 VReg_64, i64>; 658 659defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 660 VReg_64, i64>; 661 662defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 663 VReg_64, i64>; 664 665defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 666 VReg_64, i64>; 667 668defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 669 VReg_64, i64>; 670 671defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 672 VReg_64, i64>; 673 674defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 675 VReg_64, i64>; 676 677defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 678 VReg_64, i64>; 679 680defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 681 VReg_64, i64>; 682 683defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 684 VReg_64, i64>; 685 686defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 687 VReg_64, i64>; 688 689// GFX7-, GFX10-only flat instructions. 690let SubtargetPredicate = isGFX7GFX10 in { 691 692defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", 693 VReg_64, f64, v2f64, VReg_128>; 694 695defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", 696 VReg_64, f64>; 697 698defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", 699 VReg_64, f64>; 700 701} // End SubtargetPredicate = isGFX7GFX10 702 703let SubtargetPredicate = isGFX90APlus in { 704 defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>; 705 defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>; 706 defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>; 707 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>; 708 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>; 709 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; 710} // End SubtargetPredicate = isGFX90APlus 711 712let SubtargetPredicate = isGFX940Plus in { 713 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; 714 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>; 715 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>; 716} // End SubtargetPredicate = isGFX940Plus 717 718// GFX7-, GFX10-, GFX11-only flat instructions. 719let SubtargetPredicate = isGFX7GFX10GFX11 in { 720 721defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", 722 VGPR_32, f32, v2f32, VReg_64>; 723 724defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", 725 VGPR_32, f32>; 726 727defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", 728 VGPR_32, f32>; 729 730} // End SubtargetPredicate = isGFX7GFX10GFX11 731 732// GFX940-, GFX11-only flat instructions. 733let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { 734 defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; 735} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst 736 737defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 738defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 739defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 740defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 741defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 742defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 743defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 744defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 745 746let TiedSourceNotRead = 1 in { 747defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 748defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 749defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 750defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 751defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 752defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 753} 754 755let OtherPredicates = [HasGFX10_BEncoding] in 756defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 757 758defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 759defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 760defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 761defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 762defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 763defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 764let OtherPredicates = [HasGFX10_BEncoding] in 765defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 766 767defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 768defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 769 770let is_flat_global = 1 in { 771defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 772 VGPR_32, i32, v2i32, VReg_64>; 773 774defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 775 VReg_64, i64, v2i64, VReg_128>; 776 777defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 778 VGPR_32, i32>; 779 780defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 781 VReg_64, i64>; 782 783defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 784 VGPR_32, i32>; 785 786defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 787 VGPR_32, i32>; 788 789defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 790 VGPR_32, i32>; 791 792defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 793 VGPR_32, i32>; 794 795defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 796 VGPR_32, i32>; 797 798defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 799 VGPR_32, i32>; 800 801defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 802 VGPR_32, i32>; 803 804defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 805 VGPR_32, i32>; 806 807defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 808 VGPR_32, i32>; 809 810defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", 811 VGPR_32, i32>; 812 813defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 814 VGPR_32, i32>; 815 816defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 817 VReg_64, i64>; 818 819defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 820 VReg_64, i64>; 821 822defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 823 VReg_64, i64>; 824 825defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 826 VReg_64, i64>; 827 828defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 829 VReg_64, i64>; 830 831defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 832 VReg_64, i64>; 833 834defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 835 VReg_64, i64>; 836 837defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 838 VReg_64, i64>; 839 840defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 841 VReg_64, i64>; 842 843defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 844 VReg_64, i64>; 845 846defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 847 VReg_64, i64>; 848 849let SubtargetPredicate = HasGFX10_BEncoding in 850defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", 851 VGPR_32, i32>; 852 853defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; 854defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; 855defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; 856defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; 857defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; 858 859} // End is_flat_global = 1 860 861 862 863let SubtargetPredicate = HasFlatScratchInsts in { 864defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; 865defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; 866defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; 867defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; 868defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; 869defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; 870defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; 871defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; 872 873let TiedSourceNotRead = 1 in { 874defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; 875defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; 876defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; 877defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 878defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 879defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 880} 881 882defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 883defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 884defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 885defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 886defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 887defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 888 889defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 890defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 891 892defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; 893defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; 894defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; 895defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; 896defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; 897 898} // End SubtargetPredicate = HasFlatScratchInsts 899 900let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { 901 defm GLOBAL_ATOMIC_FCMPSWAP : 902 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; 903 defm GLOBAL_ATOMIC_FMIN : 904 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 905 defm GLOBAL_ATOMIC_FMAX : 906 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; 907 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 908 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; 909 defm GLOBAL_ATOMIC_FMIN_X2 : 910 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; 911 defm GLOBAL_ATOMIC_FMAX_X2 : 912 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; 913} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 914 915let is_flat_global = 1 in { 916let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 917 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 918 "global_atomic_add_f32", VGPR_32, f32 919 >; 920let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in 921 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 922 "global_atomic_pk_add_f16", VGPR_32, v2f16 923 >; 924let OtherPredicates = [HasAtomicFaddRtnInsts] in 925 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 926 "global_atomic_add_f32", VGPR_32, f32 927 >; 928let OtherPredicates = [isGFX90APlus] in 929 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 930 "global_atomic_pk_add_f16", VGPR_32, v2f16 931 >; 932} // End is_flat_global = 1 933 934//===----------------------------------------------------------------------===// 935// Flat Patterns 936//===----------------------------------------------------------------------===// 937 938// Patterns for global loads with no offset. 939class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 940 (vt (node (FlatOffset i64:$vaddr, i16:$offset))), 941 (inst $vaddr, $offset) 942>; 943 944class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 945 (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), 946 (inst $vaddr, $offset, 0, $in) 947>; 948 949class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 950 (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), 951 (inst $vaddr, $offset, 0, $in) 952>; 953 954class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 955 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)), 956 (inst $saddr, $voffset, $offset, 0, $in) 957>; 958 959class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 960 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))), 961 (inst $vaddr, $offset) 962>; 963 964class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 965 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))), 966 (inst $saddr, $voffset, $offset, 0) 967>; 968 969class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 970 ValueType vt> : GCNPat < 971 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)), 972 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 973>; 974 975class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 976 ValueType vt> : GCNPat < 977 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data), 978 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 979>; 980 981class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 982 ValueType vt, ValueType data_vt = vt> : GCNPat < 983 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)), 984 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 985>; 986 987class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 988 ValueType vt> : GCNPat < 989 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data), 990 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 991>; 992 993class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 994 (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)), 995 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 996>; 997 998class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 999 (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)), 1000 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1001>; 1002 1003class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1004 // atomic store follows atomic binop convention so the address comes 1005 // first. 1006 (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data), 1007 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1008>; 1009 1010class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 1011 ValueType vt, ValueType data_vt = vt> : GCNPat < 1012 // atomic store follows atomic binop convention so the address comes 1013 // first. 1014 (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data), 1015 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1016>; 1017 1018multiclass FlatAtomicPat <string inst, string node, ValueType vt, 1019 ValueType data_vt = vt> { 1020 defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size); 1021 defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size); 1022 1023 def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), 1024 (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1025 1026 let AddedComplexity = 1 in 1027 def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), 1028 (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1029} 1030 1031class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node, 1032 ValueType vt, ValueType data_vt = vt> : GCNPat < 1033 (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), 1034 (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1035>; 1036 1037multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt, 1038 ValueType data_vt = vt, int complexity = 0, 1039 bit isIntr = 0> { 1040 defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size)); 1041 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); 1042 1043 let AddedComplexity = complexity in 1044 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1045 1046 let AddedComplexity = !add(complexity, 1) in 1047 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1048} 1049 1050multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt, 1051 ValueType data_vt = vt> { 1052 defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>; 1053} 1054 1055multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1056 ValueType vt, ValueType data_vt = vt> { 1057 defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix); 1058 defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix); 1059 1060 let AddedComplexity = 1 in 1061 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1062 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1063} 1064 1065class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1066 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))), 1067 (inst $vaddr, $offset) 1068>; 1069 1070class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1071 (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in), 1072 (inst $vaddr, $offset, 0, $in) 1073>; 1074 1075class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1076 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)), 1077 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 1078>; 1079 1080class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1081 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))), 1082 (inst $saddr, $offset) 1083>; 1084 1085class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1086 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), 1087 (inst $saddr, $offset, 0, $in) 1088>; 1089 1090class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1091 ValueType vt> : GCNPat < 1092 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)), 1093 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1094>; 1095 1096class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1097 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))), 1098 (inst $vaddr, $saddr, $offset, 0) 1099>; 1100 1101class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1102 ValueType vt> : GCNPat < 1103 (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)), 1104 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset) 1105>; 1106 1107class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1108 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), 1109 (inst $vaddr, $saddr, $offset, 0, $in) 1110>; 1111 1112let OtherPredicates = [HasFlatAddressSpace] in { 1113 1114def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; 1115def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; 1116def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; 1117def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; 1118def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 1119def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 1120def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 1121def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 1122def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 1123def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 1124def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; 1125def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 1126def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 1127def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 1128def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 1129 1130def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 1131def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 1132 1133def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 1134def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 1135 1136foreach vt = Reg32Types.types in { 1137def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 1138def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 1139} 1140 1141foreach vt = VReg_64.RegTypes in { 1142def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 1143def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 1144} 1145 1146def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 1147 1148foreach vt = VReg_128.RegTypes in { 1149def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 1150def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 1151} 1152 1153def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>; 1154def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>; 1155def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>; 1156def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; 1157def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; 1158def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; 1159 1160foreach as = [ "flat", "global" ] in { 1161defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; 1162defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; 1163defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>; 1164defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>; 1165defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; 1166defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; 1167defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; 1168defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; 1169defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; 1170defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; 1171defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; 1172defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; 1173defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; 1174 1175defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; 1176defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; 1177defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>; 1178defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>; 1179defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; 1180defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; 1181defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; 1182defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; 1183defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; 1184defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; 1185defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; 1186defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; 1187defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; 1188} // end foreach as 1189 1190def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1191def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1192 1193let OtherPredicates = [HasD16LoadStore] in { 1194def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1195def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1196} 1197 1198let OtherPredicates = [D16PreservesUnusedBits] in { 1199def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1200def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1201def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1202def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1203def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1204def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1205 1206def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1207def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1208def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1209def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1210def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1211def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1212} 1213 1214} // End OtherPredicates = [HasFlatAddressSpace] 1215 1216 1217multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1218 def : FlatLoadSignedPat <inst, node, vt> { 1219 let AddedComplexity = 10; 1220 } 1221 1222 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1223 let AddedComplexity = 11; 1224 } 1225} 1226 1227multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1228 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1229 let AddedComplexity = 10; 1230 } 1231 1232 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1233 let AddedComplexity = 11; 1234 } 1235} 1236 1237multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1238 ValueType vt> { 1239 def : FlatStoreSignedPat <inst, node, vt> { 1240 let AddedComplexity = 10; 1241 } 1242 1243 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1244 let AddedComplexity = 11; 1245 } 1246} 1247 1248// Deal with swapped operands for atomic_store vs. regular store 1249multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1250 def : FlatStoreSignedAtomicPat <inst, node, vt> { 1251 let AddedComplexity = 10; 1252 } 1253 1254 def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1255 let AddedComplexity = 11; 1256 } 1257} 1258 1259multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt, 1260 ValueType data_vt = vt> { 1261 let AddedComplexity = 11 in 1262 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<PatFrags>(node), vt, data_vt>; 1263 1264 let AddedComplexity = 13 in 1265 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<PatFrags>(node), vt, data_vt>; 1266} 1267 1268multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt, 1269 ValueType data_vt = vt, bit isPatFrags = 0> { 1270 defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node)); 1271 1272 let AddedComplexity = 10 in 1273 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1274 1275 let AddedComplexity = 12 in 1276 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; 1277} 1278 1279multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt, 1280 ValueType data_vt = vt, bit isIntr = 0> : 1281 GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1282 1283multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt, 1284 ValueType data_vt = vt, bit isIntr = 0> : 1285 GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1286 1287multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, 1288 ValueType data_vt = vt, bit isIntr = 0> : 1289 GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>, 1290 GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>; 1291 1292multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1293 ValueType vt, ValueType data_vt = vt> : 1294 GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>; 1295 1296multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1297 ValueType vt, ValueType data_vt = vt> : 1298 GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>; 1299 1300multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1301 ValueType vt, ValueType data_vt = vt> : 1302 GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>, 1303 GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>; 1304 1305multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, 1306 ValueType data_vt = vt> { 1307 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>; 1308} 1309 1310multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1311 def : ScratchLoadSignedPat <inst, node, vt> { 1312 let AddedComplexity = 25; 1313 } 1314 1315 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1316 let AddedComplexity = 26; 1317 } 1318 1319 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1320 let SubtargetPredicate = HasFlatScratchSVSMode; 1321 let AddedComplexity = 27; 1322 } 1323} 1324 1325multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1326 ValueType vt> { 1327 def : ScratchStoreSignedPat <inst, node, vt> { 1328 let AddedComplexity = 25; 1329 } 1330 1331 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1332 let AddedComplexity = 26; 1333 } 1334 1335 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1336 let SubtargetPredicate = HasFlatScratchSVSMode; 1337 let AddedComplexity = 27; 1338 } 1339} 1340 1341multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1342 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1343 let AddedComplexity = 25; 1344 } 1345 1346 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1347 let AddedComplexity = 26; 1348 } 1349 1350 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1351 let SubtargetPredicate = HasFlatScratchSVSMode; 1352 let AddedComplexity = 27; 1353 } 1354} 1355 1356let OtherPredicates = [HasFlatGlobalInsts] in { 1357 1358defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1359defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1360defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1361defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1362defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1363defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1364defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1365defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1366defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1367defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1368defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1369defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1370defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1371defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1372 1373foreach vt = Reg32Types.types in { 1374defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1375defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1376} 1377 1378foreach vt = VReg_64.RegTypes in { 1379defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1380defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1381} 1382 1383defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1384 1385foreach vt = VReg_128.RegTypes in { 1386defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1387defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1388} 1389 1390// There is no distinction for atomic load lowering during selection; 1391// the memory legalizer will set the cache bits and insert the 1392// appropriate waits. 1393defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1394defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1395 1396defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1397defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1398defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1399defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1400defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1401 1402let OtherPredicates = [HasD16LoadStore] in { 1403defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1404defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1405} 1406 1407let OtherPredicates = [D16PreservesUnusedBits] in { 1408defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1409defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1410defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1411defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1412defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1413defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1414 1415defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1416defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1417defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1418defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1419defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1420defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1421} 1422 1423defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1424defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1425defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1426defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1427defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1428defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1429 1430defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1431defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1432defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>; 1433defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>; 1434defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1435defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1436defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1437defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1438defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1439defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1440defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1441defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1442defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1443defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1444 1445defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1446defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1447defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>; 1448defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>; 1449defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1450defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1451defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1452defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1453defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1454defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1455defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1456defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1457defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1458 1459let OtherPredicates = [isGFX10Plus] in { 1460defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1461defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1462defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>; 1463defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>; 1464defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; 1465defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; 1466defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>; 1467defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; 1468} 1469 1470let OtherPredicates = [HasAtomicFaddNoRtnInsts] in { 1471defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1472defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1473defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1474} 1475 1476let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in { 1477defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1478defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1479} 1480 1481let OtherPredicates = [HasAtomicFaddRtnInsts] in { 1482defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1483defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1484defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1485} 1486 1487let OtherPredicates = [isGFX90APlus] in { 1488defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1489defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1490defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1491defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>; 1492defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>; 1493defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1494defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1495defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; 1496defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; 1497defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1498defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>; 1499defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>; 1500defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>; 1501defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; 1502defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; 1503} 1504 1505let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { 1506defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1507defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>; 1508} 1509 1510let OtherPredicates = [isGFX940Plus] in { 1511defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>; 1512defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; 1513defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; 1514} 1515 1516} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1517 1518let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1519 1520defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1521defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1522defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1523defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1524defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1525defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1526defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1527defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1528defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1529defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; 1530 1531foreach vt = Reg32Types.types in { 1532defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1533defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1534} 1535 1536foreach vt = VReg_64.RegTypes in { 1537defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1538defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1539} 1540 1541defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1542 1543foreach vt = VReg_128.RegTypes in { 1544defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1545defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1546} 1547 1548defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1549defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1550defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1551defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1552defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1553 1554let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { 1555defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1556defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1557} 1558 1559let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1560defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1561defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1562defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1563defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1564defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1565defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1566 1567defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1568defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1569defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1570defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1571defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1572defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1573} 1574 1575} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1576 1577//===----------------------------------------------------------------------===// 1578// Target 1579//===----------------------------------------------------------------------===// 1580 1581//===----------------------------------------------------------------------===// 1582// CI 1583//===----------------------------------------------------------------------===// 1584 1585class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 1586 FLAT_Real <op, ps>, 1587 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1588 let AssemblerPredicate = isGFX7Only; 1589 let DecoderNamespace="GFX7"; 1590} 1591 1592def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1593def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1594def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1595def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1596def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1597def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1598def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1599def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1600 1601def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1602def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1603def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1604def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1605def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1606def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1607 1608multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 1609 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1610 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1611} 1612 1613defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 1614defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 1615defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 1616defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 1617defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 1618defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 1619defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 1620defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 1621defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 1622defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 1623defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 1624defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 1625defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 1626defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 1627defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1628defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1629defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1630defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1631defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 1632defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 1633defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 1634defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 1635defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 1636defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 1637defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1638defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1639 1640// CI Only flat instructions 1641defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1642defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1643defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1644defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1645defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1646defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1647 1648 1649//===----------------------------------------------------------------------===// 1650// VI 1651//===----------------------------------------------------------------------===// 1652 1653class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : 1654 FLAT_Real <op, ps>, 1655 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1656 let AssemblerPredicate = isGFX8GFX9; 1657 let DecoderNamespace = "GFX8"; 1658 1659 let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1660 let AsmString = ps.Mnemonic # 1661 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); 1662} 1663 1664multiclass FLAT_Real_AllAddr_vi<bits<7> op, 1665 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1666 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; 1667 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; 1668} 1669 1670class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> : 1671 FLAT_Real <op, ps>, 1672 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> { 1673 let AssemblerPredicate = isGFX940Plus; 1674 let DecoderNamespace = "GFX9"; 1675 let Inst{13} = ps.sve; 1676 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1677} 1678 1679multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> { 1680 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> { 1681 let AssemblerPredicate = isGFX8GFX9NotGFX940; 1682 let OtherPredicates = [isGFX8GFX9NotGFX940]; 1683 } 1684 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> { 1685 let DecoderNamespace = "GFX9"; 1686 } 1687 let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { 1688 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1689 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1690 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1691 } 1692} 1693 1694multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op, 1695 string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr), 1696 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1697 1698 let OtherPredicates = [isGFX8GFX9NotGFX940] in { 1699 def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> { 1700 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds"; 1701 } 1702 def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> { 1703 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds"; 1704 } 1705 } 1706 1707 let SubtargetPredicate = isGFX940Plus in { 1708 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1709 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1710 } 1711} 1712 1713multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> { 1714 defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>; 1715 let SubtargetPredicate = isGFX940Plus in { 1716 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1717 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1718 } 1719} 1720 1721def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1722def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1723def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1724def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1725def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1726def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1727def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1728def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1729 1730def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1731def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1732def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1733def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1734def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1735def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1736def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1737def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1738 1739def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1740def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1741def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1742def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1743def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1744def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1745 1746multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps, 1747 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1748 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; 1749 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; 1750} 1751 1752multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, 1753 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : 1754 FLAT_Real_AllAddr_vi<op, has_sccb> { 1755 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; 1756 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; 1757} 1758 1759 1760defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1761defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1762defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1763defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1764defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1765defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1766defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1767defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1768defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1769defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1770defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1771defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 1772defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>; 1773defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>; 1774defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>; 1775defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>; 1776defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>; 1777defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>; 1778defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>; 1779defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>; 1780defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>; 1781defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>; 1782defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>; 1783defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; 1784defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; 1785defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; 1786 1787defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1788defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1789defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1790defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1791defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1792defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1793defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1794defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1795 1796defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1797defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1798defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1799defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1800defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1801defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1802 1803defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1804defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1805defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1806defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1807defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1808defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1809defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1810defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1811 1812defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; 1813defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; 1814defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; 1815defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; 1816defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; 1817 1818defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1819defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1820defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1821defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1822defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1823defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1824defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1825defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1826defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1827defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1828defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 1829defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 1830defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 1831defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 1832defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 1833defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 1834defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>; 1835defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 1836defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 1837defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 1838defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 1839defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 1840defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 1841defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 1842defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 1843defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 1844 1845defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; 1846defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; 1847defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; 1848defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; 1849defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; 1850 1851defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; 1852defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; 1853defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>; 1854defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>; 1855defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>; 1856defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>; 1857defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>; 1858defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>; 1859defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>; 1860defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>; 1861defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>; 1862defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>; 1863defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>; 1864defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>; 1865defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>; 1866defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>; 1867defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>; 1868defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>; 1869defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>; 1870defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; 1871defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; 1872defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; 1873 1874let SubtargetPredicate = isGFX8GFX9NotGFX940 in { 1875 // These instructions are encoded differently on gfx90* and gfx940. 1876 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; 1877 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; 1878} 1879 1880let SubtargetPredicate = isGFX90AOnly in { 1881 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>; 1882 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>; 1883 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>; 1884 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; 1885 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; 1886 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; 1887} // End SubtargetPredicate = isGFX90AOnly 1888 1889multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> { 1890 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1891 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1892} 1893 1894multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> { 1895 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1896 def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1897} 1898 1899multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> : 1900 FLAT_Real_AllAddr_gfx940<op> { 1901 def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1902 def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1903} 1904 1905let SubtargetPredicate = isGFX940Plus in { 1906 // These instructions are encoded differently on gfx90* and gfx940. 1907 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; 1908 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; 1909 1910 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>; 1911 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>; 1912 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>; 1913 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; 1914 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; 1915 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; 1916 defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>; 1917 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>; 1918 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>; 1919 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; 1920} // End SubtargetPredicate = isGFX940Plus 1921 1922//===----------------------------------------------------------------------===// 1923// GFX10. 1924//===----------------------------------------------------------------------===// 1925 1926class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 1927 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 1928 let AssemblerPredicate = isGFX10Only; 1929 let DecoderNamespace = "GFX10"; 1930 1931 let Inst{11-0} = offset{11-0}; 1932 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 1933 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 1934 let Inst{55} = 0; 1935} 1936 1937 1938multiclass FLAT_Real_Base_gfx10<bits<7> op> { 1939 def _gfx10 : 1940 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 1941} 1942 1943multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 1944 def _RTN_gfx10 : 1945 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1946} 1947 1948multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 1949 def _SADDR_gfx10 : 1950 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1951} 1952 1953multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 1954 def _SADDR_RTN_gfx10 : 1955 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1956} 1957 1958multiclass FLAT_Real_ST_gfx10<bits<7> op> { 1959 def _ST_gfx10 : 1960 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 1961 let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding); 1962 let OtherPredicates = [HasFlatScratchSTMode]; 1963 } 1964} 1965 1966multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 1967 FLAT_Real_Base_gfx10<op>, 1968 FLAT_Real_SADDR_gfx10<op>; 1969 1970multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 1971 FLAT_Real_Base_gfx10<op>, 1972 FLAT_Real_RTN_gfx10<op>; 1973 1974multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 1975 FLAT_Real_AllAddr_gfx10<op>, 1976 FLAT_Real_RTN_gfx10<op>, 1977 FLAT_Real_SADDR_RTN_gfx10<op>; 1978 1979multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 1980 FLAT_Real_RTN_gfx10<op>, 1981 FLAT_Real_SADDR_RTN_gfx10<op>; 1982 1983multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 1984 FLAT_Real_Base_gfx10<op>, 1985 FLAT_Real_SADDR_gfx10<op>, 1986 FLAT_Real_ST_gfx10<op>; 1987 1988multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op, 1989 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 1990 let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in 1991 defm "" : FLAT_Real_Base_gfx10<op>; 1992 1993 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in 1994 defm "" : FLAT_Real_SADDR_gfx10<op>; 1995} 1996 1997multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op, 1998 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 1999 defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>; 2000 2001 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in 2002 defm "" : FLAT_Real_ST_gfx10<op>; 2003} 2004 2005// ENC_FLAT. 2006defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 2007defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 2008defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 2009defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 2010defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 2011defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 2012defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 2013defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 2014defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 2015defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 2016defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 2017defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 2018defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 2019defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 2020defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 2021defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 2022defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 2023defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 2024defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 2025defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 2026defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 2027defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 2028defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 2029defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 2030defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 2031defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 2032defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 2033defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 2034defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 2035defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 2036defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 2037defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 2038defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 2039defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 2040defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 2041defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 2042defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 2043defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 2044defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 2045defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 2046defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 2047defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 2048defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 2049defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 2050defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 2051defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 2052defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 2053defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 2054defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 2055defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 2056defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 2057defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 2058defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; 2059defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 2060 2061 2062// ENC_FLAT_GLBL. 2063defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 2064defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 2065defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 2066defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 2067defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 2068defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 2069defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 2070defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 2071defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 2072defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 2073defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 2074defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 2075defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 2076defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 2077defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 2078defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 2079defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 2080defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 2081defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 2082defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 2083defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 2084defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 2085defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 2086defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 2087defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 2088defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 2089defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>; 2090defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 2091defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 2092defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; 2093defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; 2094defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; 2095defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; 2096defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; 2097defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; 2098defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; 2099defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; 2100defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; 2101defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; 2102defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; 2103defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; 2104defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; 2105defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; 2106defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; 2107defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; 2108defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; 2109defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; 2110defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; 2111defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; 2112defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; 2113defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; 2114defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; 2115defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; 2116defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>; 2117defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; 2118defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; 2119defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; 2120 2121defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; 2122defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; 2123defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; 2124defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; 2125defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; 2126 2127// ENC_FLAT_SCRATCH. 2128defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; 2129defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; 2130defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>; 2131defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>; 2132defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>; 2133defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>; 2134defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>; 2135defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>; 2136defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>; 2137defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>; 2138defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>; 2139defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>; 2140defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>; 2141defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>; 2142defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>; 2143defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>; 2144defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>; 2145defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>; 2146defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; 2147defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; 2148defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; 2149defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; 2150 2151defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; 2152defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; 2153defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; 2154defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; 2155defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; 2156 2157//===----------------------------------------------------------------------===// 2158// GFX11 2159//===----------------------------------------------------------------------===// 2160 2161class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 2162 FLAT_Real <op, ps, opName>, 2163 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> { 2164 let AssemblerPredicate = isGFX11Plus; 2165 let DecoderNamespace = "GFX11"; 2166 2167 let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2168 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 2169 let Inst{15} = cpol{CPolBit.SLC}; 2170 let Inst{17-16} = seg; 2171 let Inst{55} = ps.sve; 2172} 2173 2174multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> { 2175 def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> { 2176 let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2177 } 2178 if renamed then 2179 def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>; 2180} 2181 2182multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> { 2183 def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> { 2184 let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2185 } 2186} 2187 2188multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> { 2189 def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>; 2190} 2191 2192multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> { 2193 def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>; 2194} 2195 2196multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> { 2197 def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> { 2198 let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2199 let OtherPredicates = [HasFlatScratchSTMode]; 2200 } 2201} 2202 2203multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> { 2204 def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> { 2205 let OtherPredicates = [HasFlatScratchSVSMode]; 2206 } 2207} 2208 2209multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2210 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2211 FLAT_Real_SADDR_gfx11<op, ps, opName>; 2212 2213multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2214 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2215 FLAT_Real_RTN_gfx11<op, ps, opName>; 2216 2217multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2218 FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>, 2219 FLAT_Real_RTN_gfx11<op, ps, opName>, 2220 FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>; 2221 2222multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName> : 2223 FLAT_Real_RTN_gfx11<op, ps, opName>, 2224 FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>; 2225 2226multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2227 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2228 FLAT_Real_SADDR_gfx11<op, ps, opName>, 2229 FLAT_Real_ST_gfx11<op, ps, opName>, 2230 FLAT_Real_SVS_gfx11<op, ps, opName>; 2231 2232// ENC_FLAT. 2233defm FLAT_LOAD_U8 : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>; 2234defm FLAT_LOAD_I8 : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>; 2235defm FLAT_LOAD_U16 : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>; 2236defm FLAT_LOAD_I16 : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>; 2237defm FLAT_LOAD_B32 : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>; 2238defm FLAT_LOAD_B64 : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>; 2239defm FLAT_LOAD_B96 : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>; 2240defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>; 2241defm FLAT_STORE_B8 : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>; 2242defm FLAT_STORE_B16 : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>; 2243defm FLAT_STORE_B32 : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>; 2244defm FLAT_STORE_B64 : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>; 2245defm FLAT_STORE_B96 : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>; 2246defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>; 2247defm FLAT_LOAD_D16_U8 : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">; 2248defm FLAT_LOAD_D16_I8 : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">; 2249defm FLAT_LOAD_D16_B16 : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">; 2250defm FLAT_LOAD_D16_HI_U8 : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">; 2251defm FLAT_LOAD_D16_HI_I8 : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">; 2252defm FLAT_LOAD_D16_HI_B16 : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">; 2253defm FLAT_STORE_D16_HI_B8 : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">; 2254defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">; 2255defm FLAT_ATOMIC_SWAP_B32 : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>; 2256defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>; 2257defm FLAT_ATOMIC_ADD_U32 : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>; 2258defm FLAT_ATOMIC_SUB_U32 : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>; 2259defm FLAT_ATOMIC_MIN_I32 : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>; 2260defm FLAT_ATOMIC_MIN_U32 : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>; 2261defm FLAT_ATOMIC_MAX_I32 : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>; 2262defm FLAT_ATOMIC_MAX_U32 : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>; 2263defm FLAT_ATOMIC_AND_B32 : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>; 2264defm FLAT_ATOMIC_OR_B32 : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; 2265defm FLAT_ATOMIC_XOR_B32 : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; 2266defm FLAT_ATOMIC_INC_U32 : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; 2267defm FLAT_ATOMIC_DEC_U32 : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; 2268defm FLAT_ATOMIC_SWAP_B64 : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; 2269defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; 2270defm FLAT_ATOMIC_ADD_U64 : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; 2271defm FLAT_ATOMIC_SUB_U64 : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; 2272defm FLAT_ATOMIC_MIN_I64 : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; 2273defm FLAT_ATOMIC_MIN_U64 : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; 2274defm FLAT_ATOMIC_MAX_I64 : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; 2275defm FLAT_ATOMIC_MAX_U64 : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; 2276defm FLAT_ATOMIC_AND_B64 : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; 2277defm FLAT_ATOMIC_OR_B64 : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; 2278defm FLAT_ATOMIC_XOR_B64 : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; 2279defm FLAT_ATOMIC_INC_U64 : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; 2280defm FLAT_ATOMIC_DEC_U64 : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; 2281defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">; 2282defm FLAT_ATOMIC_MIN_F32 : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">; 2283defm FLAT_ATOMIC_MAX_F32 : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">; 2284defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; 2285 2286// ENC_FLAT_GLBL. 2287defm GLOBAL_LOAD_U8 : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; 2288defm GLOBAL_LOAD_I8 : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; 2289defm GLOBAL_LOAD_U16 : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; 2290defm GLOBAL_LOAD_I16 : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; 2291defm GLOBAL_LOAD_B32 : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; 2292defm GLOBAL_LOAD_B64 : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; 2293defm GLOBAL_LOAD_B96 : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; 2294defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; 2295defm GLOBAL_STORE_B8 : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; 2296defm GLOBAL_STORE_B16 : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; 2297defm GLOBAL_STORE_B32 : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; 2298defm GLOBAL_STORE_B64 : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; 2299defm GLOBAL_STORE_B96 : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; 2300defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; 2301defm GLOBAL_LOAD_D16_U8 : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; 2302defm GLOBAL_LOAD_D16_I8 : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; 2303defm GLOBAL_LOAD_D16_B16 : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; 2304defm GLOBAL_LOAD_D16_HI_U8 : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; 2305defm GLOBAL_LOAD_D16_HI_I8 : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">; 2306defm GLOBAL_LOAD_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; 2307defm GLOBAL_STORE_D16_HI_B8 : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; 2308defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; 2309defm GLOBAL_LOAD_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; 2310defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; 2311defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; 2312defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; 2313defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; 2314defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; 2315defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">; 2316defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; 2317defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; 2318defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; 2319defm GLOBAL_ATOMIC_MAX_U32 : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; 2320defm GLOBAL_ATOMIC_AND_B32 : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; 2321defm GLOBAL_ATOMIC_OR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; 2322defm GLOBAL_ATOMIC_XOR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; 2323defm GLOBAL_ATOMIC_INC_U32 : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; 2324defm GLOBAL_ATOMIC_DEC_U32 : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; 2325defm GLOBAL_ATOMIC_SWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; 2326defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; 2327defm GLOBAL_ATOMIC_ADD_U64 : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; 2328defm GLOBAL_ATOMIC_SUB_U64 : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; 2329defm GLOBAL_ATOMIC_MIN_I64 : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; 2330defm GLOBAL_ATOMIC_MIN_U64 : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; 2331defm GLOBAL_ATOMIC_MAX_I64 : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; 2332defm GLOBAL_ATOMIC_MAX_U64 : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; 2333defm GLOBAL_ATOMIC_AND_B64 : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; 2334defm GLOBAL_ATOMIC_OR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; 2335defm GLOBAL_ATOMIC_XOR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; 2336defm GLOBAL_ATOMIC_INC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; 2337defm GLOBAL_ATOMIC_DEC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; 2338defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">; 2339defm GLOBAL_ATOMIC_MIN_F32 : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">; 2340defm GLOBAL_ATOMIC_MAX_F32 : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">; 2341defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; 2342 2343// ENC_FLAT_SCRATCH. 2344defm SCRATCH_LOAD_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; 2345defm SCRATCH_LOAD_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; 2346defm SCRATCH_LOAD_U16 : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; 2347defm SCRATCH_LOAD_I16 : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; 2348defm SCRATCH_LOAD_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; 2349defm SCRATCH_LOAD_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; 2350defm SCRATCH_LOAD_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; 2351defm SCRATCH_LOAD_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; 2352defm SCRATCH_STORE_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; 2353defm SCRATCH_STORE_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; 2354defm SCRATCH_STORE_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; 2355defm SCRATCH_STORE_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; 2356defm SCRATCH_STORE_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; 2357defm SCRATCH_STORE_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; 2358defm SCRATCH_LOAD_D16_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; 2359defm SCRATCH_LOAD_D16_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; 2360defm SCRATCH_LOAD_D16_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; 2361defm SCRATCH_LOAD_D16_HI_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; 2362defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; 2363defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; 2364defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; 2365defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; 2366