//===-- BUFInstructions.td - Buffer Instruction Definitions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Complex patterns used to select buffer addresses. Each one names the
// selector function that implements it.
def MUBUFAddr64 : ComplexPattern<iPTR, 4, "SelectMUBUFAddr64">;
def MUBUFOffset : ComplexPattern<iPTR, 3, "SelectMUBUFOffset">;

def MUBUFScratchOffen  : ComplexPattern<iPTR, 4, "SelectMUBUFScratchOffen",
                                        [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<iPTR, 3, "SelectMUBUFScratchOffset",
                                        [], [SDNPWantParent], 20>;

// Integer tags for the five buffer addressing variants. The helper classes
// in this file switch on these values.
def BUFAddrKind {
  int Offset = 0;
  int OffEn  = 1;
  int IdxEn  = 2;
  int BothEn = 3;
  int Addr64 = 4;
}

// Maps a BUFAddrKind value to the lower-case suffix used when building
// PseudoInstr names. Unknown values map to the empty string.
class getAddrName<int addrKind> {
  string ret = !cond(
    !eq(addrKind, BUFAddrKind.Offset) : "offset",
    !eq(addrKind, BUFAddrKind.OffEn)  : "offen",
    !eq(addrKind, BUFAddrKind.IdxEn)  : "idxen",
    !eq(addrKind, BUFAddrKind.BothEn) : "bothen",
    !eq(addrKind, BUFAddrKind.Addr64) : "addr64",
    true                              : "");
}

// Marker record: tags a MUBUF instruction with its operation name and
// whether it is the addr64 form.
class MUBUFAddr64Table <bit is_addr64, string Name> {
  bit IsAddr64 = is_addr64;
  string OpName = Name;
}

// Marker record: tags a MUBUF instruction with its operation name and
// whether it is the LDS form.
class MUBUFLdsTable <bit is_lds, string Name> {
  bit IsLds = is_lds;
  string OpName = Name;
}

// Marker record: MTBUF counterpart of MUBUFAddr64Table.
class MTBUFAddr64Table <bit is_addr64, string Name> {
  bit IsAddr64 = is_addr64;
  string OpName = Name;
}

//===----------------------------------------------------------------------===//
// MTBUF classes
//===----------------------------------------------------------------------===//

// Rewrites the FORMAT_XY / FORMAT_XYZ / FORMAT_XYZW part of an instruction
// name to FORMAT_X. The longest suffix is substituted first (innermost call).
class MTBUFGetBaseOpcode<string Op> {
  string ret =
    !subst("FORMAT_XY", "FORMAT_X",
      !subst("FORMAT_XYZ", "FORMAT_X",
        !subst("FORMAT_XYZW", "FORMAT_X", Op)));
}


// Common base for all MTBUF pseudo instructions.
//   opName  - assembly mnemonic, also forwarded to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string.
//   pattern - optional selection pattern list.
class MTBUF_Pseudo <string opName, dag outs, dag ins,
                    string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;

  // Kept separately so MTBUF_Real can rebuild the full assembly string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  Instruction Opcode = !cast<Instruction>(NAME);
  Instruction BaseOpcode = !cast<Instruction>(MTBUFGetBaseOpcode<NAME>.ret);

  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MTBUF = 1;
  let Uses = [EXEC];
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  let AsmMatchConverter = "cvtMtbuf";

  // Per-instruction description bits; derived classes override these.
  bits<1> offen       = 0;
  bits<1> idxen       = 0;
  bits<1> addr64      = 0;
  bits<1> has_vdata   = 1;
  bits<1> has_vaddr   = 1;
  bits<1> has_glc     = 1;
  bits<1> has_dlc     = 1;
  bits<1> glc_value   = 0; // the value for glc if no such operand
  bits<1> dlc_value   = 0; // the value for dlc if no such operand
  bits<1> has_srsrc   = 1;
  bits<1> has_soffset = 1;
  bits<1> has_offset  = 1;
  bits<1> has_slc     = 1;
  bits<1> has_tfe     = 1;
  bits<4> elements    = 0;
  bits<1> has_sccb    = 1;
  bits<1> sccb_value  = 0;
}

// Encodable MTBUF instruction built from a pseudo: reuses the pseudo's
// operand lists and assembly string and declares the encoding operand fields.
class MTBUF_Real <MTBUF_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MTBUF = 1;

  // Copy the relevant flags from the pseudo.
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;

  // Operand fields referenced by the encodings.
  bits<12> offset;
  bits<5>  cpol;
  bits<7>  format;
  bits<8>  vaddr;
  bits<10> vdata;
  bits<7>  srsrc;
  bits<1>  tfe;
  bits<8>  soffset;

  // The combined format operand split into its two sub-fields.
  bits<4> dfmt = format{3-0};
  bits<3> nfmt = format{6-4};

  // GFX90A+ only: instruction uses AccVGPR for data.
  // This bit supersedes tfe.
  bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
}

// Builds the MTBUF input operand dag.
//   vdataList - empty, or a single data register class ($vdata input).
//   vaddrList - empty, or a single address register class ($vaddr input).
// The lists act as optional values: only their first element is used.
class getMTBUFInsDA<list<RegisterClass> vdataList,
                    list<RegisterClass> vaddrList=[]> {
  RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;

  // Operand list without a data operand (with / without $vaddr).
  dag InsNoData = !if(!empty(vaddrList),
    (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz),
    (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz)
  );

  // Operand list with a leading data operand (with / without $vaddr).
  dag InsData = !if(!empty(vaddrList),
    (ins vdata_op:$vdata, SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
         TFE:$tfe, SWZ:$swz),
    (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
         TFE:$tfe, SWZ:$swz)
  );

  dag ret = !if(!empty(vdataList), InsNoData, InsData);
}

// Picks the MTBUF input dag for an addressing kind: no $vaddr for Offset,
// a VGPR_32 $vaddr for OffEn/IdxEn, a VReg_64 $vaddr for BothEn/Addr64.
class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
  dag ret = !cond(
    !eq(addrKind, BUFAddrKind.Offset) : getMTBUFInsDA<vdataList>.ret,
    !eq(addrKind, BUFAddrKind.OffEn)  : getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
    !eq(addrKind, BUFAddrKind.IdxEn)  : getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
    !eq(addrKind, BUFAddrKind.BothEn) : getMTBUFInsDA<vdataList, [VReg_64]>.ret,
    !eq(addrKind, BUFAddrKind.Addr64) : getMTBUFInsDA<vdataList, [VReg_64]>.ret,
    true                              : (ins));
}

// Address part of the MTBUF assembly string for an addressing kind;
// "$offset" is always appended.
class getMTBUFAsmOps<int addrKind> {
  string Pfx = !cond(
    !eq(addrKind, BUFAddrKind.Offset) : "off, $srsrc,$format $soffset",
    !eq(addrKind, BUFAddrKind.OffEn)  : "$vaddr, $srsrc,$format $soffset offen",
    !eq(addrKind, BUFAddrKind.IdxEn)  : "$vaddr, $srsrc,$format $soffset idxen",
    !eq(addrKind, BUFAddrKind.BothEn) : "$vaddr, $srsrc,$format $soffset idxen offen",
    !eq(addrKind, BUFAddrKind.Addr64) : "$vaddr, $srsrc,$format $soffset addr64",
    true                              : "");
  string ret = Pfx # "$offset";
}

// Derives the offen / idxen / addr64 / has_vaddr bits from an addressing kind.
class MTBUF_SetupAddr<int addrKind> {
  bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn),
                      !eq(addrKind, BUFAddrKind.BothEn));

  bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn),
                      !eq(addrKind, BUFAddrKind.BothEn));

  bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64);

  bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset);
}

// MTBUF load pseudo: $vdata is the only output, the inputs depend on the
// addressing kind, and `elems` is stored in `elements`.
class MTBUF_Load_Pseudo <string opName,
                         int addrKind,
                         RegisterClass vdataClass,
                         int elems,
                         list<dag> pattern=[],
                         // Workaround bug bz30254
                         int addrKindCopy = addrKind>
  : MTBUF_Pseudo<opName,
                 (outs getLdStRegisterOperand<vdataClass>.ret:$vdata),
                 getMTBUFIns<addrKindCopy>.ret,
                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                 pattern>,
    MTBUF_SetupAddr<addrKindCopy> {
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let mayLoad = 1;
  let mayStore = 0;
  let elements = elems;
}

// Emits one MTBUF load pseudo per addressing kind, plus "_exact" copies
// (no ADDR64 variant) that have DisableWQM set.
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
                              int elems> {

  def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
                MTBUFAddr64Table<0, NAME>;

  def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>,
                MTBUFAddr64Table<1, NAME>;

  def _OFFEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
  def _IDXEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
  def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;

  let DisableWQM = 1 in {
    def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
    def _OFFEN_exact  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
    def _IDXEN_exact  : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
    def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
  }
}

// MTBUF store pseudo: no outputs; $vdata is the first input, the remaining
// inputs depend on the addressing kind.
class MTBUF_Store_Pseudo <string opName,
                          int addrKind,
                          RegisterClass vdataClass,
                          int elems,
                          list<dag> pattern=[],
                          // Workaround bug bz30254
                          int addrKindCopy = addrKind,
                          RegisterClass vdataClassCopy = vdataClass>
  : MTBUF_Pseudo<opName,
                 (outs),
                 getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                 pattern>,
    MTBUF_SetupAddr<addrKindCopy> {
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let mayLoad = 0;
  let mayStore = 1;
  let elements = elems;
}

// Emits one MTBUF store pseudo per addressing kind, plus "_exact" copies
// (no ADDR64 variant) that have DisableWQM set.
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
                               int elems> {

  def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
                MTBUFAddr64Table<0, NAME>;

  def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>,
                MTBUFAddr64Table<1, NAME>;

  def _OFFEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
  def _IDXEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
  def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;

  let DisableWQM = 1 in {
    def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
    def _OFFEN_exact  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
    def _IDXEN_exact  : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
    def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
  }
}


//===----------------------------------------------------------------------===//
// MUBUF classes
//===----------------------------------------------------------------------===//

// Rewrites DWORDX2 / DWORDX3 / DWORDX4 in an instruction name to DWORD.
class MUBUFGetBaseOpcode<string Op> {
  string ret =
    !subst("DWORDX2", "DWORD",
      !subst("DWORDX3", "DWORD",
        !subst("DWORDX4", "DWORD", Op)));
}

// Common base for all MUBUF pseudo instructions.
//   opName  - assembly mnemonic, also forwarded to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string.
//   pattern - optional selection pattern list.
class MUBUF_Pseudo <string opName, dag outs, dag ins,
                    string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;

  // Kept separately so MUBUF_Real can rebuild the full assembly string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  Instruction Opcode = !cast<Instruction>(NAME);
  Instruction BaseOpcode = !cast<Instruction>(MUBUFGetBaseOpcode<NAME>.ret);

  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MUBUF = 1;
  let Uses = [EXEC];
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  let AsmMatchConverter = "cvtMubuf";

  // Per-instruction description bits; derived classes override these.
  bits<1> offen       = 0;
  bits<1> idxen       = 0;
  bits<1> addr64      = 0;
  bits<1> lds         = 0;
  bits<1> has_vdata   = 1;
  bits<1> has_vaddr   = 1;
  bits<1> has_glc     = 1;
  bits<1> has_dlc     = 1;
  bits<1> glc_value   = 0; // the value for glc if no such operand
  bits<1> dlc_value   = 0; // the value for dlc if no such operand
  bits<1> has_srsrc   = 1;
  bits<1> has_soffset = 1;
  bits<1> has_offset  = 1;
  bits<1> has_slc     = 1;
  bits<1> has_tfe     = 1;
  bits<4> elements    = 0;
  bits<1> has_sccb    = 1;
  bits<1> sccb_value  = 0;
  bits<1> IsBufferInv = 0;
}

// Encodable MUBUF instruction built from a pseudo: reuses the pseudo's
// operand lists and assembly string and declares the encoding operand fields.
class MUBUF_Real <MUBUF_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MUBUF = 1;

  // Copy the relevant flags from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;

  // Operand fields referenced by the encodings.
  bits<12> offset;
  bits<5>  cpol;
  bits<8>  vaddr;
  bits<10> vdata;
  bits<7>  srsrc;
  bits<1>  tfe;
  bits<8>  soffset;

  // GFX90A+ only: instruction uses AccVGPR for data.
  // This bit supersedes tfe.
  bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
}


// For cache invalidation instructions: no operands, empty operand string,
// and every has_* bit cleared.
class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
  MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {

  let AsmMatchConverter = "";

  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;

  let IsBufferInv = 1;
  // Set everything else to 0.
  let offen       = 0;
  let idxen       = 0;
  let addr64      = 0;
  let has_vdata   = 0;
  let has_vaddr   = 0;
  let has_glc     = 0;
  let has_dlc     = 0;
  let glc_value   = 0;
  let dlc_value   = 0;
  let has_srsrc   = 0;
  let has_soffset = 0;
  let has_offset  = 0;
  let has_slc     = 0;
  let has_tfe     = 0;
  let has_sccb    = 0;
  let sccb_value  = 0;
}

// Builds the MUBUF input operand dag.
//   vdataList - empty, or a single data register class ($vdata input).
//   vaddrList - empty, or a single address register class ($vaddr input).
//   isLds     - when set, the trailing operands omit $tfe.
// The lists act as optional values: only their first element is used.
class getMUBUFInsDA<list<RegisterClass> vdataList,
                    list<RegisterClass> vaddrList=[],
                    bit isLds = 0> {
  RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;

  // Operand list without a data operand (with / without $vaddr).
  dag InsNoData = !if(!empty(vaddrList),
    (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, CPol_0:$cpol),
    (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, CPol_0:$cpol)
  );

  // Operand list with a leading data operand (with / without $vaddr).
  dag InsData = !if(!empty(vaddrList),
    (ins vdata_op:$vdata, SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
    (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
  );

  // The address/data part followed by the trailing modifier operands.
  dag ret = !con(
    !if(!empty(vdataList), InsNoData, InsData),
    !if(isLds, (ins SWZ_0:$swz), (ins TFE_0:$tfe, SWZ_0:$swz))
  );
}

// Element count for a value type. The f16-based types are matched by type
// first; every other type is classified by its bit size (32/64/96/128 map
// to 1/2/3/4). Anything else yields 0.
class getMUBUFElements<ValueType vt> {
  int ret = !cond(
    !eq(vt, f16)        : 1,
    !eq(vt, v2f16)      : 2,
    !eq(vt, v3f16)      : 3,
    !eq(vt, v4f16)      : 4,
    !eq(vt.Size, 32)    : 1,
    !eq(vt.Size, 64)    : 2,
    !eq(vt.Size, 96)    : 3,
    !eq(vt.Size, 128)   : 4,
    true                : 0);
}

// Picks the MUBUF input dag for an addressing kind: no $vaddr for Offset,
// a VGPR_32 $vaddr for OffEn/IdxEn, a VReg_64 $vaddr for BothEn/Addr64.
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
  dag ret = !cond(
    !eq(addrKind, BUFAddrKind.Offset) : getMUBUFInsDA<vdataList, [], isLds>.ret,
    !eq(addrKind, BUFAddrKind.OffEn)  : getMUBUFInsDA<vdataList, [VGPR_32], isLds>.ret,
    !eq(addrKind, BUFAddrKind.IdxEn)  : getMUBUFInsDA<vdataList, [VGPR_32], isLds>.ret,
    !eq(addrKind, BUFAddrKind.BothEn) : getMUBUFInsDA<vdataList, [VReg_64], isLds>.ret,
    !eq(addrKind, BUFAddrKind.Addr64) : getMUBUFInsDA<vdataList, [VReg_64], isLds>.ret,
    true                              : (ins));
}

// Address part of the MUBUF assembly string for an addressing kind;
// "$offset" is always appended.
class getMUBUFAsmOps<int addrKind> {
  string Pfx = !cond(
    !eq(addrKind, BUFAddrKind.Offset) : "off, $srsrc, $soffset",
    !eq(addrKind, BUFAddrKind.OffEn)  : "$vaddr, $srsrc, $soffset offen",
    !eq(addrKind, BUFAddrKind.IdxEn)  : "$vaddr, $srsrc, $soffset idxen",
    !eq(addrKind, BUFAddrKind.BothEn) : "$vaddr, $srsrc, $soffset idxen offen",
    !eq(addrKind, BUFAddrKind.Addr64) : "$vaddr, $srsrc, $soffset addr64",
    true                              : "");
  string ret = Pfx # "$offset";
}

// Derives the offen / idxen / addr64 / has_vaddr bits from an addressing kind.
class MUBUF_SetupAddr<int addrKind> {
  bits<1> offen = !or(!eq(addrKind, BUFAddrKind.OffEn),
                      !eq(addrKind, BUFAddrKind.BothEn));

  bits<1> idxen = !or(!eq(addrKind, BUFAddrKind.IdxEn),
                      !eq(addrKind, BUFAddrKind.BothEn));

  bits<1> addr64 = !eq(addrKind, BUFAddrKind.Addr64);

  bits<1> has_vaddr = !ne(addrKind, BUFAddrKind.Offset);
}

// MUBUF load pseudo.
//   HasTiedDest - appends a $vdata_in input and ties it to $vdata.
//   isLds       - selects the " lds" assembly form, adds M0 to Uses,
//                 clears has_tfe and switches the asm match converter.
class MUBUF_Load_Pseudo <string opName,
                         int addrKind,
                         ValueType vdata_vt,
                         bit HasTiedDest = 0,
                         bit isLds = 0,
                         list<dag> pattern=[],
                         // Workaround bug bz30254
                         int addrKindCopy = addrKind,
                         RegisterClass vdata_rc = getVregSrcForVT<vdata_vt>.ret,
                         RegisterOperand vdata_op = getLdStRegisterOperand<vdata_rc>.ret>
  : MUBUF_Pseudo<opName,
                 (outs vdata_op:$vdata),
                 !con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
                      !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
                   !if(isLds, " lds", "$tfe") # "$swz",
                 pattern>,
    MUBUF_SetupAddr<addrKindCopy> {
  let PseudoInstr = opName # !if(isLds, "_lds", "") #
                    "_" # getAddrName<addrKindCopy>.ret;
  let AsmMatchConverter = !if(isLds, "cvtMubufLds", "cvtMubuf");

  let Constraints = !if(HasTiedDest, "$vdata = $vdata_in", "");
  let mayLoad = 1;
  let mayStore = 0;
  let maybeAtomic = 1;
  let Uses = !if(isLds, [EXEC, M0], [EXEC]);
  let has_tfe = !not(isLds);
  let lds = isLds;
  let elements = getMUBUFElements<vdata_vt>.ret;
}

// Selects `inst` for a load `ld` whose address matches MUBUFOffset.
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32,
                             SDPatternOperator ld = null_frag> : Pat <
  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset))
>;

// Selects `inst` for a load `ld` whose address matches MUBUFAddr64.
class MUBUF_Addr64_Load_Pat <Instruction inst,
                             ValueType load_vt = i32,
                             SDPatternOperator ld = null_frag> : Pat <
  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset))
>;

// Emits the Offset and Addr64 load patterns for the instruction family
// named by BaseInst (its _OFFSET and _ADDR64 members).
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32,
                                  SDPatternOperator ld = null_frag> {
  def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
  def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
}


// FIXME: tfe can't be an operand because it requires a separate
// opcode because it needs an N+1 register class dest register.
//
// Emits one MUBUF load pseudo per addressing kind, plus "_exact" copies
// (no ADDR64 variant) that have DisableWQM set.
multiclass MUBUF_Pseudo_Loads<string opName,
                              ValueType load_vt = i32,
                              bit TiedDest = 0,
                              bit isLds = 0> {

  // v3f16 is replaced by v4f16; every other type is used unchanged.
  defvar legal_load_vt = !if(!eq(load_vt, v3f16), v4f16, load_vt);

  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds>,
                MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;

  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, legal_load_vt, TiedDest, isLds>,
                MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;

  def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds>;
  def _IDXEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds>;
  def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds>;

  let DisableWQM = 1 in {
    def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds>;
    def _OFFEN_exact  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds>;
    def _IDXEN_exact  : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds>;
    def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds>;
  }
}

// Emits both the regular load family and its "_LDS" twin.
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
  defm NAME : MUBUF_Pseudo_Loads<opName, load_vt>;
  defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
}

// MUBUF store pseudo: no outputs; $vdata (register class derived from
// store_vt) is the first input.
class MUBUF_Store_Pseudo <string opName,
                          int addrKind,
                          ValueType store_vt,
                          list<dag> pattern=[],
                          // Workaround bug bz30254
                          int addrKindCopy = addrKind>
  : MUBUF_Pseudo<opName,
                 (outs),
                 getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                 pattern>,
    MUBUF_SetupAddr<addrKindCopy> {
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let mayLoad = 0;
  let mayStore = 1;
  let maybeAtomic = 1;
  let elements = getMUBUFElements<store_vt>.ret;
}

// Emits one MUBUF store pseudo per addressing kind, plus "_exact" copies
// (no ADDR64 variant) that have DisableWQM set. Only the _OFFSET and
// _ADDR64 members carry a selection pattern for `st`.
multiclass MUBUF_Pseudo_Stores<string opName,
                               ValueType store_vt = i32,
                               SDPatternOperator st = null_frag> {

  // v3f16 is replaced by v4f16; every other type is used unchanged.
  defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt);

  def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
    [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                             i16:$offset))]>,
    MUBUFAddr64Table<0, NAME>;

  def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
    [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                             i16:$offset))]>,
    MUBUFAddr64Table<1, NAME>;

  def _OFFEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
  def _IDXEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt>;
  def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt>;

  let DisableWQM = 1 in {
    def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt>;
    def _OFFEN_exact  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
    def _IDXEN_exact  : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, legal_store_vt>;
    def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, legal_store_vt>;
  }
}

// Store pseudo with the "lds" assembly form: no $vdata and no $vaddr
// operand, M0 added to Uses.
class MUBUF_Pseudo_Store_Lds<string opName>
  : MUBUF_Pseudo<opName,
                 (outs),
                 (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
                 " $srsrc, $soffset$offset lds$cpol$swz"> {
  let mayLoad = 0;
  let mayStore = 1;
  let maybeAtomic = 1;

  let has_vdata = 0;
  let has_vaddr = 0;
  let has_tfe = 0;
  let lds = 1;

  let Uses = [EXEC, M0];
  let AsmMatchConverter = "cvtMubufLds";
}

// Builds the input dag for a buffer atomic.
//   vdata_in  - when set, the data operand is named $vdata_in and the cache
//               policy operand is CPol_GLC1; otherwise $vdata and CPol_0.
//   vaddrList - empty, or a single address register class ($vaddr input).
class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
                          list<RegisterClass> vaddrList=[]> {
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
  dag ret = !if(vdata_in,
    !if(!empty(vaddrList),
      (ins vdata_op:$vdata_in,
           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol),
      (ins vdata_op:$vdata_in, vaddrClass:$vaddr,
           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol)
    ),
    !if(!empty(vaddrList),
      (ins vdata_op:$vdata,
           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
      (ins vdata_op:$vdata, vaddrClass:$vaddr,
           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
    ));
}

// Picks the atomic input dag for an addressing kind: no $vaddr for Offset,
// a VGPR_32 $vaddr for OffEn/IdxEn, a VReg_64 $vaddr for BothEn/Addr64.
class getMUBUFAtomicIns<int addrKind,
                        RegisterClass vdataClass,
                        bit vdata_in,
                        // Workaround bug bz30254
                        RegisterClass vdataClassCopy=vdataClass> {
  dag ret = !cond(
    !eq(addrKind, BUFAddrKind.Offset) :
      getMUBUFAtomicInsDA<vdataClassCopy, vdata_in>.ret,
    !eq(addrKind, BUFAddrKind.OffEn) :
      getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
    !eq(addrKind, BUFAddrKind.IdxEn) :
      getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
    !eq(addrKind, BUFAddrKind.BothEn) :
      getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
    !eq(addrKind, BUFAddrKind.Addr64) :
      getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
    true : (ins));
}

// Shared base of the returning and non-returning buffer atomics.
class MUBUF_Atomic_Pseudo<string opName,
                          int addrKind,
                          dag outs,
                          dag ins,
                          string asmOps,
                          list<dag> pattern=[],
                          // Workaround bug bz30254
                          int addrKindCopy = addrKind>
  : MUBUF_Pseudo<opName, outs, ins, asmOps, pattern>,
    MUBUF_SetupAddr<addrKindCopy> {
  let mayStore = 1;
  let mayLoad = 1;
  let hasPostISelHook = 1;
  let hasSideEffects = 1;
  let DisableWQM = 1;
  let has_glc = 0;
  let has_dlc = 0;
  let has_tfe = 0;
  let has_sccb = 1;
  let maybeAtomic = 1;
  let AsmMatchConverter = "cvtMubufAtomic";
}

// Buffer atomic that produces no result: no outputs, glc_value = 0.
class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
                               RegisterClass vdataClass,
                               list<dag> pattern=[],
                               // Workaround bug bz30254
                               int addrKindCopy = addrKind,
                               RegisterClass vdataClassCopy = vdataClass>
  : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
                        (outs),
                        getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
                        pattern>,
    AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let glc_value = 0;
  let dlc_value = 0;
  let sccb_value = 0;
  let IsAtomicNoRet = 1;
}

// Buffer atomic that returns a value: $vdata is an output tied to the
// $vdata_in input, glc_value = 1, and $vdata_in is excluded from encoding.
class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
                             RegisterClass vdataClass,
                             list<dag> pattern=[],
                             // Workaround bug bz30254
                             int addrKindCopy = addrKind,
                             RegisterClass vdataClassCopy = vdataClass,
                             RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret>
  : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
                        (outs vdata_op:$vdata),
                        getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
                        pattern>,
    AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
  let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
  let glc_value = 1;
  let dlc_value = 0;
  let sccb_value = 0;
  let IsAtomicRet = 1;
  let Constraints = "$vdata = $vdata_in";
  let DisableEncoding = "$vdata_in";
}

// Emits the non-returning atomic for every addressing kind. FPAtomic is
// set from isFP (by default isFloatType<vdataType>.ret) on each of them.
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
                                        RegisterClass vdataClass,
                                        ValueType vdataType,
                                        bit isFP = isFloatType<vdataType>.ret> {
  let FPAtomic = isFP in {
    def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
                  MUBUFAddr64Table <0, NAME>;

    def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
                  MUBUFAddr64Table <1, NAME>;

    def _OFFEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;

    def _IDXEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;

    def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
  }
}

// Emits the returning atomic for every addressing kind. Only the _OFFSET_RTN
// and _ADDR64_RTN members carry a selection pattern for `atomic`.
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
                                     RegisterClass vdataClass,
                                     ValueType vdataType,
                                     SDPatternOperator atomic,
                                     bit isFP = isFloatType<vdataType>.ret> {
  let FPAtomic = isFP in {
    def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
      [(set vdataType:$vdata,
       (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset),
               vdataType:$vdata_in))]>,
      MUBUFAddr64Table <0, NAME # "_RTN">;

    def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
      [(set vdataType:$vdata,
       (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
               vdataType:$vdata_in))]>,
      MUBUFAddr64Table <1, NAME # "_RTN">;

    def _OFFEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;

    def _IDXEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;

    def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
  }
}

// Emits both the non-returning and the returning atomic families.
multiclass MUBUF_Pseudo_Atomics <string opName,
                                 RegisterClass vdataClass,
                                 ValueType vdataType,
                                 SDPatternOperator atomic> :
  MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>,
  MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;


//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//

// Formatted loads and stores. Only the single-component load also gets
// an LDS family.
defm BUFFER_LOAD_FORMAT_X     : MUBUF_Pseudo_Loads_Lds<"buffer_load_format_x", f32>;
defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Pseudo_Loads<"buffer_load_format_xy", v2f32>;
defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Pseudo_Loads<"buffer_load_format_xyz", v3f32>;
defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Pseudo_Loads<"buffer_load_format_xyzw", v4f32>;
defm BUFFER_STORE_FORMAT_X    : MUBUF_Pseudo_Stores<"buffer_store_format_x", f32>;
defm BUFFER_STORE_FORMAT_XY   : MUBUF_Pseudo_Stores<"buffer_store_format_xy", v2f32>;
defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Pseudo_Stores<"buffer_store_format_xyz", v3f32>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores<"buffer_store_format_xyzw", v4f32>;

// D16 formatted loads and stores for subtargets with unpacked D16 VMem.
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
  defm BUFFER_LOAD_FORMAT_D16_X_gfx80     : MUBUF_Pseudo_Loads<"buffer_load_format_d16_x", i32>;
  defm BUFFER_LOAD_FORMAT_D16_XY_gfx80    : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xy", v2i32>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80   : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyz", v3i32>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80  : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyzw", v4i32>;
  defm BUFFER_STORE_FORMAT_D16_X_gfx80    : MUBUF_Pseudo_Stores<"buffer_store_format_d16_x", i32>;
  defm BUFFER_STORE_FORMAT_D16_XY_gfx80   : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xy", v2i32>;
  defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80  : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyz", v3i32>;
  defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyzw", v4i32>;
} // End HasUnpackedD16VMem.

// D16 formatted loads and stores for subtargets with packed D16 VMem.
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
  defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Pseudo_Loads<"buffer_load_format_d16_x", f16>;
  defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xy", v2f16>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyz", v3f16>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyzw", v4f16>;
  defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Pseudo_Stores<"buffer_store_format_d16_x", f16>;
  defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xy", v2f16>;
  defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyz", v3f16>;
  defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyzw", v4f16>;
} // End HasPackedD16VMem.

// Unformatted loads. The byte, short and single-dword forms also get an
// LDS family.
defm BUFFER_LOAD_UBYTE   : MUBUF_Pseudo_Loads_Lds<"buffer_load_ubyte", i32>;
defm BUFFER_LOAD_SBYTE   : MUBUF_Pseudo_Loads_Lds<"buffer_load_sbyte", i32>;
defm BUFFER_LOAD_USHORT  : MUBUF_Pseudo_Loads_Lds<"buffer_load_ushort", i32>;
defm BUFFER_LOAD_SSHORT  : MUBUF_Pseudo_Loads_Lds<"buffer_load_sshort", i32>;
defm BUFFER_LOAD_DWORD   : MUBUF_Pseudo_Loads_Lds<"buffer_load_dword", i32>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads<"buffer_load_dwordx2", v2i32>;
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads<"buffer_load_dwordx3", v3i32>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads<"buffer_load_dwordx4", v4i32>;

// Selection patterns mapping global loads onto the _OFFSET / _ADDR64
// members of the load families above.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;

// This is not described in AMD documentation,
// but 'lds' versions of these opcodes are available
// in at least GFX8+ chips. See Bug 37653.
let SubtargetPredicate = isGFX8GFX9 in {
  defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads<"buffer_load_dwordx2", v2i32, 0, 1>;
  defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads<"buffer_load_dwordx3", v3i32, 0, 1>;
  defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads<"buffer_load_dwordx4", v4i32, 0, 1>;
}

// Unformatted stores, each with the global store fragment it selects from.
defm BUFFER_STORE_BYTE    : MUBUF_Pseudo_Stores<"buffer_store_byte", i32, truncstorei8_global>;
defm BUFFER_STORE_SHORT   : MUBUF_Pseudo_Stores<"buffer_store_short", i32, truncstorei16_global>;
defm BUFFER_STORE_DWORD   : MUBUF_Pseudo_Stores<"buffer_store_dword", i32, store_global>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores<"buffer_store_dwordx2", v2i32, store_global>;
defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores<"buffer_store_dwordx3", v3i32, store_global>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores<"buffer_store_dwordx4", v4i32, store_global>;

// 32-bit buffer atomics. The cmpswap form takes null_frag here, i.e. it is
// given no selection pattern by this multiclass.
defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics<
  "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32>;
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics<
  "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag>;
defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics<
  "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32>;
defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics<
  "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32>;
defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics<
  "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics<
  "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics<
  "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics<
  "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32>;
defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics<
  "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32>;
defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics<
  "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32>;
defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics<
  "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32>;
defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics<
  "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32>;
defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics<
  "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32>;

// 64-bit ("_X2") buffer atomics.
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag>;
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64>;
defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64>;
defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64>;
defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64>;
defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64>;
defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64>;
defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64>;
defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64>;
defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64>;
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics<
  "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>;

// Only the returning family is instantiated for csub.
let SubtargetPredicate = HasGFX10_BEncoding in
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN<
  "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub>;

let SubtargetPredicate = isGFX8GFX9 in {
  def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds<"buffer_store_lds_dword">;
}

let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
/*
defm BUFFER_ATOMIC_RSUB        : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
defm BUFFER_ATOMIC_RSUB_X2     : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
*/

  def BUFFER_WBINVL1_SC : MUBUF_Invalidate<"buffer_wbinvl1_sc",
                                           int_amdgcn_buffer_wbinvl1_sc>;
}

let SubtargetPredicate = isGFX6GFX7GFX10 in {

defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
  "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
>;
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
  "buffer_atomic_fmin", VGPR_32, f32, null_frag
>;
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
  "buffer_atomic_fmax", VGPR_32, f32, null_frag
>;
defm BUFFER_ATOMIC_FCMPSWAP_X2 :
MUBUF_Pseudo_Atomics < 1055 "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag 1056>; 1057defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics < 1058 "buffer_atomic_fmin_x2", VReg_64, f64, null_frag 1059>; 1060defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics < 1061 "buffer_atomic_fmax_x2", VReg_64, f64, null_frag 1062>; 1063 1064} 1065 1066let SubtargetPredicate = HasD16LoadStore in { 1067 1068defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads < 1069 "buffer_load_ubyte_d16", i32, 1 1070>; 1071 1072defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads < 1073 "buffer_load_ubyte_d16_hi", i32, 1 1074>; 1075 1076defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads < 1077 "buffer_load_sbyte_d16", i32, 1 1078>; 1079 1080defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads < 1081 "buffer_load_sbyte_d16_hi", i32, 1 1082>; 1083 1084defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads < 1085 "buffer_load_short_d16", i32, 1 1086>; 1087 1088defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads < 1089 "buffer_load_short_d16_hi", i32, 1 1090>; 1091 1092defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores < 1093 "buffer_store_byte_d16_hi", i32 1094>; 1095 1096defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Pseudo_Stores < 1097 "buffer_store_short_d16_hi", i32 1098>; 1099 1100defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Pseudo_Loads < 1101 "buffer_load_format_d16_hi_x", i32 1102>; 1103defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < 1104 "buffer_store_format_d16_hi_x", i32 1105>; 1106 1107} // End HasD16LoadStore 1108 1109def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", 1110 int_amdgcn_buffer_wbinvl1>; 1111 1112let SubtargetPredicate = HasAtomicFaddInsts in { 1113defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < 1114 "buffer_atomic_add_f32", VGPR_32, f32 1115>; 1116defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < 1117 "buffer_atomic_pk_add_f16", VGPR_32, v2f16 1118>; 1119 1120let OtherPredicates = [isGFX90APlus] in { 1121defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN 
< 1122 "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32 1123>; 1124defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN < 1125 "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32 1126>; 1127} 1128} // End SubtargetPredicate = HasAtomicFaddInsts 1129 1130//===----------------------------------------------------------------------===// 1131// MTBUF Instructions 1132//===----------------------------------------------------------------------===// 1133 1134defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>; 1135defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64, 2>; 1136defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>; 1137defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>; 1138defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>; 1139defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>; 1140defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>; 1141defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>; 1142 1143let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { 1144 defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; 1145 defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>; 1146 defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>; 1147 defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>; 1148 defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; 1149 defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64, 2>; 1150 defm 
TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>; 1151 defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>; 1152} // End HasUnpackedD16VMem. 1153 1154let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { 1155 defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; 1156 defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>; 1157 defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>; 1158 defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>; 1159 defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; 1160 defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>; 1161 defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>; 1162 defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>; 1163} // End HasPackedD16VMem. 1164 1165let SubtargetPredicate = isGFX7Plus in { 1166 1167//===----------------------------------------------------------------------===// 1168// Instruction definitions for CI and newer. 
1169//===----------------------------------------------------------------------===// 1170 1171def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", 1172 int_amdgcn_buffer_wbinvl1_vol>; 1173 1174} // End let SubtargetPredicate = isGFX7Plus 1175 1176let SubtargetPredicate = isGFX90APlus in { 1177 def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> { 1178 } 1179 def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> { 1180 } 1181 1182 defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; 1183 defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; 1184 defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; 1185} // End SubtargetPredicate = isGFX90APlus 1186 1187let SubtargetPredicate = isGFX10Plus in { 1188 def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; 1189 def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; 1190} // End SubtargetPredicate = isGFX10Plus 1191 1192//===----------------------------------------------------------------------===// 1193// MUBUF Patterns 1194//===----------------------------------------------------------------------===// 1195 1196//===----------------------------------------------------------------------===// 1197// buffer_load/store_format patterns 1198//===----------------------------------------------------------------------===// 1199 1200multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, 1201 string opcode, ValueType memoryVt = vt> { 1202 defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_load<name, memoryVt>); 1203 1204 def : GCNPat< 1205 (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, 1206 timm:$auxiliary, 0)), 1207 (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), 1208 (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) 1209 >; 1210 
1211 def : GCNPat< 1212 (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, 1213 timm:$auxiliary, 0)), 1214 (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), 1215 (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) 1216 >; 1217 1218 def : GCNPat< 1219 (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, 1220 timm:$auxiliary, timm)), 1221 (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), 1222 (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) 1223 >; 1224 1225 def : GCNPat< 1226 (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, 1227 timm:$auxiliary, timm)), 1228 (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) 1229 (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), 1230 SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), 1231 (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) 1232 >; 1233} 1234 1235defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">; 1236defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">; 1237defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">; 1238defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">; 1239defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3f32, "BUFFER_LOAD_FORMAT_XYZ">; 1240defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3i32, "BUFFER_LOAD_FORMAT_XYZ">; 1241defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">; 1242defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">; 1243 1244let SubtargetPredicate = HasUnpackedD16VMem in { 1245 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">; 1246 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">; 1247 defm : 
MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">; 1248 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">; 1249 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v3i32, "BUFFER_LOAD_FORMAT_D16_XYZ_gfx80">; 1250 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">; 1251} // End HasUnpackedD16VMem. 1252 1253let SubtargetPredicate = HasPackedD16VMem in { 1254 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">; 1255 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">; 1256 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X">; 1257 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">; 1258 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">; 1259 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZ", v3f16>; 1260 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZ", v3i16>; 1261 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">; 1262 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">; 1263} // End HasPackedD16VMem. 

// Plain buffer-load node patterns, grouped by the instruction that is
// selected. Types are listed in the order the patterns are created.
foreach ty = [f32, i32, v2i16, v2f16] in
  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, ty, "BUFFER_LOAD_DWORD">;
foreach ty = [v2f32, v2i32, v4i16, v4f16] in
  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, ty, "BUFFER_LOAD_DWORDX2">;
foreach ty = [v3f32, v3i32] in
  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, ty, "BUFFER_LOAD_DWORDX3">;
foreach ty = [v4f32, v4i32] in
  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, ty, "BUFFER_LOAD_DWORDX4">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_byte,   i32, "BUFFER_LOAD_SBYTE">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short,  i32, "BUFFER_LOAD_SSHORT">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte,  i32, "BUFFER_LOAD_UBYTE">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;

// Emits four GCNPats that map the buffer-store node `name`, storing a `vt`
// value, onto the _OFFSET_exact, _OFFEN_exact, _IDXEN_exact and _BOTHEN_exact
// variants of the pseudo whose record name starts with `opcode`.
//
// When `memoryVt` differs from `vt`, the node is wrapped in
// mubuf_intrinsic_store<name, memoryVt>; otherwise `name` is matched directly.
// The stored value is placed in the register class given by
// getVregSrcForVT<vt>.
//
// The variant is chosen by which of the vindex/voffset source operands are a
// literal 0, and by the trailing source operand: literal 0 for the
// _OFFSET/_OFFEN forms, any timm for the _IDXEN/_BOTHEN forms.
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode, ValueType memoryVt = vt> {
  defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_store<name, memoryVt>);

  // vindex == 0, voffset == 0.
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  // vindex == 0, voffset in a VGPR.
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
        timm:$auxiliary, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      VGPR_32:$voffset,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  // vindex in a VGPR, voffset == 0.
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
        timm:$auxiliary, timm),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      VGPR_32:$vindex,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  // Both present: vindex goes in sub0 and voffset in sub1 of a VReg_64.
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
        timm:$auxiliary, timm),
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
}

foreach ty = [f32, i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, ty, "BUFFER_STORE_FORMAT_X">;
foreach ty = [v2f32, v2i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, ty, "BUFFER_STORE_FORMAT_XY">;
foreach ty = [v3f32, v3i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, ty, "BUFFER_STORE_FORMAT_XYZ">;
foreach ty = [v4f32, v4i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, ty, "BUFFER_STORE_FORMAT_XYZW">;

let SubtargetPredicate = HasUnpackedD16VMem in {
  foreach ty = [f16, i16, i32] in
    defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, ty, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v3i32, "BUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
  foreach ty = [f16, i16, i32] in
    defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, ty, "BUFFER_STORE_FORMAT_D16_X">;
  foreach ty = [v2f16, v2i16] in
    defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, ty, "BUFFER_STORE_FORMAT_D16_XY">;
  // XYZ takes a 4-element value but passes a 3-element memoryVt.
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZ", v3f16>;
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZ", v3i16>;
  foreach ty = [v4f16, v4i16] in
    defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, ty, "BUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.

// Plain buffer-store node patterns, grouped by the instruction that is
// selected. Types are listed in the order the patterns are created.
foreach ty = [f32, i32, v2i16, v2f16] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, ty, "BUFFER_STORE_DWORD">;
foreach ty = [v2f32, v2i32, v4i16, v4f16] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, ty, "BUFFER_STORE_DWORDX2">;
foreach ty = [v3f32, v3i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, ty, "BUFFER_STORE_DWORDX3">;
foreach ty = [v4f32, v4i32] in
  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, ty, "BUFFER_STORE_DWORDX4">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_byte,  i32, "BUFFER_STORE_BYTE">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;

//===----------------------------------------------------------------------===//
// buffer_atomic patterns
//===----------------------------------------------------------------------===//

// Emits four GCNPats that map the returning buffer-atomic node `name` on type
// `vt` onto the _OFFSET_RTN, _IDXEN_RTN, _OFFEN_RTN and _BOTHEN_RTN variants
// of the pseudo whose record name starts with `opcode`. In every variant the
// cache-policy immediate is passed through set_glc.
multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
                                string opcode> {
  // vindex == 0, voffset == 0.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
              timm:$offset, timm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (set_glc $cachepolicy))
  >;

  // vindex in a VGPR, voffset == 0.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
              timm:$offset, timm:$cachepolicy, timm)),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$vindex,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (set_glc $cachepolicy))
  >;

  // vindex == 0, voffset in a VGPR.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset,
              timm:$offset, timm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$voffset,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (set_glc $cachepolicy))
  >;

  // Both present: vindex goes in sub0 and voffset in sub1 of a VReg_64.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
              timm:$offset, timm:$cachepolicy, timm)),
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (set_glc $cachepolicy))
  >;
}

// 32-bit atomics; swap is instantiated for both i32 and f32.
foreach ty = [i32, f32] in
  defm : BufferAtomicPatterns<SIbuffer_atomic_swap, ty, "BUFFER_ATOMIC_SWAP">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add,  i32, "BUFFER_ATOMIC_ADD">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub,  i32, "BUFFER_ATOMIC_SUB">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and,  i32, "BUFFER_ATOMIC_AND">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or,   i32, "BUFFER_ATOMIC_OR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor,  i32, "BUFFER_ATOMIC_XOR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc,  i32, "BUFFER_ATOMIC_INC">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec,  i32, "BUFFER_ATOMIC_DEC">;
defm : BufferAtomicPatterns<SIbuffer_atomic_csub, i32, "BUFFER_ATOMIC_CSUB">;

// 64-bit atomics select the _X2 instructions.
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add,  i64, "BUFFER_ATOMIC_ADD_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub,  i64, "BUFFER_ATOMIC_SUB_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and,  i64, "BUFFER_ATOMIC_AND_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or,   i64, "BUFFER_ATOMIC_OR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor,  i64, "BUFFER_ATOMIC_XOR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc,  i64, "BUFFER_ATOMIC_INC_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec,  i64, "BUFFER_ATOMIC_DEC_X2">;

let SubtargetPredicate = isGFX6GFX7GFX10 in {
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">;
}

// PatFrag over an eight-operand buffer atomic `Op` of type `vt`. It matches
// only when result 0 of the node has no uses (SelectionDAG predicate) or, for
// GlobalISel, when the register in operand 0 has no non-debug uses.
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
  (ops node:$src0, node:$src1, node:$src2, node:$src3,
       node:$src4, node:$src5, node:$src6, node:$src7),
  (vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),
  [{ return SDValue(N, 0).use_empty(); }]> {

  let GISelPredicateCode = [{
    return MRI.use_nodbg_empty(MI.getOperand(0).getReg());
  }];
}

// Counterpart of BufferAtomicPatterns for atomics whose result is unused: the
// source is wrapped in NoUseBufferAtomic, the selected pseudos are the
// _OFFSET, _IDXEN, _OFFEN and _BOTHEN variants (no _RTN suffix), and
// $cachepolicy is forwarded as-is rather than through set_glc.
multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
                                       string opcode> {
  // vindex == 0, voffset == 0.
  def : GCNPat<
    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
                                 timm:$offset, timm:$cachepolicy, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET)
      getVregSrcForVT<vt>.ret:$vdata_in,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
  >;

  // vindex in a VGPR, voffset == 0.
  def : GCNPat<
    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
                                 timm:$offset, timm:$cachepolicy, timm),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$vindex,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
  >;

  // vindex == 0, voffset in a VGPR.
  def : GCNPat<
    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset,
                                 timm:$offset, timm:$cachepolicy, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$voffset,
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
  >;

  // Both present: vindex goes in sub0 and voffset in sub1 of a VReg_64.
  def : GCNPat<
    (NoUseBufferAtomic<name, vt> vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset,
                                 i32:$soffset, timm:$offset, timm:$cachepolicy, timm),
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
  >;
}

let SubtargetPredicate = HasAtomicFaddInsts in {
  defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32,   "BUFFER_ATOMIC_ADD_F32">;
  defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
}

let SubtargetPredicate = isGFX90APlus in {
  defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f32,   "BUFFER_ATOMIC_ADD_F32">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;

  defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f64, "BUFFER_ATOMIC_ADD_F64">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_MIN_F64">;
  defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_MAX_F64">;
} // End SubtargetPredicate = isGFX90APlus

// 32-bit cmpswap patterns. $data (sub0) and $cmp (sub1) are combined into a
// VReg_64 input operand; the instruction result is copied to VReg_64 and its
// sub0 is extracted as the pattern's value.

// vindex == 0, voffset == 0.
def : GCNPat<
  (SIbuffer_atomic_cmpswap
      i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset,
      timm:$offset, timm:$cachepolicy, 0),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
      (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
        (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
        SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
        (set_glc $cachepolicy)),
      VReg_64)),
    sub0)
>;

// vindex in a VGPR, voffset == 0.
def : GCNPat<
  (SIbuffer_atomic_cmpswap
      i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
      timm:$offset, timm:$cachepolicy, timm),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
      (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
        (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
        VGPR_32:$vindex,
        SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
        (set_glc $cachepolicy)),
      VReg_64)),
    sub0)
>;

// vindex == 0, voffset in a VGPR.
def : GCNPat<
  (SIbuffer_atomic_cmpswap
      i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset,
      timm:$offset, timm:$cachepolicy, 0),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
      (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
        (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
        VGPR_32:$voffset,
        SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
        (set_glc $cachepolicy)),
      VReg_64)),
    sub0)
>;

// Both vindex and voffset in VGPRs.
def : GCNPat<
  (SIbuffer_atomic_cmpswap
      i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
      timm:$offset, timm:$cachepolicy, timm),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
      (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
        (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
        (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
        SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
        (set_glc $cachepolicy)),
      VReg_64)),
    sub0)
>;

// Selects Instr_ADDR64 for a `constant_ld` whose address matches the
// MUBUFAddr64 complex pattern.
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                               PatFrag constant_ld> : GCNPat <
  (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
                                i32:$soffset, i16:$offset))),
  (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
>;

// Selects an atomic load through either the ADDR64 or the OFFSET form,
// depending on which complex pattern (MUBUFAddr64 or MUBUFOffset) matches the
// address.
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
                                     ValueType vt, PatFrag atomic_ld> {
  def : GCNPat <
    (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
                                i32:$soffset, i16:$offset))),
    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
  >;

  def : GCNPat <
    (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset))),
    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset))
  >;
}

let SubtargetPredicate = isGFX6GFX7 in {
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_SBYTE_ADDR64,  i32, sextloadi8_constant>;
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_UBYTE_ADDR64,  i32, extloadi8_constant>;
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_UBYTE_ADDR64,  i32, zextloadi8_constant>;
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
  def : MUBUFLoad_PatternADDR64<BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;

  defm : MUBUFLoad_Atomic_Pattern<BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET,
                                  i32, atomic_load_32_global>;
  defm : MUBUFLoad_Atomic_Pattern<BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET,
                                  i64, atomic_load_64_global>;
} // End SubtargetPredicate = isGFX6GFX7

// Selects Instr_OFFSET for load fragment `ld` when the address matches the
// MUBUFOffset complex pattern.
multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
                              PatFrag ld> {
  def : GCNPat <
    (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
    (Instr_OFFSET $srsrc, $soffset, $offset)
  >;
}

// Loads that produce an i16 value.
let OtherPredicates = [Has16BitInsts] in {
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
  defm : MUBUFLoad_Pattern<BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;

  defm : MUBUFLoad_Pattern<BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
} // End OtherPredicates = [Has16BitInsts]

// Scratch loads: one pattern for the MUBUFScratchOffen addressing form and one
// for MUBUFScratchOffset. Both pass three literal 0 operands after $offset.
multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
                                MUBUF_Pseudo InstrOffset,
                                ValueType vt, PatFrag ld> {
  def : GCNPat <
    (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                               i32:$soffset, u16imm:$offset))),
    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
  >;

  def : GCNPat <
    (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
  >;
}

// XXX - Is it possible to have a complex pattern in a PatFrag?
multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
                                    MUBUF_Pseudo InstrOffset,
                                    ValueType vt, PatFrag ld_frag> {
  // D16 scratch loads take the incoming register value ($in) as an extra
  // operand after the address operands.
  def : GCNPat <
    (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset,
                                u16imm:$offset),
             vt:$in),
    (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in)
  >;

  def : GCNPat <
    (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset),
             vt:$in),
    (InstrOffset $srsrc, $soffset, $offset, $in)
  >;
}

let OtherPredicates = [DisableFlatScratch] in {
  // Extending private loads (i32 and i16 results).
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;

  // Full-width private loads: every type in Reg32Types, then the wider
  // integer vectors.
  foreach vt = Reg32Types.types in {
    defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>;
  }
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;

  let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
    // D16 "hi" loads, instantiated for v2i16 first and then v2f16.
    foreach vt = [v2i16, v2f16] in {
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, vt, load_d16_hi_private>;
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, vt, az_extloadi8_d16_hi_private>;
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, vt, sextloadi8_d16_hi_private>;
    }

    // D16 "lo" loads, same type order.
    foreach vt = [v2i16, v2f16] in {
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, vt, load_d16_lo_private>;
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, vt, az_extloadi8_d16_lo_private>;
      defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, vt, sextloadi8_d16_lo_private>;
    }
  }

} // End OtherPredicates = [DisableFlatScratch]

// Atomic-store selection, one pattern per addressing form (MUBUFAddr64 ->
// Instr_ADDR64, MUBUFOffset -> Instr_OFFSET).
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64,
                                      MUBUF_Pseudo Instr_OFFSET,
                                      ValueType vt, PatFrag atomic_st> {
  // Store follows atomic op convention so address is first
  def : GCNPat <
    (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                            i16:$offset),
               vt:$val),
    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset)
  >;

  def : GCNPat <
    (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset))
  >;
}

let SubtargetPredicate = isGFX6GFX7 in {
  defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET,
                                    i32, atomic_store_global_32>;
  defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET,
                                    i64, atomic_store_global_64>;
} // End SubtargetPredicate = isGFX6GFX7


// Single pattern: store fragment `st` of $vdata to a MUBUFOffset address
// selects the OFFSET form of the instruction.
multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
                               PatFrag st> {
  def : GCNPat <
    (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset)),
    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset)
  >;
}

defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;

// Scratch store patterns for the Offen and Offset address forms. `rc` is the
// register class applied to the stored value in the output pattern; it
// defaults to VGPR_32 and is overridden for the wider stores below.
// NOTE(review): the three trailing 0 operands are presumably the remaining
// immediate modifiers of the pseudo - confirm against its operand list.
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
                                 MUBUF_Pseudo InstrOffset,
                                 ValueType vt, PatFrag st,
                                 RegisterClass rc = VGPR_32> {
  def : GCNPat <
    (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                      i32:$soffset, u16imm:$offset)),
    (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
  >;

  def : GCNPat <
    (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                       u16imm:$offset)),
    (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0)
  >;
}

let OtherPredicates = [DisableFlatScratch] in {
  // Truncating and 16-bit private stores.
  defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;

  // Full-width private stores.
  foreach vt = Reg32Types.types in {
    defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, vt, store_private>;
  }

  defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;


  // Hiding the extract high pattern in the PatFrag seems to not
  // automatically increase the complexity.
  let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch],
      AddedComplexity = 1 in {
    defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET, i32, store_hi16_private>;
    defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_hi16_private>;
  }
} // End OtherPredicates = [DisableFlatScratch]

//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// tbuffer_load/store_format patterns
//===----------------------------------------------------------------------===//

// Selection patterns for a typed-buffer load node. `st` is `name` itself when
// memoryVt equals vt, and mtbuf_intrinsic_load<name, memoryVt> otherwise.
// Four patterns are emitted, choosing the instruction suffix from which of
// $vindex / $voffset appear as registers rather than literal 0:
//   neither -> _OFFSET, $vindex -> _IDXEN, $voffset -> _OFFEN, both -> _BOTHEN
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
                                  string opcode, ValueType memoryVt = vt> {
  defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_load<name, memoryVt>);

  def : GCNPat<
    (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
            timm:$format, timm:$auxiliary, 0)),
    (!cast<MTBUF_Pseudo>(opcode # _OFFSET)
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  def : GCNPat<
    (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
            timm:$format, timm:$auxiliary, timm)),
    (!cast<MTBUF_Pseudo>(opcode # _IDXEN)
      VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  def : GCNPat<
    (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
            timm:$format, timm:$auxiliary, 0)),
    (!cast<MTBUF_Pseudo>(opcode # _OFFEN)
      VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  // $vindex and $voffset are packed into one 64-bit register pair.
  def : GCNPat<
    (vt (st v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
            timm:$format, timm:$auxiliary, timm)),
    (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
      (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
}

defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32,   "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3i32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32,   "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;

let SubtargetPredicate = HasUnpackedD16VMem in {
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32,   "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v3i32, "TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32,   "TBUFFER_LOAD_FORMAT_D16_X">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
  // XYZ: result type v4f16, memory type v3f16.
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZ", v3f16>;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.

// Selection patterns for a typed-buffer store node. `st` is `name` itself
// when memoryVt equals vt, and mtbuf_intrinsic_store<name, memoryVt>
// otherwise. Four patterns are emitted, choosing the "_exact" instruction
// suffix from which of $vindex / $voffset appear as registers rather than
// literal 0:
//   neither -> _OFFSET_exact, $vindex -> _IDXEN_exact,
//   $voffset -> _OFFEN_exact, both -> _BOTHEN_exact
// The register class of $vdata comes from getVregSrcForVT<vt>.
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode, ValueType memoryVt = vt> {
  defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_store<name, memoryVt>);

  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
        timm:$format, timm:$auxiliary, 0),
    (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact)
      getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
      (as_i16timm $offset), (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
        timm:$format, timm:$auxiliary, timm),
    (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact)
      getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc,
      SCSrc_b32:$soffset,
      (as_i16timm $offset), (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
        timm:$format, timm:$auxiliary, 0),
    (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact)
      getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc,
      SCSrc_b32:$soffset,
      (as_i16timm $offset), (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;

  // $vindex and $voffset are packed into one 64-bit register pair.
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
        timm:$offset, timm:$format, timm:$auxiliary, timm),
    (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
}

defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32,   "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3i32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32,   "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;

let SubtargetPredicate = HasUnpackedD16VMem in {
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32,   "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v3i32, "TBUFFER_STORE_FORMAT_D16_XYZ_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.

let SubtargetPredicate = HasPackedD16VMem in {
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32,   "TBUFFER_STORE_FORMAT_D16_X">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
  // XYZ: value type v4f16, memory type v3f16.
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZ", v3f16>;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
1882//===----------------------------------------------------------------------===// 1883 1884//===----------------------------------------------------------------------===// 1885// Base ENC_MUBUF for GFX6, GFX7, GFX10. 1886//===----------------------------------------------------------------------===// 1887 1888class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> : 1889 MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { 1890 let Inst{11-0} = !if(ps.has_offset, offset, ?); 1891 let Inst{12} = ps.offen; 1892 let Inst{13} = ps.idxen; 1893 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); 1894 let Inst{16} = ps.lds; 1895 let Inst{24-18} = op; 1896 let Inst{31-26} = 0x38; 1897 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 1898 let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); 1899 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); 1900 let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); 1901 let Inst{55} = !if(ps.has_tfe, tfe, ?); 1902 let Inst{63-56} = !if(ps.has_soffset, soffset, ?); 1903} 1904 1905class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : 1906 Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> { 1907 let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); 1908 let Inst{25} = op{7}; 1909} 1910 1911class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> : 1912 Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> { 1913 let Inst{15} = ps.addr64; 1914} 1915 1916//===----------------------------------------------------------------------===// 1917// MUBUF - GFX10. 
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus in {
let DecoderNamespace = "GFX10" in {
  // One real instruction per addressing variant of the pseudo named NAME.
  multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
    def _BOTHEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>;
    def _IDXEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>;
    def _OFFEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>;
    def _OFFSET_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>;
  }

  // As above, plus the _LDS_ variants. Each def also carries a MUBUFLdsTable
  // row; an LDS def and its non-LDS counterpart share the same OpName and
  // differ only in the IsLds bit.
  multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
    def _OFFSET_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
      MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
    def _OFFEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
      MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
    def _IDXEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
      MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
    def _BOTHEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
      MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;

    def _LDS_OFFSET_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFSET")>,
      MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
    def _LDS_OFFEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFEN")>,
      MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
    def _LDS_IDXEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_IDXEN")>,
      MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
    def _LDS_BOTHEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_BOTHEN")>,
      MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
  }

  // Returning (_RTN) atomic variants. The AtomicNoRet name is that of the
  // corresponding non-returning real instruction; the flag is 1.
  multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
    def _BOTHEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN_RTN")>,
      AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>;
    def _IDXEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN_RTN")>,
      AtomicNoRet<NAME # "_IDXEN_gfx10", 1>;
    def _OFFEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN_RTN")>,
      AtomicNoRet<NAME # "_OFFEN_gfx10", 1>;
    def _OFFSET_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET_RTN")>,
      AtomicNoRet<NAME # "_OFFSET_gfx10", 1>;
  }

  // Returning variants (inherited) plus the non-returning ones (flag 0).
  multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
      MUBUF_Real_Atomics_RTN_gfx10<op> {
    def _BOTHEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
      AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>;
    def _IDXEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
      AtomicNoRet<NAME # "_IDXEN_gfx10", 0>;
    def _OFFEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
      AtomicNoRet<NAME # "_OFFEN_gfx10", 0>;
    def _OFFSET_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
      AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
  }
} // End DecoderNamespace = "GFX10"
} // End AssemblerPredicate = isGFX10Plus

// Opcodes instantiated for GFX10 only.
defm BUFFER_STORE_BYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x019>;
defm BUFFER_STORE_SHORT_D16_HI    : MUBUF_Real_AllAddr_gfx10<0x01b>;
defm BUFFER_LOAD_UBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x020>;
defm BUFFER_LOAD_UBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x021>;
defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;
// FIXME-GFX10: Add following instructions:
//defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_gfx10<0x083>;
defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_gfx10<0x084>;
defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_gfx10<0x085>;
defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_gfx10<0x086>;
defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;

def BUFFER_GL0_INV_gfx10 :
  MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
def BUFFER_GL1_INV_gfx10 :
  MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;

//===----------------------------------------------------------------------===//
// MUBUF - GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// Single real instruction for the pseudo named NAME, GFX6 only.
let AssemblerPredicate = isGFX6 in {
let DecoderNamespace = "GFX6" in {
  multiclass MUBUF_Real_gfx6<bits<8> op> {
    def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
  }
} // End DecoderNamespace = "GFX6"
} // End AssemblerPredicate = isGFX6

// Single real instruction for the pseudo named NAME, GFX7 only.
let AssemblerPredicate = isGFX7Only in {
let DecoderNamespace = "GFX7" in {
  multiclass MUBUF_Real_gfx7<bits<8> op> {
    def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
  }
} // End DecoderNamespace = "GFX7"
} // End AssemblerPredicate = isGFX7Only

let AssemblerPredicate = isGFX6GFX7 in {
let DecoderNamespace = "GFX6GFX7" in {
  // One real instruction per addressing variant, including ADDR64.
  multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
    def _ADDR64_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64")>;
    def _BOTHEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>;
    def _IDXEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>;
    def _OFFEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>;
    def _OFFSET_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>;
  }

  // As above, plus the _LDS_ variants, each with a MUBUFLdsTable row (an LDS
  // def shares its OpName with the non-LDS counterpart).
  multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
    def _OFFSET_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
      MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
    def _ADDR64_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64")>,
      MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
    def _OFFEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
      MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
    def _IDXEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
      MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
    def _BOTHEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
      MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;

    def _LDS_OFFSET_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFSET")>,
      MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
    def _LDS_ADDR64_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_ADDR64")>,
      MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
    def _LDS_OFFEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFEN")>,
      MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
    def _LDS_IDXEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_IDXEN")>,
      MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
    def _LDS_BOTHEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_BOTHEN")>,
      MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
  }

  // Non-returning atomics (AtomicNoRet flag 0) followed by the returning
  // _RTN forms (flag 1); each pair shares the non-returning name.
  multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
    def _ADDR64_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64")>,
      AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>;
    def _BOTHEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
      AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>;
    def _IDXEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
      AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>;
    def _OFFEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
      AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>;
    def _OFFSET_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
      AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>;

    def _ADDR64_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64_RTN")>,
      AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>;
    def _BOTHEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN_RTN")>,
      AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>;
    def _IDXEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN_RTN")>,
      AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>;
    def _OFFEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN_RTN")>,
      AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>;
    def _OFFSET_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET_RTN")>,
      AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>;
  }
} // End DecoderNamespace = "GFX6GFX7"
} // End AssemblerPredicate = isGFX6GFX7

// Convenience multiclasses that instantiate both the GFX6/GFX7 and the GFX10
// real instructions for the same opcode.
multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;

multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;

multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;

// FIXME-GFX6: Following instructions are available only on GFX6.
//defm BUFFER_ATOMIC_RSUB        : MUBUF_Real_Atomics_gfx6 <0x034>;
//defm BUFFER_ATOMIC_RSUB_X2     : MUBUF_Real_Atomics_gfx6 <0x054>;

// Loads and stores shared by GFX6, GFX7 and GFX10. Entries using the _Lds_
// multiclass also get the _LDS_ variants.
defm BUFFER_LOAD_FORMAT_X     : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
defm BUFFER_STORE_FORMAT_X    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
defm BUFFER_STORE_FORMAT_XY   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
defm BUFFER_LOAD_UBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
defm BUFFER_LOAD_SBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
defm BUFFER_LOAD_USHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
defm BUFFER_LOAD_SSHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
defm BUFFER_LOAD_DWORD        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
defm BUFFER_LOAD_DWORDX2      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
defm BUFFER_LOAD_DWORDX4      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
defm BUFFER_LOAD_DWORDX3      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
defm BUFFER_STORE_BYTE        : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
defm BUFFER_STORE_SHORT       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
defm BUFFER_STORE_DWORD       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
defm BUFFER_STORE_DWORDX2     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
defm BUFFER_STORE_DWORDX4     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
defm BUFFER_STORE_DWORDX3     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;

// Atomics shared by GFX6, GFX7 and GFX10.
defm BUFFER_ATOMIC_SWAP        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
defm BUFFER_ATOMIC_CMPSWAP     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
defm BUFFER_ATOMIC_ADD         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
defm BUFFER_ATOMIC_SUB         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
defm BUFFER_ATOMIC_SMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
defm BUFFER_ATOMIC_UMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
defm BUFFER_ATOMIC_SMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
defm BUFFER_ATOMIC_UMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
defm BUFFER_ATOMIC_AND         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
defm BUFFER_ATOMIC_OR          : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
defm BUFFER_ATOMIC_XOR         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
defm BUFFER_ATOMIC_INC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
defm BUFFER_ATOMIC_DEC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
defm BUFFER_ATOMIC_FCMPSWAP    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
defm BUFFER_ATOMIC_FMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
defm BUFFER_ATOMIC_FMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
defm BUFFER_ATOMIC_SWAP_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
defm BUFFER_ATOMIC_CMPSWAP_X2  : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
defm BUFFER_ATOMIC_ADD_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
defm BUFFER_ATOMIC_SUB_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
defm BUFFER_ATOMIC_SMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
defm BUFFER_ATOMIC_UMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
defm BUFFER_ATOMIC_SMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
defm BUFFER_ATOMIC_UMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
defm BUFFER_ATOMIC_AND_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
defm BUFFER_ATOMIC_OR_X2       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
defm BUFFER_ATOMIC_XOR_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
defm BUFFER_ATOMIC_INC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;

// GFX10 only; instantiated with the returning (_RTN) forms alone.
defm BUFFER_ATOMIC_CSUB       : MUBUF_Real_Atomics_RTN_gfx10<0x034>;

// Opcode 0x070 is BUFFER_WBINVL1_SC in the GFX6 instantiation and
// BUFFER_WBINVL1_VOL in the GFX7 one.
defm BUFFER_WBINVL1_SC        : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL       : MUBUF_Real_gfx7<0x070>;
def  BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;

//===----------------------------------------------------------------------===//
// Base ENC_MTBUF for GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// 64-bit MTBUF encoding fields common to GFX6, GFX7 and GFX10. Bits 15,
// 25-19 and 53 are not assigned here; the subclasses set the ones they use.
// A field whose ps.has_* flag is clear is left unset (?), except glc, which
// falls back to ps.glc_value.
class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
    MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
  // Low dword.
  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
  let Inst{18-16} = op;
  let Inst{31-26} = 0x3a; //encoding

  // High dword.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{54}    = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}

//===----------------------------------------------------------------------===//
// MTBUF - GFX10.
2191//===----------------------------------------------------------------------===// 2192 2193class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : 2194 Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> { 2195 let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); 2196 let Inst{25-19} = format; 2197 let Inst{53} = op{3}; 2198} 2199 2200let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { 2201 multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> { 2202 def _BOTHEN_gfx10 : 2203 MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; 2204 def _IDXEN_gfx10 : 2205 MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; 2206 def _OFFEN_gfx10 : 2207 MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; 2208 def _OFFSET_gfx10 : 2209 MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; 2210 } 2211} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" 2212 2213defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>; 2214defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>; 2215defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00a>; 2216defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00b>; 2217defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x00c>; 2218defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x00d>; 2219defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00e>; 2220defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>; 2221 2222//===----------------------------------------------------------------------===// 2223// MTBUF - GFX6, GFX7, GFX10. 
2224//===----------------------------------------------------------------------===// 2225 2226class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> : 2227 Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> { 2228 let Inst{15} = ps.addr64; 2229 let Inst{22-19} = dfmt; 2230 let Inst{25-23} = nfmt; 2231} 2232 2233let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 2234 multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> { 2235 def _ADDR64_gfx6_gfx7 : 2236 MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>; 2237 def _BOTHEN_gfx6_gfx7 : 2238 MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; 2239 def _IDXEN_gfx6_gfx7 : 2240 MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; 2241 def _OFFEN_gfx6_gfx7 : 2242 MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; 2243 def _OFFSET_gfx6_gfx7 : 2244 MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; 2245 } 2246} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 2247 2248multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> : 2249 MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>; 2250 2251defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>; 2252defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>; 2253defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>; 2254defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>; 2255defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>; 2256defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>; 2257defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>; 2258defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>; 2259 2260//===----------------------------------------------------------------------===// 2261// GFX8, GFX9 (VI). 
2262//===----------------------------------------------------------------------===// 2263 2264class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc, 2265 bit has_sccb = ps.has_sccb> : 2266 MUBUF_Real<ps>, 2267 Enc64, 2268 SIMCInstr<ps.PseudoInstr, Enc>, 2269 AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0, 2270 !if(ps.IsAtomicRet, 1, ?))> { 2271 2272 let Inst{11-0} = !if(ps.has_offset, offset, ?); 2273 let Inst{12} = ps.offen; 2274 let Inst{13} = ps.idxen; 2275 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); 2276 let Inst{15} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); 2277 let Inst{16} = ps.lds; 2278 let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); 2279 let Inst{24-18} = op; 2280 let Inst{31-26} = 0x38; //encoding 2281 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 2282 let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); 2283 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); 2284 let Inst{63-56} = !if(ps.has_soffset, soffset, ?); 2285} 2286 2287class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps, bit has_sccb = ps.has_sccb> : 2288 MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI, has_sccb> { 2289 let AssemblerPredicate = isGFX8GFX9NotGFX90A; 2290 let DecoderNamespace = "GFX8"; 2291 2292 let Inst{55} = !if(ps.has_tfe, tfe, ?); 2293} 2294 2295class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps, 2296 bit has_sccb = ps.has_sccb> : 2297 MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A, has_sccb> { 2298 let AssemblerPredicate = isGFX90APlus; 2299 let DecoderNamespace = "GFX90A"; 2300 let AsmString = ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""), 2301 !subst("$tfe", "", ps.AsmOperands)); 2302 2303 let Inst{55} = acc; 2304} 2305 2306multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> { 2307 def _vi : MUBUF_Real_vi<op, ps>; 2308 def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>; 2309} 2310 2311multiclass MUBUF_Real_AllAddr_vi<bits<7> op> { 2312 defm _OFFSET : 
MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; 2313 defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; 2314 defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; 2315 defm _BOTHEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; 2316} 2317 2318multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> { 2319 2320 def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, 2321 MUBUFLdsTable<0, NAME # "_OFFSET_vi">; 2322 def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, 2323 MUBUFLdsTable<0, NAME # "_OFFEN_vi">; 2324 def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, 2325 MUBUFLdsTable<0, NAME # "_IDXEN_vi">; 2326 def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, 2327 MUBUFLdsTable<0, NAME # "_BOTHEN_vi">; 2328 2329 def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, 2330 MUBUFLdsTable<1, NAME # "_OFFSET_vi">; 2331 def _LDS_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, 2332 MUBUFLdsTable<1, NAME # "_OFFEN_vi">; 2333 def _LDS_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, 2334 MUBUFLdsTable<1, NAME # "_IDXEN_vi">; 2335 def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, 2336 MUBUFLdsTable<1, NAME # "_BOTHEN_vi">; 2337 2338 def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, 2339 MUBUFLdsTable<0, NAME # "_OFFSET_gfx90a">; 2340 def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, 2341 MUBUFLdsTable<0, NAME # "_OFFEN_gfx90a">; 2342 def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, 2343 MUBUFLdsTable<0, NAME # "_IDXEN_gfx90a">; 2344 def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, 2345 MUBUFLdsTable<0, NAME # "_BOTHEN_gfx90a">; 2346 2347 def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, 
!cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, 2348 MUBUFLdsTable<1, NAME # "_OFFSET_gfx90a">; 2349 def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, 2350 MUBUFLdsTable<1, NAME # "_OFFEN_gfx90a">; 2351 def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, 2352 MUBUFLdsTable<1, NAME # "_IDXEN_gfx90a">; 2353 def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, 2354 MUBUFLdsTable<1, NAME # "_BOTHEN_gfx90a">; 2355} 2356 2357class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : 2358 MUBUF_Real<ps>, 2359 Enc64, 2360 SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> { 2361 let AssemblerPredicate=HasUnpackedD16VMem; 2362 let DecoderNamespace="GFX80_UNPACKED"; 2363 2364 let Inst{11-0} = !if(ps.has_offset, offset, ?); 2365 let Inst{12} = ps.offen; 2366 let Inst{13} = ps.idxen; 2367 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); 2368 let Inst{16} = ps.lds; 2369 let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); 2370 let Inst{24-18} = op; 2371 let Inst{31-26} = 0x38; //encoding 2372 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 2373 let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); 2374 let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); 2375 let Inst{55} = !if(ps.has_tfe, tfe, ?); 2376 let Inst{63-56} = !if(ps.has_soffset, soffset, ?); 2377} 2378 2379multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> { 2380 def _OFFSET_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; 2381 def _OFFEN_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; 2382 def _IDXEN_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; 2383 def _BOTHEN_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; 2384} 2385 2386multiclass MUBUF_Real_Atomic_vi<bits<7> op> : 2387 MUBUF_Real_AllAddr_vi<op> { 2388 defm _OFFSET_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; 2389 defm _OFFEN_RTN : 
MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; 2390 defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; 2391 defm _BOTHEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; 2392} 2393 2394defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_vi <0x00>; 2395defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x01>; 2396defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x02>; 2397defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x03>; 2398defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_vi <0x04>; 2399defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x05>; 2400defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x06>; 2401defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x07>; 2402let SubtargetPredicate = HasUnpackedD16VMem in { 2403 defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x08>; 2404 defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x09>; 2405 defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0a>; 2406 defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0b>; 2407 defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0c>; 2408 defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0d>; 2409 defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0e>; 2410 defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0f>; 2411} // End HasUnpackedD16VMem. 
let SubtargetPredicate = HasPackedD16VMem in {
  defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_vi <0x08>;
  defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_vi <0x09>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_vi <0x0a>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_vi <0x0b>;
  defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_vi <0x0c>;
  defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_vi <0x0d>;
  defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_vi <0x0e>;
  defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0f>;
} // End HasPackedD16VMem.
defm BUFFER_LOAD_UBYTE          : MUBUF_Real_AllAddr_Lds_vi <0x10>;
defm BUFFER_LOAD_SBYTE          : MUBUF_Real_AllAddr_Lds_vi <0x11>;
defm BUFFER_LOAD_USHORT         : MUBUF_Real_AllAddr_Lds_vi <0x12>;
defm BUFFER_LOAD_SSHORT         : MUBUF_Real_AllAddr_Lds_vi <0x13>;
defm BUFFER_LOAD_DWORD          : MUBUF_Real_AllAddr_Lds_vi <0x14>;
defm BUFFER_LOAD_DWORDX2        : MUBUF_Real_AllAddr_Lds_vi <0x15>;
defm BUFFER_LOAD_DWORDX3        : MUBUF_Real_AllAddr_Lds_vi <0x16>;
defm BUFFER_LOAD_DWORDX4        : MUBUF_Real_AllAddr_Lds_vi <0x17>;
defm BUFFER_STORE_BYTE          : MUBUF_Real_AllAddr_vi <0x18>;
defm BUFFER_STORE_BYTE_D16_HI   : MUBUF_Real_AllAddr_vi <0x19>;
defm BUFFER_STORE_SHORT         : MUBUF_Real_AllAddr_vi <0x1a>;
defm BUFFER_STORE_SHORT_D16_HI  : MUBUF_Real_AllAddr_vi <0x1b>;
defm BUFFER_STORE_DWORD         : MUBUF_Real_AllAddr_vi <0x1c>;
defm BUFFER_STORE_DWORDX2       : MUBUF_Real_AllAddr_vi <0x1d>;
defm BUFFER_STORE_DWORDX3       : MUBUF_Real_AllAddr_vi <0x1e>;
defm BUFFER_STORE_DWORDX4       : MUBUF_Real_AllAddr_vi <0x1f>;

defm BUFFER_LOAD_UBYTE_D16      : MUBUF_Real_AllAddr_vi <0x20>;
defm BUFFER_LOAD_UBYTE_D16_HI   : MUBUF_Real_AllAddr_vi <0x21>;
defm BUFFER_LOAD_SBYTE_D16      : MUBUF_Real_AllAddr_vi <0x22>;
defm BUFFER_LOAD_SBYTE_D16_HI   : MUBUF_Real_AllAddr_vi <0x23>;
defm BUFFER_LOAD_SHORT_D16      : MUBUF_Real_AllAddr_vi <0x24>;
defm BUFFER_LOAD_SHORT_D16_HI   : MUBUF_Real_AllAddr_vi <0x25>;

defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_vi <0x26>;
defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_vi <0x27>;

defm BUFFER_ATOMIC_SWAP         : MUBUF_Real_Atomic_vi <0x40>;
defm BUFFER_ATOMIC_CMPSWAP      : MUBUF_Real_Atomic_vi <0x41>;
defm BUFFER_ATOMIC_ADD          : MUBUF_Real_Atomic_vi <0x42>;
defm BUFFER_ATOMIC_SUB          : MUBUF_Real_Atomic_vi <0x43>;
defm BUFFER_ATOMIC_SMIN         : MUBUF_Real_Atomic_vi <0x44>;
defm BUFFER_ATOMIC_UMIN         : MUBUF_Real_Atomic_vi <0x45>;
defm BUFFER_ATOMIC_SMAX         : MUBUF_Real_Atomic_vi <0x46>;
defm BUFFER_ATOMIC_UMAX         : MUBUF_Real_Atomic_vi <0x47>;
defm BUFFER_ATOMIC_AND          : MUBUF_Real_Atomic_vi <0x48>;
defm BUFFER_ATOMIC_OR           : MUBUF_Real_Atomic_vi <0x49>;
defm BUFFER_ATOMIC_XOR          : MUBUF_Real_Atomic_vi <0x4a>;
defm BUFFER_ATOMIC_INC          : MUBUF_Real_Atomic_vi <0x4b>;
defm BUFFER_ATOMIC_DEC          : MUBUF_Real_Atomic_vi <0x4c>;

defm BUFFER_ATOMIC_SWAP_X2      : MUBUF_Real_Atomic_vi <0x60>;
defm BUFFER_ATOMIC_CMPSWAP_X2   : MUBUF_Real_Atomic_vi <0x61>;
defm BUFFER_ATOMIC_ADD_X2       : MUBUF_Real_Atomic_vi <0x62>;
defm BUFFER_ATOMIC_SUB_X2       : MUBUF_Real_Atomic_vi <0x63>;
defm BUFFER_ATOMIC_SMIN_X2      : MUBUF_Real_Atomic_vi <0x64>;
defm BUFFER_ATOMIC_UMIN_X2      : MUBUF_Real_Atomic_vi <0x65>;
defm BUFFER_ATOMIC_SMAX_X2      : MUBUF_Real_Atomic_vi <0x66>;
defm BUFFER_ATOMIC_UMAX_X2      : MUBUF_Real_Atomic_vi <0x67>;
defm BUFFER_ATOMIC_AND_X2       : MUBUF_Real_Atomic_vi <0x68>;
defm BUFFER_ATOMIC_OR_X2        : MUBUF_Real_Atomic_vi <0x69>;
defm BUFFER_ATOMIC_XOR_X2       : MUBUF_Real_Atomic_vi <0x6a>;
defm BUFFER_ATOMIC_INC_X2       : MUBUF_Real_Atomic_vi <0x6b>;
defm BUFFER_ATOMIC_DEC_X2       : MUBUF_Real_Atomic_vi <0x6c>;

defm BUFFER_STORE_LDS_DWORD     : MUBUF_Real_vi_gfx90a <0x3d, BUFFER_STORE_LDS_DWORD>;

// This outer 'let' replaces the isGFX8GFX9NotGFX90A assembler predicate that
// MUBUF_Real_vi sets in its body, for these two records only.
let AssemblerPredicate = isGFX8GFX9 in {
def BUFFER_WBINVL1_vi           : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
def BUFFER_WBINVL1_VOL_vi       : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
} // End AssemblerPredicate = isGFX8GFX9

let SubtargetPredicate = HasAtomicFaddInsts in {

defm BUFFER_ATOMIC_ADD_F32    : MUBUF_Real_Atomic_vi <0x4d>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;

} // End SubtargetPredicate = HasAtomicFaddInsts

let SubtargetPredicate = isGFX90APlus in {
  defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
  defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>;
  defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>;
} // End SubtargetPredicate = isGFX90APlus

def BUFFER_WBL2_gfx90a  : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2>;
def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;

// Encoding bits shared by the VI and GFX90A MTBUF real instructions.
//   op  - 4-bit opcode, placed in Inst{18-15}.
//   ps  - the pseudo whose operands and has_* flags drive the encoding.
//   Enc - encoding family, passed through to SIMCInstr.
// A field guarded by a has_* flag is left unset ('?') when the pseudo does not
// carry that operand. Inst{55} defaults to tfe here; MTBUF_Real_gfx90a
// replaces it with acc.
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
  MTBUF_Real<ps>,
  Enc64,
  SIMCInstr<ps.PseudoInstr, Enc> {

  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
  let Inst{18-15} = op;
  let Inst{22-19} = dfmt;
  let Inst{25-23} = nfmt;
  let Inst{31-26} = 0x3a; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{53}    = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
  let Inst{54}    = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}

// VI flavour. Inst{55} (tfe) is inherited unchanged from MTBUF_Real_Base_vi,
// so it is not restated here.
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
  MTBUF_Real_Base_vi <op, ps, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9NotGFX90A;
  let DecoderNamespace = "GFX8";
}

// GFX90A flavour: "$tfe" is removed from the assembly operand string and
// Inst{55} holds acc instead of tfe.
class MTBUF_Real_gfx90a <bits<4> op, MTBUF_Pseudo ps> :
  MTBUF_Real_Base_vi <op, ps, SIEncodingFamily.GFX90A> {
  let AssemblerPredicate = isGFX90APlus;
  let DecoderNamespace = "GFX90A";
  let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);

  let Inst{55} = acc;
}

// Emits the _vi and _gfx90a reals for one pseudo.
multiclass MTBUF_Real_vi_gfx90a<bits<4> op, MTBUF_Pseudo ps> {
  def _vi     : MTBUF_Real_vi<op, ps>;
  def _gfx90a : MTBUF_Real_gfx90a<op, ps>;
}

// _vi/_gfx90a reals for the four addressing modes of the pseudo family NAME.
multiclass MTBUF_Real_AllAddr_vi<bits<4> op> {
  defm _OFFSET : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
  defm _OFFEN  : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
  defm _IDXEN  : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
  defm _BOTHEN : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
}

// MTBUF real for the GFX80 encoding family, predicated on
// HasUnpackedD16VMem. Same layout as MTBUF_Real_Base_vi except that Inst{53}
// is not assigned.
class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
  MTBUF_Real<ps>,
  Enc64,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
  let AssemblerPredicate=HasUnpackedD16VMem;
  let DecoderNamespace="GFX80_UNPACKED";

  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
  let Inst{18-15} = op;
  let Inst{22-19} = dfmt;
  let Inst{25-23} = nfmt;
  let Inst{31-26} = 0x3a; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{54}    = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}

// _gfx80 reals for the four addressing modes of the pseudo family NAME.
multiclass MTBUF_Real_AllAddr_gfx80<bits<4> op> {
  def _OFFSET_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
  def _OFFEN_gfx80  : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
  def _IDXEN_gfx80  : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
  def _BOTHEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
}

defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_vi <0x00>;
defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_vi <0x01>;
defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_vi <0x02>;
defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_vi <0x03>;
defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_vi <0x04>;
defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_vi <0x05>;
defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_vi <0x06>;
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x07>;
let SubtargetPredicate = HasUnpackedD16VMem in {
  defm TBUFFER_LOAD_FORMAT_D16_X_gfx80     : MTBUF_Real_AllAddr_gfx80 <0x08>;
  defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80    : MTBUF_Real_AllAddr_gfx80 <0x09>;
  defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80   : MTBUF_Real_AllAddr_gfx80 <0x0a>;
  defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80  : MTBUF_Real_AllAddr_gfx80 <0x0b>;
  defm TBUFFER_STORE_FORMAT_D16_X_gfx80    : MTBUF_Real_AllAddr_gfx80 <0x0c>;
  defm TBUFFER_STORE_FORMAT_D16_XY_gfx80   : MTBUF_Real_AllAddr_gfx80 <0x0d>;
  defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80  : MTBUF_Real_AllAddr_gfx80 <0x0e>;
  defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0f>;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
  defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Real_AllAddr_vi <0x08>;
  defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Real_AllAddr_vi <0x09>;
  defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Real_AllAddr_vi <0x0a>;
  defm TBUFFER_LOAD_FORMAT_D16_XYZW  : MTBUF_Real_AllAddr_vi <0x0b>;
  defm TBUFFER_STORE_FORMAT_D16_X    : MTBUF_Real_AllAddr_vi <0x0c>;
  defm TBUFFER_STORE_FORMAT_D16_XY   : MTBUF_Real_AllAddr_vi <0x0d>;
  defm TBUFFER_STORE_FORMAT_D16_XYZ  : MTBUF_Real_AllAddr_vi <0x0e>;
  defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
} // End HasPackedD16VMem.

// Searchable table with one row per MUBUF_Pseudo record, exposing the listed
// fields through the C++ type MUBUFInfo. The primary lookup is keyed on
// Opcode and named getMUBUFOpcodeHelper.
def MUBUFInfoTable : GenericTable {
  let FilterClass = "MUBUF_Pseudo";
  let CppTypeName = "MUBUFInfo";
  let Fields = [
    "Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset",
    "IsBufferInv"
  ];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getMUBUFOpcodeHelper";
}

// Lookup into MUBUFInfoTable keyed on Opcode.
def getMUBUFInfoFromOpcode : SearchIndex {
  let Table = MUBUFInfoTable;
  let Key = ["Opcode"];
}

// Lookup into MUBUFInfoTable keyed on the (BaseOpcode, elements) pair.
def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex {
  let Table = MUBUFInfoTable;
  let Key = ["BaseOpcode", "elements"];
}

// Searchable table with one row per MTBUF_Pseudo record, exposing the listed
// fields through the C++ type MTBUFInfo. The primary lookup is keyed on
// Opcode and named getMTBUFOpcodeHelper.
def MTBUFInfoTable : GenericTable {
  let FilterClass = "MTBUF_Pseudo";
  let CppTypeName = "MTBUFInfo";
  let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getMTBUFOpcodeHelper";
}

// Lookup into MTBUFInfoTable keyed on Opcode.
def getMTBUFInfoFromOpcode : SearchIndex {
  let Table = MTBUFInfoTable;
  let Key = ["Opcode"];
}

// Lookup into MTBUFInfoTable keyed on the (BaseOpcode, elements) pair.
def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex {
  let Table = MTBUFInfoTable;
  let Key = ["BaseOpcode", "elements"];
}