1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AMDGPU. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUInstructionSelector.h" 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUSubtarget.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 25 #include "llvm/CodeGen/GlobalISel/Utils.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/IR/Type.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 #define DEBUG_TYPE "amdgpu-isel" 36 37 using namespace llvm; 38 using namespace MIPatternMatch; 39 40 #define GET_GLOBALISEL_IMPL 41 #define AMDGPUSubtarget GCNSubtarget 42 #include "AMDGPUGenGlobalISel.inc" 43 #undef GET_GLOBALISEL_IMPL 44 #undef AMDGPUSubtarget 45 46 AMDGPUInstructionSelector::AMDGPUInstructionSelector( 47 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, 48 const AMDGPUTargetMachine &TM) 49 : InstructionSelector(), TII(*STI.getInstrInfo()), 50 TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), 51 STI(STI), 52 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), 53 #define GET_GLOBALISEL_PREDICATES_INIT 54 #include "AMDGPUGenGlobalISel.inc" 55 #undef GET_GLOBALISEL_PREDICATES_INIT 56 #define GET_GLOBALISEL_TEMPORARIES_INIT 57 #include "AMDGPUGenGlobalISel.inc" 58 #undef GET_GLOBALISEL_TEMPORARIES_INIT 59 { 60 } 61 62 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } 63 64 static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { 65 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 66 return Reg == AMDGPU::SCC; 67 68 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 69 const TargetRegisterClass *RC = 70 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 71 if (RC) { 72 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the 73 // context of the register bank has been lost. 
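// Note (descriptive, not in the original): the check below only treats a register
// already constrained to the SReg_32_XM0 class as SCC when its LLT is the 1-bit
// scalar type; wider values in that class are ordinary SGPR values.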
74 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID) 75 return false; 76 const LLT Ty = MRI.getType(Reg); 77 return Ty.isValid() && Ty.getSizeInBits() == 1; 78 } 79 80 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 81 return RB->getID() == AMDGPU::SCCRegBankID; 82 } 83 84 bool AMDGPUInstructionSelector::isVCC(Register Reg, 85 const MachineRegisterInfo &MRI) const { 86 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 87 return Reg == TRI.getVCC(); 88 89 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 90 const TargetRegisterClass *RC = 91 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 92 if (RC) { 93 const LLT Ty = MRI.getType(Reg); 94 return RC->hasSuperClassEq(TRI.getBoolRC()) && 95 Ty.isValid() && Ty.getSizeInBits() == 1; 96 } 97 98 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 99 return RB->getID() == AMDGPU::VCCRegBankID; 100 } 101 102 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { 103 const DebugLoc &DL = I.getDebugLoc(); 104 MachineBasicBlock *BB = I.getParent(); 105 MachineFunction *MF = BB->getParent(); 106 MachineRegisterInfo &MRI = MF->getRegInfo(); 107 I.setDesc(TII.get(TargetOpcode::COPY)); 108 109 const MachineOperand &Src = I.getOperand(1); 110 MachineOperand &Dst = I.getOperand(0); 111 Register DstReg = Dst.getReg(); 112 Register SrcReg = Src.getReg(); 113 114 if (isVCC(DstReg, MRI)) { 115 if (SrcReg == AMDGPU::SCC) { 116 const TargetRegisterClass *RC 117 = TRI.getConstrainedRegClassForOperand(Dst, MRI); 118 if (!RC) 119 return true; 120 return RBI.constrainGenericRegister(DstReg, *RC, MRI); 121 } 122 123 if (!isVCC(SrcReg, MRI)) { 124 // TODO: Should probably leave the copy and let copyPhysReg expand it. 125 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) 126 return false; 127 128 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) 129 .addImm(0) 130 .addReg(SrcReg); 131 132 if (!MRI.getRegClassOrNull(SrcReg)) 133 MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); 134 I.eraseFromParent(); 135 return true; 136 } 137 138 const TargetRegisterClass *RC = 139 TRI.getConstrainedRegClassForOperand(Dst, MRI); 140 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI)) 141 return false; 142 143 // Don't constrain the source register to a class so the def instruction 144 // handles it (unless it's undef). 145 // 146 // FIXME: This is a hack. When selecting the def, we need to know 147 // specifically that the result is VCCRegBank, and not just an SGPR 148 // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 149 if (Src.isUndef()) { 150 const TargetRegisterClass *SrcRC = 151 TRI.getConstrainedRegClassForOperand(Src, MRI); 152 if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) 153 return false; 154 } 155 156 return true; 157 } 158 159 for (const MachineOperand &MO : I.operands()) { 160 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 161 continue; 162 163 const TargetRegisterClass *RC = 164 TRI.getConstrainedRegClassForOperand(MO, MRI); 165 if (!RC) 166 continue; 167 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 168 } 169 return true; 170 } 171 172 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { 173 MachineBasicBlock *BB = I.getParent(); 174 MachineFunction *MF = BB->getParent(); 175 MachineRegisterInfo &MRI = MF->getRegInfo(); 176 177 const Register DefReg = I.getOperand(0).getReg(); 178 const LLT DefTy = MRI.getType(DefReg); 179 180 // TODO: Verify this doesn't have insane operands (i.e.
VGPR to SGPR copy) 181 182 const RegClassOrRegBank &RegClassOrBank = 183 MRI.getRegClassOrRegBank(DefReg); 184 185 const TargetRegisterClass *DefRC 186 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 187 if (!DefRC) { 188 if (!DefTy.isValid()) { 189 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 190 return false; 191 } 192 193 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 194 if (RB.getID() == AMDGPU::SCCRegBankID) { 195 LLVM_DEBUG(dbgs() << "illegal scc phi\n"); 196 return false; 197 } 198 199 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI); 200 if (!DefRC) { 201 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 202 return false; 203 } 204 } 205 206 I.setDesc(TII.get(TargetOpcode::PHI)); 207 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 208 } 209 210 MachineOperand 211 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, 212 const TargetRegisterClass &SubRC, 213 unsigned SubIdx) const { 214 215 MachineInstr *MI = MO.getParent(); 216 MachineBasicBlock *BB = MO.getParent()->getParent(); 217 MachineFunction *MF = BB->getParent(); 218 MachineRegisterInfo &MRI = MF->getRegInfo(); 219 Register DstReg = MRI.createVirtualRegister(&SubRC); 220 221 if (MO.isReg()) { 222 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); 223 unsigned Reg = MO.getReg(); 224 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) 225 .addReg(Reg, 0, ComposedSubIdx); 226 227 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), 228 MO.isKill(), MO.isDead(), MO.isUndef(), 229 MO.isEarlyClobber(), 0, MO.isDebug(), 230 MO.isInternalRead()); 231 } 232 233 assert(MO.isImm()); 234 235 APInt Imm(64, MO.getImm()); 236 237 switch (SubIdx) { 238 default: 239 llvm_unreachable("do not know to split immediate with this sub index."); 240 case AMDGPU::sub0: 241 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue()); 242 case AMDGPU::sub1: 243 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue()); 244 } 245 } 246 247 static int64_t getConstant(const MachineInstr *MI) { 248 return MI->getOperand(1).getCImm()->getSExtValue(); 249 } 250 251 static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { 252 switch (Opc) { 253 case AMDGPU::G_AND: 254 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; 255 case AMDGPU::G_OR: 256 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; 257 case AMDGPU::G_XOR: 258 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32; 259 default: 260 llvm_unreachable("not a bit op"); 261 } 262 } 263 264 bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { 265 MachineBasicBlock *BB = I.getParent(); 266 MachineFunction *MF = BB->getParent(); 267 MachineRegisterInfo &MRI = MF->getRegInfo(); 268 MachineOperand &Dst = I.getOperand(0); 269 MachineOperand &Src0 = I.getOperand(1); 270 MachineOperand &Src1 = I.getOperand(2); 271 Register DstReg = Dst.getReg(); 272 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 273 274 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 275 if (DstRB->getID() == AMDGPU::VCCRegBankID) { 276 const TargetRegisterClass *RC = TRI.getBoolRC(); 277 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), 278 RC == &AMDGPU::SReg_64RegClass); 279 I.setDesc(TII.get(InstOpc)); 280 281 // FIXME: Hack to avoid turning the register bank into a register class. 282 // The selector for G_ICMP relies on seeing the register bank for the result 283 // is VCC. 
In wave32 if we constrain the registers to SReg_32 here, it will 284 // be ambiguous whether it's a scalar or vector bool. 285 if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg())) 286 MRI.setRegClass(Src0.getReg(), RC); 287 if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg())) 288 MRI.setRegClass(Src1.getReg(), RC); 289 290 return RBI.constrainGenericRegister(DstReg, *RC, MRI); 291 } 292 293 // TODO: Should this allow an SCC bank result, and produce a copy from SCC for 294 // the result? 295 if (DstRB->getID() == AMDGPU::SGPRRegBankID) { 296 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); 297 I.setDesc(TII.get(InstOpc)); 298 299 const TargetRegisterClass *RC 300 = TRI.getConstrainedRegClassForOperand(Dst, MRI); 301 if (!RC) 302 return false; 303 return RBI.constrainGenericRegister(DstReg, *RC, MRI) && 304 RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && 305 RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); 306 } 307 308 return false; 309 } 310 311 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { 312 MachineBasicBlock *BB = I.getParent(); 313 MachineFunction *MF = BB->getParent(); 314 MachineRegisterInfo &MRI = MF->getRegInfo(); 315 Register DstReg = I.getOperand(0).getReg(); 316 const DebugLoc &DL = I.getDebugLoc(); 317 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 318 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 319 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; 320 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; 321 322 if (Size == 32) { 323 if (IsSALU) { 324 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 325 MachineInstr *Add = 326 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 327 .add(I.getOperand(1)) 328 .add(I.getOperand(2)); 329 I.eraseFromParent(); 330 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 331 } 332 333 if (STI.hasAddNoCarry()) { 334 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64; 335 I.setDesc(TII.get(Opc)); 336 I.addOperand(*MF, MachineOperand::CreateImm(0)); 337 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 338 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 339 } 340 341 const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; 342 343 Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); 344 MachineInstr *Add 345 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 346 .addDef(UnusedCarry, RegState::Dead) 347 .add(I.getOperand(1)) 348 .add(I.getOperand(2)) 349 .addImm(0); 350 I.eraseFromParent(); 351 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 352 } 353 354 assert(!Sub && "illegal sub should not reach here"); 355 356 const TargetRegisterClass &RC 357 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; 358 const TargetRegisterClass &HalfRC 359 = IsSALU ? 
AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; 360 361 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); 362 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); 363 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); 364 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); 365 366 Register DstLo = MRI.createVirtualRegister(&HalfRC); 367 Register DstHi = MRI.createVirtualRegister(&HalfRC); 368 369 if (IsSALU) { 370 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) 371 .add(Lo1) 372 .add(Lo2); 373 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) 374 .add(Hi1) 375 .add(Hi2); 376 } else { 377 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); 378 Register CarryReg = MRI.createVirtualRegister(CarryRC); 379 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) 380 .addDef(CarryReg) 381 .add(Lo1) 382 .add(Lo2) 383 .addImm(0); 384 MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) 385 .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) 386 .add(Hi1) 387 .add(Hi2) 388 .addReg(CarryReg, RegState::Kill) 389 .addImm(0); 390 391 if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI)) 392 return false; 393 } 394 395 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 396 .addReg(DstLo) 397 .addImm(AMDGPU::sub0) 398 .addReg(DstHi) 399 .addImm(AMDGPU::sub1); 400 401 402 if (!RBI.constrainGenericRegister(DstReg, RC, MRI)) 403 return false; 404 405 I.eraseFromParent(); 406 return true; 407 } 408 409 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { 410 MachineBasicBlock *BB = I.getParent(); 411 MachineFunction *MF = BB->getParent(); 412 MachineRegisterInfo &MRI = MF->getRegInfo(); 413 assert(I.getOperand(2).getImm() % 32 == 0); 414 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); 415 const DebugLoc &DL = I.getDebugLoc(); 416 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), 417 I.getOperand(0).getReg()) 418 .addReg(I.getOperand(1).getReg(), 0, SubReg); 419 420 for (const MachineOperand &MO : Copy->operands()) { 421 const TargetRegisterClass *RC = 422 TRI.getConstrainedRegClassForOperand(MO, MRI); 423 if (!RC) 424 continue; 425 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 426 } 427 I.eraseFromParent(); 428 return true; 429 } 430 431 bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { 432 MachineBasicBlock *BB = MI.getParent(); 433 MachineFunction *MF = BB->getParent(); 434 MachineRegisterInfo &MRI = MF->getRegInfo(); 435 Register DstReg = MI.getOperand(0).getReg(); 436 LLT DstTy = MRI.getType(DstReg); 437 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 438 439 const unsigned SrcSize = SrcTy.getSizeInBits(); 440 if (SrcSize < 32) 441 return false; 442 443 const DebugLoc &DL = MI.getDebugLoc(); 444 const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI); 445 const unsigned DstSize = DstTy.getSizeInBits(); 446 const TargetRegisterClass *DstRC = 447 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI); 448 if (!DstRC) 449 return false; 450 451 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8); 452 MachineInstrBuilder MIB = 453 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg); 454 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) { 455 MachineOperand &Src = MI.getOperand(I + 1); 456 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef())); 457 MIB.addImm(SubRegs[I]); 458 
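// Constrain each merged source below so the REG_SEQUENCE operands end up with
// concrete register classes matching their register bank.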
459 const TargetRegisterClass *SrcRC 460 = TRI.getConstrainedRegClassForOperand(Src, MRI); 461 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI)) 462 return false; 463 } 464 465 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) 466 return false; 467 468 MI.eraseFromParent(); 469 return true; 470 } 471 472 bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { 473 MachineBasicBlock *BB = MI.getParent(); 474 MachineFunction *MF = BB->getParent(); 475 MachineRegisterInfo &MRI = MF->getRegInfo(); 476 const int NumDst = MI.getNumOperands() - 1; 477 478 MachineOperand &Src = MI.getOperand(NumDst); 479 480 Register SrcReg = Src.getReg(); 481 Register DstReg0 = MI.getOperand(0).getReg(); 482 LLT DstTy = MRI.getType(DstReg0); 483 LLT SrcTy = MRI.getType(SrcReg); 484 485 const unsigned DstSize = DstTy.getSizeInBits(); 486 const unsigned SrcSize = SrcTy.getSizeInBits(); 487 const DebugLoc &DL = MI.getDebugLoc(); 488 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 489 490 const TargetRegisterClass *SrcRC = 491 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI); 492 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) 493 return false; 494 495 const unsigned SrcFlags = getUndefRegState(Src.isUndef()); 496 497 // Note we could have mixed SGPR and VGPR destination banks for an SGPR 498 // source, and this relies on the fact that the same subregister indices are 499 // used for both. 500 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); 501 for (int I = 0, E = NumDst; I != E; ++I) { 502 MachineOperand &Dst = MI.getOperand(I); 503 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) 504 .addReg(SrcReg, SrcFlags, SubRegs[I]); 505 506 const TargetRegisterClass *DstRC = 507 TRI.getConstrainedRegClassForOperand(Dst, MRI); 508 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI)) 509 return false; 510 } 511 512 MI.eraseFromParent(); 513 return true; 514 } 515 516 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { 517 return selectG_ADD_SUB(I); 518 } 519 520 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { 521 MachineBasicBlock *BB = I.getParent(); 522 MachineFunction *MF = BB->getParent(); 523 MachineRegisterInfo &MRI = MF->getRegInfo(); 524 const MachineOperand &MO = I.getOperand(0); 525 526 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The 527 // regbank check here is to know why getConstrainedRegClassForOperand failed. 
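// Keep the IMPLICIT_DEF when either no class/bank has been assigned to the def
// yet, or the def can be constrained to the class implied by its operand.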
528 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI); 529 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) || 530 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) { 531 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 532 return true; 533 } 534 535 return false; 536 } 537 538 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { 539 MachineBasicBlock *BB = I.getParent(); 540 MachineFunction *MF = BB->getParent(); 541 MachineRegisterInfo &MRI = MF->getRegInfo(); 542 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32); 543 DebugLoc DL = I.getDebugLoc(); 544 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) 545 .addDef(I.getOperand(0).getReg()) 546 .addReg(I.getOperand(1).getReg()) 547 .addReg(I.getOperand(2).getReg()) 548 .addImm(SubReg); 549 550 for (const MachineOperand &MO : Ins->operands()) { 551 if (!MO.isReg()) 552 continue; 553 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 554 continue; 555 556 const TargetRegisterClass *RC = 557 TRI.getConstrainedRegClassForOperand(MO, MRI); 558 if (!RC) 559 continue; 560 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 561 } 562 I.eraseFromParent(); 563 return true; 564 } 565 566 bool AMDGPUInstructionSelector::selectG_INTRINSIC( 567 MachineInstr &I, CodeGenCoverage &CoverageInfo) const { 568 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); 569 switch (IntrinsicID) { 570 case Intrinsic::maxnum: 571 case Intrinsic::minnum: 572 case Intrinsic::amdgcn_cvt_pkrtz: 573 return selectImpl(I, CoverageInfo); 574 case Intrinsic::amdgcn_if_break: { 575 MachineBasicBlock *BB = I.getParent(); 576 MachineFunction *MF = BB->getParent(); 577 MachineRegisterInfo &MRI = MF->getRegInfo(); 578 579 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 580 // SelectionDAG uses for wave32 vs wave64. 581 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) 582 .add(I.getOperand(0)) 583 .add(I.getOperand(2)) 584 .add(I.getOperand(3)); 585 586 Register DstReg = I.getOperand(0).getReg(); 587 Register Src0Reg = I.getOperand(2).getReg(); 588 Register Src1Reg = I.getOperand(3).getReg(); 589 590 I.eraseFromParent(); 591 592 for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { 593 if (!MRI.getRegClassOrNull(Reg)) 594 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); 595 } 596 597 return true; 598 } 599 default: 600 return selectImpl(I, CoverageInfo); 601 } 602 } 603 604 static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) { 605 if (Size != 32 && Size != 64) 606 return -1; 607 switch (P) { 608 default: 609 llvm_unreachable("Unknown condition code!"); 610 case CmpInst::ICMP_NE: 611 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64; 612 case CmpInst::ICMP_EQ: 613 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64; 614 case CmpInst::ICMP_SGT: 615 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64; 616 case CmpInst::ICMP_SGE: 617 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64; 618 case CmpInst::ICMP_SLT: 619 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64; 620 case CmpInst::ICMP_SLE: 621 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64; 622 case CmpInst::ICMP_UGT: 623 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64; 624 case CmpInst::ICMP_UGE: 625 return Size == 32 ?
AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64; 626 case CmpInst::ICMP_ULT: 627 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64; 628 case CmpInst::ICMP_ULE: 629 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64; 630 } 631 } 632 633 int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, 634 unsigned Size) const { 635 if (Size == 64) { 636 if (!STI.hasScalarCompareEq64()) 637 return -1; 638 639 switch (P) { 640 case CmpInst::ICMP_NE: 641 return AMDGPU::S_CMP_LG_U64; 642 case CmpInst::ICMP_EQ: 643 return AMDGPU::S_CMP_EQ_U64; 644 default: 645 return -1; 646 } 647 } 648 649 if (Size != 32) 650 return -1; 651 652 switch (P) { 653 case CmpInst::ICMP_NE: 654 return AMDGPU::S_CMP_LG_U32; 655 case CmpInst::ICMP_EQ: 656 return AMDGPU::S_CMP_EQ_U32; 657 case CmpInst::ICMP_SGT: 658 return AMDGPU::S_CMP_GT_I32; 659 case CmpInst::ICMP_SGE: 660 return AMDGPU::S_CMP_GE_I32; 661 case CmpInst::ICMP_SLT: 662 return AMDGPU::S_CMP_LT_I32; 663 case CmpInst::ICMP_SLE: 664 return AMDGPU::S_CMP_LE_I32; 665 case CmpInst::ICMP_UGT: 666 return AMDGPU::S_CMP_GT_U32; 667 case CmpInst::ICMP_UGE: 668 return AMDGPU::S_CMP_GE_U32; 669 case CmpInst::ICMP_ULT: 670 return AMDGPU::S_CMP_LT_U32; 671 case CmpInst::ICMP_ULE: 672 return AMDGPU::S_CMP_LE_U32; 673 default: 674 llvm_unreachable("Unknown condition code!"); 675 } 676 } 677 678 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { 679 MachineBasicBlock *BB = I.getParent(); 680 MachineFunction *MF = BB->getParent(); 681 MachineRegisterInfo &MRI = MF->getRegInfo(); 682 const DebugLoc &DL = I.getDebugLoc(); 683 684 unsigned SrcReg = I.getOperand(2).getReg(); 685 unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI); 686 687 auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 688 689 unsigned CCReg = I.getOperand(0).getReg(); 690 if (isSCC(CCReg, MRI)) { 691 int Opcode = getS_CMPOpcode(Pred, Size); 692 if (Opcode == -1) 693 return false; 694 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) 695 .add(I.getOperand(2)) 696 .add(I.getOperand(3)); 697 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) 698 .addReg(AMDGPU::SCC); 699 bool Ret = 700 constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && 701 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI); 702 I.eraseFromParent(); 703 return Ret; 704 } 705 706 int Opcode = getV_CMPOpcode(Pred, Size); 707 if (Opcode == -1) 708 return false; 709 710 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), 711 I.getOperand(0).getReg()) 712 .add(I.getOperand(2)) 713 .add(I.getOperand(3)); 714 RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), 715 *TRI.getBoolRC(), MRI); 716 bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); 717 I.eraseFromParent(); 718 return Ret; 719 } 720 721 static MachineInstr * 722 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, 723 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3, 724 unsigned VM, bool Compr, unsigned Enabled, bool Done) { 725 const DebugLoc &DL = Insert->getDebugLoc(); 726 MachineBasicBlock &BB = *Insert->getParent(); 727 unsigned Opcode = Done ? 
AMDGPU::EXP_DONE : AMDGPU::EXP; 728 return BuildMI(BB, Insert, DL, TII.get(Opcode)) 729 .addImm(Tgt) 730 .addReg(Reg0) 731 .addReg(Reg1) 732 .addReg(Reg2) 733 .addReg(Reg3) 734 .addImm(VM) 735 .addImm(Compr) 736 .addImm(Enabled); 737 } 738 739 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( 740 MachineInstr &I, CodeGenCoverage &CoverageInfo) const { 741 MachineBasicBlock *BB = I.getParent(); 742 MachineFunction *MF = BB->getParent(); 743 MachineRegisterInfo &MRI = MF->getRegInfo(); 744 745 unsigned IntrinsicID = I.getOperand(0).getIntrinsicID(); 746 switch (IntrinsicID) { 747 case Intrinsic::amdgcn_exp: { 748 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); 749 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); 750 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); 751 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); 752 753 MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), 754 I.getOperand(4).getReg(), 755 I.getOperand(5).getReg(), 756 I.getOperand(6).getReg(), 757 VM, false, Enabled, Done); 758 759 I.eraseFromParent(); 760 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 761 } 762 case Intrinsic::amdgcn_exp_compr: { 763 const DebugLoc &DL = I.getDebugLoc(); 764 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); 765 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); 766 unsigned Reg0 = I.getOperand(3).getReg(); 767 unsigned Reg1 = I.getOperand(4).getReg(); 768 unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 769 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); 770 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); 771 772 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); 773 MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, 774 true, Enabled, Done); 775 776 I.eraseFromParent(); 777 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 778 } 779 case Intrinsic::amdgcn_end_cf: { 780 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 781 // SelectionDAG uses for wave32 vs wave64. 782 BuildMI(*BB, &I, I.getDebugLoc(), 783 TII.get(AMDGPU::SI_END_CF)) 784 .add(I.getOperand(1)); 785 786 Register Reg = I.getOperand(1).getReg(); 787 I.eraseFromParent(); 788 789 if (!MRI.getRegClassOrNull(Reg)) 790 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); 791 return true; 792 } 793 default: 794 return selectImpl(I, CoverageInfo); 795 } 796 } 797 798 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { 799 MachineBasicBlock *BB = I.getParent(); 800 MachineFunction *MF = BB->getParent(); 801 MachineRegisterInfo &MRI = MF->getRegInfo(); 802 const DebugLoc &DL = I.getDebugLoc(); 803 804 unsigned DstReg = I.getOperand(0).getReg(); 805 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 806 assert(Size <= 32 || Size == 64); 807 const MachineOperand &CCOp = I.getOperand(1); 808 unsigned CCReg = CCOp.getReg(); 809 if (isSCC(CCReg, MRI)) { 810 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 : 811 AMDGPU::S_CSELECT_B32; 812 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 813 .addReg(CCReg); 814 815 // The generic constrainSelectedInstRegOperands doesn't work for the scc register 816 // bank, because it does not cover the register class we use to represent it. 817 // So we need to manually set the register class here.
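// The scc path emits, roughly:
//   $scc = COPY %cond
//   %dst = S_CSELECT_B32/B64 %trueval, %falseval   (reads $scc implicitly)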
818 if (!MRI.getRegClassOrNull(CCReg)) 819 MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI)); 820 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) 821 .add(I.getOperand(2)) 822 .add(I.getOperand(3)); 823 824 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) | 825 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); 826 I.eraseFromParent(); 827 return Ret; 828 } 829 830 // Wide VGPR select should have been split in RegBankSelect. 831 if (Size > 32) 832 return false; 833 834 MachineInstr *Select = 835 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 836 .addImm(0) 837 .add(I.getOperand(3)) 838 .addImm(0) 839 .add(I.getOperand(2)) 840 .add(I.getOperand(1)); 841 842 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); 843 I.eraseFromParent(); 844 return Ret; 845 } 846 847 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { 848 MachineBasicBlock *BB = I.getParent(); 849 MachineFunction *MF = BB->getParent(); 850 MachineRegisterInfo &MRI = MF->getRegInfo(); 851 DebugLoc DL = I.getDebugLoc(); 852 unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI); 853 if (PtrSize != 64) { 854 LLVM_DEBUG(dbgs() << "Unhandled address space\n"); 855 return false; 856 } 857 858 unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); 859 unsigned Opcode; 860 861 // FIXME: Remove this when integers > s32 naturally selected. 862 switch (StoreSize) { 863 default: 864 return false; 865 case 32: 866 Opcode = AMDGPU::FLAT_STORE_DWORD; 867 break; 868 case 64: 869 Opcode = AMDGPU::FLAT_STORE_DWORDX2; 870 break; 871 case 96: 872 Opcode = AMDGPU::FLAT_STORE_DWORDX3; 873 break; 874 case 128: 875 Opcode = AMDGPU::FLAT_STORE_DWORDX4; 876 break; 877 } 878 879 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) 880 .add(I.getOperand(1)) 881 .add(I.getOperand(0)) 882 .addImm(0) // offset 883 .addImm(0) // glc 884 .addImm(0) // slc 885 .addImm(0); // dlc 886 887 888 // Now that we selected an opcode, we need to constrain the register 889 // operands to use appropriate classes. 
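// constrainSelectedInstRegOperands constrains each virtual register operand of
// the selected FLAT store to the register class its operand definition requires.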
890 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); 891 892 I.eraseFromParent(); 893 return Ret; 894 } 895 896 static int sizeToSubRegIndex(unsigned Size) { 897 switch (Size) { 898 case 32: 899 return AMDGPU::sub0; 900 case 64: 901 return AMDGPU::sub0_sub1; 902 case 96: 903 return AMDGPU::sub0_sub1_sub2; 904 case 128: 905 return AMDGPU::sub0_sub1_sub2_sub3; 906 case 256: 907 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; 908 default: 909 if (Size < 32) 910 return AMDGPU::sub0; 911 if (Size > 256) 912 return -1; 913 return sizeToSubRegIndex(PowerOf2Ceil(Size)); 914 } 915 } 916 917 bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { 918 MachineBasicBlock *BB = I.getParent(); 919 MachineFunction *MF = BB->getParent(); 920 MachineRegisterInfo &MRI = MF->getRegInfo(); 921 922 unsigned DstReg = I.getOperand(0).getReg(); 923 unsigned SrcReg = I.getOperand(1).getReg(); 924 const LLT DstTy = MRI.getType(DstReg); 925 const LLT SrcTy = MRI.getType(SrcReg); 926 if (!DstTy.isScalar()) 927 return false; 928 929 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 930 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI); 931 if (SrcRB != DstRB) 932 return false; 933 934 unsigned DstSize = DstTy.getSizeInBits(); 935 unsigned SrcSize = SrcTy.getSizeInBits(); 936 937 const TargetRegisterClass *SrcRC 938 = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI); 939 const TargetRegisterClass *DstRC 940 = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI); 941 942 if (SrcSize > 32) { 943 int SubRegIdx = sizeToSubRegIndex(DstSize); 944 if (SubRegIdx == -1) 945 return false; 946 947 // Deal with weird cases where the class only partially supports the subreg 948 // index. 949 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx); 950 if (!SrcRC) 951 return false; 952 953 I.getOperand(1).setSubReg(SubRegIdx); 954 } 955 956 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 957 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 958 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); 959 return false; 960 } 961 962 I.setDesc(TII.get(TargetOpcode::COPY)); 963 return true; 964 } 965 966 /// \returns true if a bitmask for \p Size bits will be an inline immediate. 967 static bool shouldUseAndMask(unsigned Size, unsigned &Mask) { 968 Mask = maskTrailingOnes<unsigned>(Size); 969 int SignedMask = static_cast<int>(Mask); 970 return SignedMask >= -16 && SignedMask <= 64; 971 } 972 973 bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { 974 bool Signed = I.getOpcode() == AMDGPU::G_SEXT; 975 const DebugLoc &DL = I.getDebugLoc(); 976 MachineBasicBlock &MBB = *I.getParent(); 977 MachineFunction &MF = *MBB.getParent(); 978 MachineRegisterInfo &MRI = MF.getRegInfo(); 979 const unsigned DstReg = I.getOperand(0).getReg(); 980 const unsigned SrcReg = I.getOperand(1).getReg(); 981 982 const LLT DstTy = MRI.getType(DstReg); 983 const LLT SrcTy = MRI.getType(SrcReg); 984 const LLT S1 = LLT::scalar(1); 985 const unsigned SrcSize = SrcTy.getSizeInBits(); 986 const unsigned DstSize = DstTy.getSizeInBits(); 987 if (!DstTy.isScalar()) 988 return false; 989 990 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 991 992 if (SrcBank->getID() == AMDGPU::SCCRegBankID) { 993 if (SrcTy != S1 || DstSize > 64) // Invalid 994 return false; 995 996 unsigned Opcode = 997 DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; 998 const TargetRegisterClass *DstRC = 999 DstSize > 32 ? 
&AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass; 1000 1001 // FIXME: Create an extra copy to avoid incorrectly constraining the result 1002 // of the scc producer. 1003 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1004 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) 1005 .addReg(SrcReg); 1006 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 1007 .addReg(TmpReg); 1008 1009 // The instruction operands are backwards from what you would expect. 1010 BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) 1011 .addImm(0) 1012 .addImm(Signed ? -1 : 1); 1013 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 1014 } 1015 1016 if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { 1017 if (SrcTy != S1) // Invalid 1018 return false; 1019 1020 MachineInstr *ExtI = 1021 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 1022 .addImm(0) // src0_modifiers 1023 .addImm(0) // src0 1024 .addImm(0) // src1_modifiers 1025 .addImm(Signed ? -1 : 1) // src1 1026 .addUse(SrcReg); 1027 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1028 } 1029 1030 if (I.getOpcode() == AMDGPU::G_ANYEXT) 1031 return selectCOPY(I); 1032 1033 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { 1034 // 64-bit should have been split up in RegBankSelect 1035 1036 // Try to use an and with a mask if it will save code size. 1037 unsigned Mask; 1038 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1039 MachineInstr *ExtI = 1040 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) 1041 .addImm(Mask) 1042 .addReg(SrcReg); 1043 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1044 } 1045 1046 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32; 1047 MachineInstr *ExtI = 1048 BuildMI(MBB, I, DL, TII.get(BFE), DstReg) 1049 .addReg(SrcReg) 1050 .addImm(0) // Offset 1051 .addImm(SrcSize); // Width 1052 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1053 } 1054 1055 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { 1056 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI)) 1057 return false; 1058 1059 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { 1060 const unsigned SextOpc = SrcSize == 8 ? 1061 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; 1062 BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) 1063 .addReg(SrcReg); 1064 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1065 } 1066 1067 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; 1068 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1069 1070 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. 1071 if (DstSize > 32 && SrcSize <= 32) { 1072 // We need a 64-bit register source, but the high bits don't matter. 
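// The sequence built below is roughly:
//   %undef = IMPLICIT_DEF
//   %ext:sreg_64 = REG_SEQUENCE %src, sub0, %undef, sub1
//   %dst = S_BFE_I64/U64 %ext, (SrcSize << 16)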
1073 unsigned ExtReg 1074 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 1075 unsigned UndefReg 1076 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1077 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 1078 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) 1079 .addReg(SrcReg) 1080 .addImm(AMDGPU::sub0) 1081 .addReg(UndefReg) 1082 .addImm(AMDGPU::sub1); 1083 1084 BuildMI(MBB, I, DL, TII.get(BFE64), DstReg) 1085 .addReg(ExtReg) 1086 .addImm(SrcSize << 16); 1087 1088 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); 1089 } 1090 1091 unsigned Mask; 1092 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1093 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) 1094 .addReg(SrcReg) 1095 .addImm(Mask); 1096 } else { 1097 BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) 1098 .addReg(SrcReg) 1099 .addImm(SrcSize << 16); 1100 } 1101 1102 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1103 } 1104 1105 return false; 1106 } 1107 1108 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { 1109 MachineBasicBlock *BB = I.getParent(); 1110 MachineFunction *MF = BB->getParent(); 1111 MachineRegisterInfo &MRI = MF->getRegInfo(); 1112 MachineOperand &ImmOp = I.getOperand(1); 1113 1114 // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 1115 if (ImmOp.isFPImm()) { 1116 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); 1117 ImmOp.ChangeToImmediate(Imm.getZExtValue()); 1118 } else if (ImmOp.isCImm()) { 1119 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); 1120 } 1121 1122 unsigned DstReg = I.getOperand(0).getReg(); 1123 unsigned Size; 1124 bool IsSgpr; 1125 const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); 1126 if (RB) { 1127 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; 1128 Size = MRI.getType(DstReg).getSizeInBits(); 1129 } else { 1130 const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); 1131 IsSgpr = TRI.isSGPRClass(RC); 1132 Size = TRI.getRegSizeInBits(*RC); 1133 } 1134 1135 if (Size != 32 && Size != 64) 1136 return false; 1137 1138 unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 1139 if (Size == 32) { 1140 I.setDesc(TII.get(Opcode)); 1141 I.addImplicitDefUseOperands(*MF); 1142 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1143 } 1144 1145 DebugLoc DL = I.getDebugLoc(); 1146 const TargetRegisterClass *RC = IsSgpr ? 
&AMDGPU::SReg_32_XM0RegClass : 1147 &AMDGPU::VGPR_32RegClass; 1148 unsigned LoReg = MRI.createVirtualRegister(RC); 1149 unsigned HiReg = MRI.createVirtualRegister(RC); 1150 const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); 1151 1152 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) 1153 .addImm(Imm.trunc(32).getZExtValue()); 1154 1155 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) 1156 .addImm(Imm.ashr(32).getZExtValue()); 1157 1158 const MachineInstr *RS = 1159 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 1160 .addReg(LoReg) 1161 .addImm(AMDGPU::sub0) 1162 .addReg(HiReg) 1163 .addImm(AMDGPU::sub1); 1164 1165 // We can't call constrainSelectedInstRegOperands here, because it doesn't 1166 // work for target independent opcodes 1167 I.eraseFromParent(); 1168 const TargetRegisterClass *DstRC = 1169 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); 1170 if (!DstRC) 1171 return true; 1172 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 1173 } 1174 1175 static bool isConstant(const MachineInstr &MI) { 1176 return MI.getOpcode() == TargetOpcode::G_CONSTANT; 1177 } 1178 1179 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, 1180 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { 1181 1182 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); 1183 1184 assert(PtrMI); 1185 1186 if (PtrMI->getOpcode() != TargetOpcode::G_GEP) 1187 return; 1188 1189 GEPInfo GEPInfo(*PtrMI); 1190 1191 for (unsigned i = 1, e = 3; i < e; ++i) { 1192 const MachineOperand &GEPOp = PtrMI->getOperand(i); 1193 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); 1194 assert(OpDef); 1195 if (isConstant(*OpDef)) { 1196 // FIXME: Is it possible to have multiple Imm parts? Maybe if we 1197 // are lacking other optimizations. 1198 assert(GEPInfo.Imm == 0); 1199 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); 1200 continue; 1201 } 1202 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); 1203 if (OpBank->getID() == AMDGPU::SGPRRegBankID) 1204 GEPInfo.SgprParts.push_back(GEPOp.getReg()); 1205 else 1206 GEPInfo.VgprParts.push_back(GEPOp.getReg()); 1207 } 1208 1209 AddrInfo.push_back(GEPInfo); 1210 getAddrModeInfo(*PtrMI, MRI, AddrInfo); 1211 } 1212 1213 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { 1214 if (!MI.hasOneMemOperand()) 1215 return false; 1216 1217 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1218 const Value *Ptr = MMO->getValue(); 1219 1220 // UndefValue means this is a load of a kernel input. These are uniform. 1221 // Sometimes LDS instructions have constant pointers. 1222 // If Ptr is null, then that means this mem operand contains a 1223 // PseudoSourceValue like GOT. 1224 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 1225 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 1226 return true; 1227 1228 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) 1229 return true; 1230 1231 const Instruction *I = dyn_cast<Instruction>(Ptr); 1232 return I && I->getMetadata("amdgpu.uniform"); 1233 } 1234 1235 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const { 1236 for (const GEPInfo &GEPInfo : AddrInfo) { 1237 if (!GEPInfo.VgprParts.empty()) 1238 return true; 1239 } 1240 return false; 1241 } 1242 1243 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { 1244 // TODO: Can/should we insert m0 initialization here for DS instructions and 1245 // call the normal selector? 
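// For now G_LOAD is handled by the imported TableGen patterns; the G_LOAD case
// in select() calls selectImpl() directly, so this manual path simply fails.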
1246 return false; 1247 } 1248 1249 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { 1250 MachineBasicBlock *BB = I.getParent(); 1251 MachineFunction *MF = BB->getParent(); 1252 MachineRegisterInfo &MRI = MF->getRegInfo(); 1253 MachineOperand &CondOp = I.getOperand(0); 1254 Register CondReg = CondOp.getReg(); 1255 const DebugLoc &DL = I.getDebugLoc(); 1256 1257 unsigned BrOpcode; 1258 Register CondPhysReg; 1259 const TargetRegisterClass *ConstrainRC; 1260 1261 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide 1262 // whether the branch is uniform when selecting the instruction. In 1263 // GlobalISel, we should push that decision into RegBankSelect. Assume for now 1264 // RegBankSelect knows what it's doing if the branch condition is scc, even 1265 // though it currently does not. 1266 if (isSCC(CondReg, MRI)) { 1267 CondPhysReg = AMDGPU::SCC; 1268 BrOpcode = AMDGPU::S_CBRANCH_SCC1; 1269 ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; 1270 } else if (isVCC(CondReg, MRI)) { 1271 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? 1272 // We sort of know, based on the register bank, that a VCC producer ands 1273 // inactive lanes with 0. What if there was a logical operation with vcc 1274 // producers in different blocks/with different exec masks? 1275 // FIXME: Should scc->vcc copies and with exec? 1276 CondPhysReg = TRI.getVCC(); 1277 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; 1278 ConstrainRC = TRI.getBoolRC(); 1279 } else 1280 return false; 1281 1282 if (!MRI.getRegClassOrNull(CondReg)) 1283 MRI.setRegClass(CondReg, ConstrainRC); 1284 1285 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) 1286 .addReg(CondReg); 1287 BuildMI(*BB, &I, DL, TII.get(BrOpcode)) 1288 .addMBB(I.getOperand(1).getMBB()); 1289 1290 I.eraseFromParent(); 1291 return true; 1292 } 1293 1294 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { 1295 MachineBasicBlock *BB = I.getParent(); 1296 MachineFunction *MF = BB->getParent(); 1297 MachineRegisterInfo &MRI = MF->getRegInfo(); 1298 1299 Register DstReg = I.getOperand(0).getReg(); 1300 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 1301 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 1302 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); 1303 if (IsVGPR) 1304 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 1305 1306 return RBI.constrainGenericRegister( 1307 DstReg, IsVGPR ?
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI); 1308 } 1309 1310 bool AMDGPUInstructionSelector::select(MachineInstr &I, 1311 CodeGenCoverage &CoverageInfo) const { 1312 if (I.isPHI()) 1313 return selectPHI(I); 1314 1315 if (!isPreISelGenericOpcode(I.getOpcode())) { 1316 if (I.isCopy()) 1317 return selectCOPY(I); 1318 return true; 1319 } 1320 1321 switch (I.getOpcode()) { 1322 case TargetOpcode::G_AND: 1323 case TargetOpcode::G_OR: 1324 case TargetOpcode::G_XOR: 1325 if (selectG_AND_OR_XOR(I)) 1326 return true; 1327 return selectImpl(I, CoverageInfo); 1328 case TargetOpcode::G_ADD: 1329 case TargetOpcode::G_SUB: 1330 if (selectG_ADD_SUB(I)) 1331 return true; 1332 LLVM_FALLTHROUGH; 1333 default: 1334 return selectImpl(I, CoverageInfo); 1335 case TargetOpcode::G_INTTOPTR: 1336 case TargetOpcode::G_BITCAST: 1337 return selectCOPY(I); 1338 case TargetOpcode::G_CONSTANT: 1339 case TargetOpcode::G_FCONSTANT: 1340 return selectG_CONSTANT(I); 1341 case TargetOpcode::G_EXTRACT: 1342 return selectG_EXTRACT(I); 1343 case TargetOpcode::G_MERGE_VALUES: 1344 case TargetOpcode::G_BUILD_VECTOR: 1345 case TargetOpcode::G_CONCAT_VECTORS: 1346 return selectG_MERGE_VALUES(I); 1347 case TargetOpcode::G_UNMERGE_VALUES: 1348 return selectG_UNMERGE_VALUES(I); 1349 case TargetOpcode::G_GEP: 1350 return selectG_GEP(I); 1351 case TargetOpcode::G_IMPLICIT_DEF: 1352 return selectG_IMPLICIT_DEF(I); 1353 case TargetOpcode::G_INSERT: 1354 return selectG_INSERT(I); 1355 case TargetOpcode::G_INTRINSIC: 1356 return selectG_INTRINSIC(I, CoverageInfo); 1357 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 1358 return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); 1359 case TargetOpcode::G_ICMP: 1360 if (selectG_ICMP(I)) 1361 return true; 1362 return selectImpl(I, CoverageInfo); 1363 case TargetOpcode::G_LOAD: 1364 return selectImpl(I, CoverageInfo); 1365 case TargetOpcode::G_SELECT: 1366 return selectG_SELECT(I); 1367 case TargetOpcode::G_STORE: 1368 if (selectImpl(I, CoverageInfo)) 1369 return true; 1370 return selectG_STORE(I); 1371 case TargetOpcode::G_TRUNC: 1372 return selectG_TRUNC(I); 1373 case TargetOpcode::G_SEXT: 1374 case TargetOpcode::G_ZEXT: 1375 case TargetOpcode::G_ANYEXT: 1376 if (selectG_SZA_EXT(I)) { 1377 I.eraseFromParent(); 1378 return true; 1379 } 1380 1381 return false; 1382 case TargetOpcode::G_BRCOND: 1383 return selectG_BRCOND(I); 1384 case TargetOpcode::G_FRAME_INDEX: 1385 return selectG_FRAME_INDEX(I); 1386 case TargetOpcode::G_FENCE: 1387 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and 1388 // is checking for G_CONSTANT 1389 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); 1390 return true; 1391 } 1392 return false; 1393 } 1394 1395 InstructionSelector::ComplexRendererFns 1396 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { 1397 return {{ 1398 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1399 }}; 1400 1401 } 1402 1403 std::pair<Register, unsigned> 1404 AMDGPUInstructionSelector::selectVOP3ModsImpl( 1405 Register Src, const MachineRegisterInfo &MRI) const { 1406 unsigned Mods = 0; 1407 MachineInstr *MI = MRI.getVRegDef(Src); 1408 1409 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { 1410 Src = MI->getOperand(1).getReg(); 1411 Mods |= SISrcMods::NEG; 1412 MI = MRI.getVRegDef(Src); 1413 } 1414 1415 if (MI && MI->getOpcode() == AMDGPU::G_FABS) { 1416 Src = MI->getOperand(1).getReg(); 1417 Mods |= SISrcMods::ABS; 1418 } 1419 1420 return std::make_pair(Src, Mods); 1421 } 1422 1423 /// 1424 /// This will select either an SGPR or VGPR operand and will 
save us from 1425 /// having to write an extra tablegen pattern. 1426 InstructionSelector::ComplexRendererFns 1427 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { 1428 return {{ 1429 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1430 }}; 1431 } 1432 1433 InstructionSelector::ComplexRendererFns 1434 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { 1435 MachineRegisterInfo &MRI 1436 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1437 1438 Register Src; 1439 unsigned Mods; 1440 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1441 1442 return {{ 1443 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1444 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 1445 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1446 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1447 }}; 1448 } 1449 InstructionSelector::ComplexRendererFns 1450 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { 1451 return {{ 1452 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 1453 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1454 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1455 }}; 1456 } 1457 1458 InstructionSelector::ComplexRendererFns 1459 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { 1460 MachineRegisterInfo &MRI 1461 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1462 1463 Register Src; 1464 unsigned Mods; 1465 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1466 1467 return {{ 1468 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1469 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 1470 }}; 1471 } 1472 1473 InstructionSelector::ComplexRendererFns 1474 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { 1475 MachineRegisterInfo &MRI = 1476 Root.getParent()->getParent()->getParent()->getRegInfo(); 1477 1478 SmallVector<GEPInfo, 4> AddrInfo; 1479 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1480 1481 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1482 return None; 1483 1484 const GEPInfo &GEPInfo = AddrInfo[0]; 1485 1486 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm)) 1487 return None; 1488 1489 unsigned PtrReg = GEPInfo.SgprParts[0]; 1490 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1491 return {{ 1492 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1493 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1494 }}; 1495 } 1496 1497 InstructionSelector::ComplexRendererFns 1498 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { 1499 MachineRegisterInfo &MRI = 1500 Root.getParent()->getParent()->getParent()->getRegInfo(); 1501 1502 SmallVector<GEPInfo, 4> AddrInfo; 1503 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1504 1505 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1506 return None; 1507 1508 const GEPInfo &GEPInfo = AddrInfo[0]; 1509 unsigned PtrReg = GEPInfo.SgprParts[0]; 1510 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1511 if (!isUInt<32>(EncodedImm)) 1512 return None; 1513 1514 return {{ 1515 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1516 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1517 }}; 1518 } 1519 1520 InstructionSelector::ComplexRendererFns 1521 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { 1522 MachineInstr *MI = Root.getParent(); 1523 MachineBasicBlock *MBB = MI->getParent(); 1524 
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1525 1526 SmallVector<GEPInfo, 4> AddrInfo; 1527 getAddrModeInfo(*MI, MRI, AddrInfo); 1528 1529 // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, 1530 // then we can select all ptr + 32-bit offsets not just immediate offsets. 1531 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1532 return None; 1533 1534 const GEPInfo &GEPInfo = AddrInfo[0]; 1535 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm)) 1536 return None; 1537 1538 // If we make it this far we have a load with an 32-bit immediate offset. 1539 // It is OK to select this using a sgpr offset, because we have already 1540 // failed trying to select this load into one of the _IMM variants since 1541 // the _IMM Patterns are considered before the _SGPR patterns. 1542 unsigned PtrReg = GEPInfo.SgprParts[0]; 1543 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 1544 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) 1545 .addImm(GEPInfo.Imm); 1546 return {{ 1547 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1548 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); } 1549 }}; 1550 } 1551 1552 template <bool Signed> 1553 InstructionSelector::ComplexRendererFns 1554 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { 1555 MachineInstr *MI = Root.getParent(); 1556 MachineBasicBlock *MBB = MI->getParent(); 1557 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1558 1559 InstructionSelector::ComplexRendererFns Default = {{ 1560 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, 1561 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset 1562 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc 1563 }}; 1564 1565 if (!STI.hasFlatInstOffsets()) 1566 return Default; 1567 1568 const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); 1569 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) 1570 return Default; 1571 1572 Optional<int64_t> Offset = 1573 getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); 1574 if (!Offset.hasValue()) 1575 return Default; 1576 1577 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); 1578 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed)) 1579 return Default; 1580 1581 Register BasePtr = OpDef->getOperand(1).getReg(); 1582 1583 return {{ 1584 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); }, 1585 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); }, 1586 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc 1587 }}; 1588 } 1589 1590 InstructionSelector::ComplexRendererFns 1591 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const { 1592 return selectFlatOffsetImpl<false>(Root); 1593 } 1594 1595 InstructionSelector::ComplexRendererFns 1596 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { 1597 return selectFlatOffsetImpl<true>(Root); 1598 } 1599 1600 // FIXME: Implement 1601 static bool signBitIsZero(const MachineOperand &Op, 1602 const MachineRegisterInfo &MRI) { 1603 return false; 1604 } 1605 1606 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { 1607 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>(); 1608 return PSV && PSV->isStack(); 1609 } 1610 1611 InstructionSelector::ComplexRendererFns 1612 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { 1613 MachineInstr *MI = Root.getParent(); 1614 MachineBasicBlock *MBB = MI->getParent(); 1615 MachineFunction *MF 
= MBB->getParent(); 1616 MachineRegisterInfo &MRI = MF->getRegInfo(); 1617 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1618 1619 int64_t Offset = 0; 1620 if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { 1621 Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 1622 1623 // TODO: Should this be inside the render function? The iterator seems to 1624 // move. 1625 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), 1626 HighBits) 1627 .addImm(Offset & ~4095); 1628 1629 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1630 MIB.addReg(Info->getScratchRSrcReg()); 1631 }, 1632 [=](MachineInstrBuilder &MIB) { // vaddr 1633 MIB.addReg(HighBits); 1634 }, 1635 [=](MachineInstrBuilder &MIB) { // soffset 1636 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1637 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1638 1639 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1640 ? Info->getStackPtrOffsetReg() 1641 : Info->getScratchWaveOffsetReg(); 1642 MIB.addReg(SOffsetReg); 1643 }, 1644 [=](MachineInstrBuilder &MIB) { // offset 1645 MIB.addImm(Offset & 4095); 1646 }}}; 1647 } 1648 1649 assert(Offset == 0); 1650 1651 // Try to fold a frame index directly into the MUBUF vaddr field, and any 1652 // offsets. 1653 Optional<int> FI; 1654 Register VAddr = Root.getReg(); 1655 if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { 1656 if (isBaseWithConstantOffset(Root, MRI)) { 1657 const MachineOperand &LHS = RootDef->getOperand(1); 1658 const MachineOperand &RHS = RootDef->getOperand(2); 1659 const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 1660 const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 1661 if (LHSDef && RHSDef) { 1662 int64_t PossibleOffset = 1663 RHSDef->getOperand(1).getCImm()->getSExtValue(); 1664 if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && 1665 (!STI.privateMemoryResourceIsRangeChecked() || 1666 signBitIsZero(LHS, MRI))) { 1667 if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) 1668 FI = LHSDef->getOperand(1).getIndex(); 1669 else 1670 VAddr = LHS.getReg(); 1671 Offset = PossibleOffset; 1672 } 1673 } 1674 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { 1675 FI = RootDef->getOperand(1).getIndex(); 1676 } 1677 } 1678 1679 // If we don't know this private access is a local stack object, it needs to 1680 // be relative to the entry point's scratch wave offset register. 1681 // TODO: Should split large offsets that don't fit like above. 1682 // TODO: Don't use scratch wave offset just because the offset didn't fit. 1683 Register SOffset = FI.hasValue() ? 
Info->getStackPtrOffsetReg() 1684 : Info->getScratchWaveOffsetReg(); 1685 1686 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1687 MIB.addReg(Info->getScratchRSrcReg()); 1688 }, 1689 [=](MachineInstrBuilder &MIB) { // vaddr 1690 if (FI.hasValue()) 1691 MIB.addFrameIndex(FI.getValue()); 1692 else 1693 MIB.addReg(VAddr); 1694 }, 1695 [=](MachineInstrBuilder &MIB) { // soffset 1696 MIB.addReg(SOffset); 1697 }, 1698 [=](MachineInstrBuilder &MIB) { // offset 1699 MIB.addImm(Offset); 1700 }}}; 1701 } 1702 1703 InstructionSelector::ComplexRendererFns 1704 AMDGPUInstructionSelector::selectMUBUFScratchOffset( 1705 MachineOperand &Root) const { 1706 MachineInstr *MI = Root.getParent(); 1707 MachineBasicBlock *MBB = MI->getParent(); 1708 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1709 1710 int64_t Offset = 0; 1711 if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || 1712 !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) 1713 return {}; 1714 1715 const MachineFunction *MF = MBB->getParent(); 1716 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1717 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1718 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1719 1720 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1721 ? Info->getStackPtrOffsetReg() 1722 : Info->getScratchWaveOffsetReg(); 1723 return {{ 1724 [=](MachineInstrBuilder &MIB) { 1725 MIB.addReg(Info->getScratchRSrcReg()); 1726 }, // rsrc 1727 [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset 1728 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset 1729 }}; 1730 } 1731