//===- R600MergeVectorRegisters.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
///
/// For instance let's consider the following pseudo code:
/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
/// ...
/// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
/// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
/// ...
/// %7 = INSERT_SUBREG %4, sub3
/// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 31 #include "R600Defines.h" 32 #include "R600Subtarget.h" 33 #include "llvm/CodeGen/MachineDominators.h" 34 #include "llvm/CodeGen/MachineLoopInfo.h" 35 36 using namespace llvm; 37 38 #define DEBUG_TYPE "vec-merger" 39 40 static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) { 41 if (Reg.isPhysical()) 42 return false; 43 const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 44 return MI && MI->isImplicitDef(); 45 } 46 47 namespace { 48 49 class RegSeqInfo { 50 public: 51 MachineInstr *Instr; 52 DenseMap<Register, unsigned> RegToChan; 53 std::vector<Register> UndefReg; 54 55 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 56 assert(MI->getOpcode() == R600::REG_SEQUENCE); 57 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 58 MachineOperand &MO = Instr->getOperand(i); 59 unsigned Chan = Instr->getOperand(i + 1).getImm(); 60 if (isImplicitlyDef(MRI, MO.getReg())) 61 UndefReg.push_back(Chan); 62 else 63 RegToChan[MO.getReg()] = Chan; 64 } 65 } 66 67 RegSeqInfo() = default; 68 69 bool operator==(const RegSeqInfo &RSI) const { 70 return RSI.Instr == Instr; 71 } 72 }; 73 74 class R600VectorRegMerger : public MachineFunctionPass { 75 private: 76 using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>; 77 78 MachineRegisterInfo *MRI; 79 const R600InstrInfo *TII = nullptr; 80 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 81 InstructionSetMap PreviousRegSeqByReg; 82 InstructionSetMap PreviousRegSeqByUndefCount; 83 84 bool canSwizzle(const MachineInstr &MI) const; 85 bool areAllUsesSwizzeable(Register Reg) const; 86 void SwizzleInput(MachineInstr &, 87 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 88 bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge, 89 std::vector<std::pair<unsigned, unsigned>> &Remap) 
const; 90 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 91 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 92 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 93 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 94 MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec, 95 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 96 void RemoveMI(MachineInstr *); 97 void trackRSI(const RegSeqInfo &RSI); 98 99 public: 100 static char ID; 101 102 R600VectorRegMerger() : MachineFunctionPass(ID) {} 103 104 void getAnalysisUsage(AnalysisUsage &AU) const override { 105 AU.setPreservesCFG(); 106 AU.addRequired<MachineDominatorTree>(); 107 AU.addPreserved<MachineDominatorTree>(); 108 AU.addRequired<MachineLoopInfo>(); 109 AU.addPreserved<MachineLoopInfo>(); 110 MachineFunctionPass::getAnalysisUsage(AU); 111 } 112 113 MachineFunctionProperties getRequiredProperties() const override { 114 return MachineFunctionProperties() 115 .set(MachineFunctionProperties::Property::IsSSA); 116 } 117 118 StringRef getPassName() const override { 119 return "R600 Vector Registers Merge Pass"; 120 } 121 122 bool runOnMachineFunction(MachineFunction &Fn) override; 123 }; 124 125 } // end anonymous namespace 126 127 INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE, 128 "R600 Vector Reg Merger", false, false) 129 INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE, 130 "R600 Vector Reg Merger", false, false) 131 132 char R600VectorRegMerger::ID = 0; 133 134 char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID; 135 136 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 137 const { 138 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 139 return true; 140 switch (MI.getOpcode()) { 141 case R600::R600_ExportSwz: 142 case R600::EG_ExportSwz: 143 return true; 144 default: 145 return false; 146 } 147 } 148 149 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 
150 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap) 151 const { 152 unsigned CurrentUndexIdx = 0; 153 for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(), 154 E = ToMerge->RegToChan.end(); It != E; ++It) { 155 DenseMap<Register, unsigned>::const_iterator PosInUntouched = 156 Untouched->RegToChan.find((*It).first); 157 if (PosInUntouched != Untouched->RegToChan.end()) { 158 Remap.push_back(std::pair<unsigned, unsigned> 159 ((*It).second, (*PosInUntouched).second)); 160 continue; 161 } 162 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 163 return false; 164 Remap.push_back(std::pair<unsigned, unsigned> 165 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 166 } 167 168 return true; 169 } 170 171 static 172 unsigned getReassignedChan( 173 const std::vector<std::pair<unsigned, unsigned>> &RemapChan, 174 unsigned Chan) { 175 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 176 if (RemapChan[j].first == Chan) 177 return RemapChan[j].second; 178 } 179 llvm_unreachable("Chan wasn't reassigned"); 180 } 181 182 MachineInstr *R600VectorRegMerger::RebuildVector( 183 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 184 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 185 Register Reg = RSI->Instr->getOperand(0).getReg(); 186 MachineBasicBlock::iterator Pos = RSI->Instr; 187 MachineBasicBlock &MBB = *Pos->getParent(); 188 DebugLoc DL = Pos->getDebugLoc(); 189 190 Register SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 191 DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 192 std::vector<Register> UpdatedUndef = BaseRSI->UndefReg; 193 for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(), 194 E = RSI->RegToChan.end(); It != E; ++It) { 195 Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); 196 unsigned SubReg = (*It).first; 197 unsigned Swizzle = (*It).second; 198 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 199 200 
MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG), 201 DstReg) 202 .addReg(SrcVec) 203 .addReg(SubReg) 204 .addImm(Chan); 205 UpdatedRegToChan[SubReg] = Chan; 206 std::vector<Register>::iterator ChanPos = llvm::find(UpdatedUndef, Chan); 207 if (ChanPos != UpdatedUndef.end()) 208 UpdatedUndef.erase(ChanPos); 209 assert(!is_contained(UpdatedUndef, Chan) && 210 "UpdatedUndef shouldn't contain Chan more than once!"); 211 LLVM_DEBUG(dbgs() << " ->"; Tmp->dump();); 212 (void)Tmp; 213 SrcVec = DstReg; 214 } 215 MachineInstr *NewMI = 216 BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec); 217 LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); 218 219 LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); 220 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 221 E = MRI->use_instr_end(); It != E; ++It) { 222 LLVM_DEBUG(dbgs() << " "; (*It).dump(); dbgs() << " ->"); 223 SwizzleInput(*It, RemapChan); 224 LLVM_DEBUG((*It).dump()); 225 } 226 RSI->Instr->eraseFromParent(); 227 228 // Update RSI 229 RSI->Instr = NewMI; 230 RSI->RegToChan = UpdatedRegToChan; 231 RSI->UndefReg = UpdatedUndef; 232 233 return NewMI; 234 } 235 236 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 237 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 238 E = PreviousRegSeqByReg.end(); It != E; ++It) { 239 std::vector<MachineInstr *> &MIs = (*It).second; 240 MIs.erase(llvm::find(MIs, MI), MIs.end()); 241 } 242 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 243 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 244 std::vector<MachineInstr *> &MIs = (*It).second; 245 MIs.erase(llvm::find(MIs, MI), MIs.end()); 246 } 247 } 248 249 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 250 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 251 unsigned Offset; 252 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 253 Offset = 2; 254 else 255 Offset = 3; 256 for (unsigned i = 
0; i < 4; i++) { 257 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 258 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 259 if (RemapChan[j].first == Swizzle) { 260 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 261 break; 262 } 263 } 264 } 265 } 266 267 bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const { 268 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 269 E = MRI->use_instr_end(); It != E; ++It) { 270 if (!canSwizzle(*It)) 271 return false; 272 } 273 return true; 274 } 275 276 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 277 RegSeqInfo &CompatibleRSI, 278 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 279 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 280 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 281 if (!MOp->isReg()) 282 continue; 283 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 284 continue; 285 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 286 CompatibleRSI = PreviousRegSeq[MI]; 287 if (RSI == CompatibleRSI) 288 continue; 289 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 290 return true; 291 } 292 } 293 return false; 294 } 295 296 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 297 RegSeqInfo &CompatibleRSI, 298 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 299 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 300 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 301 return false; 302 std::vector<MachineInstr *> &MIs = 303 PreviousRegSeqByUndefCount[NeededUndefs]; 304 CompatibleRSI = PreviousRegSeq[MIs.back()]; 305 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 306 return true; 307 } 308 309 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 310 for (DenseMap<Register, unsigned>::const_iterator 311 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 312 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 313 } 314 
PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 315 PreviousRegSeq[RSI.Instr] = RSI; 316 } 317 318 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 319 if (skipFunction(Fn.getFunction())) 320 return false; 321 322 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 323 TII = ST.getInstrInfo(); 324 MRI = &Fn.getRegInfo(); 325 326 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 327 MBB != MBBe; ++MBB) { 328 MachineBasicBlock *MB = &*MBB; 329 PreviousRegSeq.clear(); 330 PreviousRegSeqByReg.clear(); 331 PreviousRegSeqByUndefCount.clear(); 332 333 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 334 MII != MIIE; ++MII) { 335 MachineInstr &MI = *MII; 336 if (MI.getOpcode() != R600::REG_SEQUENCE) { 337 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 338 Register Reg = MI.getOperand(1).getReg(); 339 for (MachineRegisterInfo::def_instr_iterator 340 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 341 It != E; ++It) { 342 RemoveMI(&(*It)); 343 } 344 } 345 continue; 346 } 347 348 RegSeqInfo RSI(*MRI, &MI); 349 350 // All uses of MI are swizzeable ? 
351 Register Reg = MI.getOperand(0).getReg(); 352 if (!areAllUsesSwizzeable(Reg)) 353 continue; 354 355 LLVM_DEBUG({ 356 dbgs() << "Trying to optimize "; 357 MI.dump(); 358 }); 359 360 RegSeqInfo CandidateRSI; 361 std::vector<std::pair<unsigned, unsigned>> RemapChan; 362 LLVM_DEBUG(dbgs() << "Using common slots...\n";); 363 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 364 // Remove CandidateRSI mapping 365 RemoveMI(CandidateRSI.Instr); 366 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 367 trackRSI(RSI); 368 continue; 369 } 370 LLVM_DEBUG(dbgs() << "Using free slots...\n";); 371 RemapChan.clear(); 372 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 373 RemoveMI(CandidateRSI.Instr); 374 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 375 trackRSI(RSI); 376 continue; 377 } 378 //Failed to merge 379 trackRSI(RSI); 380 } 381 } 382 return false; 383 } 384 385 llvm::FunctionPass *llvm::createR600VectorRegMerger() { 386 return new R600VectorRegMerger(); 387 } 388