1 //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // This class implements a deterministic finite automaton (DFA) based 9 // packetizing mechanism for VLIW architectures. It provides APIs to 10 // determine whether there exists a legal mapping of instructions to 11 // functional unit assignments in a packet. The DFA is auto-generated from 12 // the target's Schedule.td file. 13 // 14 // A DFA consists of 3 major elements: states, inputs, and transitions. For 15 // the packetizing mechanism, the input is the set of instruction classes for 16 // a target. The state models all possible combinations of functional unit 17 // consumption for a given set of instructions in a packet. A transition 18 // models the addition of an instruction to a packet. In the DFA constructed 19 // by this class, if an instruction can be added to a packet, then a valid 20 // transition exists from the corresponding state. Invalid transitions 21 // indicate that the instruction cannot be added to the current packet. 22 // 23 //===----------------------------------------------------------------------===// 24 25 #include "llvm/CodeGen/DFAPacketizer.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineInstrBundle.h" 29 #include "llvm/CodeGen/ScheduleDAG.h" 30 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 31 #include "llvm/CodeGen/TargetInstrInfo.h" 32 #include "llvm/CodeGen/TargetSubtargetInfo.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrItineraries.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <iterator> 41 #include <memory> 42 #include <vector> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "packets" 47 48 static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, 49 cl::init(0), cl::desc("If present, stops packetizing after N instructions")); 50 51 static unsigned InstrCount = 0; 52 53 // -------------------------------------------------------------------- 54 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp 55 56 static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { 57 return (Inp << DFA_MAX_RESOURCES) | FuncUnits; 58 } 59 60 /// Return the DFAInput for an instruction class input vector. 61 /// This function is used in both DFAPacketizer.cpp and in 62 /// DFAPacketizerEmitter.cpp. 63 static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { 64 DFAInput InsnInput = 0; 65 assert((InsnClass.size() <= DFA_MAX_RESTERMS) && 66 "Exceeded maximum number of DFA terms"); 67 for (auto U : InsnClass) 68 InsnInput = addDFAFuncUnits(InsnInput, U); 69 return InsnInput; 70 } 71 72 // -------------------------------------------------------------------- 73 74 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, 75 const DFAStateInput (*SIT)[2], 76 const unsigned *SET): 77 InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { 78 // Make sure DFA types are large enough for the number of terms & resources. 79 static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= 80 (8 * sizeof(DFAInput)), 81 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); 82 static_assert( 83 (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), 84 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); 85 } 86 87 // Read the DFA transition table and update CachedTable. 88 // 89 // Format of the transition tables: 90 // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid 91 // transitions 92 // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable 93 // for the ith state 94 // 95 void DFAPacketizer::ReadTable(unsigned int state) { 96 unsigned ThisState = DFAStateEntryTable[state]; 97 unsigned NextStateInTable = DFAStateEntryTable[state+1]; 98 // Early exit in case CachedTable has already contains this 99 // state's transitions. 100 if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) 101 return; 102 103 for (unsigned i = ThisState; i < NextStateInTable; i++) 104 CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = 105 DFAStateInputTable[i][1]; 106 } 107 108 // Return the DFAInput for an instruction class. 109 DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { 110 // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. 111 DFAInput InsnInput = 0; 112 unsigned i = 0; 113 (void)i; 114 for (const InstrStage *IS = InstrItins->beginStage(InsnClass), 115 *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { 116 InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); 117 assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); 118 } 119 return InsnInput; 120 } 121 122 // Return the DFAInput for an instruction class input vector. 123 DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { 124 return getDFAInsnInput(InsnClass); 125 } 126 127 // Check if the resources occupied by a MCInstrDesc are available in the 128 // current state. 129 bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { 130 unsigned InsnClass = MID->getSchedClass(); 131 DFAInput InsnInput = getInsnInput(InsnClass); 132 UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); 133 ReadTable(CurrentState); 134 return CachedTable.count(StateTrans) != 0; 135 } 136 137 // Reserve the resources occupied by a MCInstrDesc and change the current 138 // state to reflect that change. 139 void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { 140 unsigned InsnClass = MID->getSchedClass(); 141 DFAInput InsnInput = getInsnInput(InsnClass); 142 UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); 143 ReadTable(CurrentState); 144 assert(CachedTable.count(StateTrans) != 0); 145 CurrentState = CachedTable[StateTrans]; 146 } 147 148 // Check if the resources occupied by a machine instruction are available 149 // in the current state. 150 bool DFAPacketizer::canReserveResources(MachineInstr &MI) { 151 const MCInstrDesc &MID = MI.getDesc(); 152 return canReserveResources(&MID); 153 } 154 155 // Reserve the resources occupied by a machine instruction and change the 156 // current state to reflect that change. 157 void DFAPacketizer::reserveResources(MachineInstr &MI) { 158 const MCInstrDesc &MID = MI.getDesc(); 159 reserveResources(&MID); 160 } 161 162 namespace llvm { 163 164 // This class extends ScheduleDAGInstrs and overrides the schedule method 165 // to build the dependence graph. 166 class DefaultVLIWScheduler : public ScheduleDAGInstrs { 167 private: 168 AliasAnalysis *AA; 169 /// Ordered list of DAG postprocessing steps. 170 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 171 172 public: 173 DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, 174 AliasAnalysis *AA); 175 176 // Actual scheduling work. 177 void schedule() override; 178 179 /// DefaultVLIWScheduler takes ownership of the Mutation object. 180 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 181 Mutations.push_back(std::move(Mutation)); 182 } 183 184 protected: 185 void postprocessDAG(); 186 }; 187 188 } // end namespace llvm 189 190 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, 191 MachineLoopInfo &MLI, 192 AliasAnalysis *AA) 193 : ScheduleDAGInstrs(MF, &MLI), AA(AA) { 194 CanHandleTerminators = true; 195 } 196 197 /// Apply each ScheduleDAGMutation step in order. 198 void DefaultVLIWScheduler::postprocessDAG() { 199 for (auto &M : Mutations) 200 M->apply(this); 201 } 202 203 void DefaultVLIWScheduler::schedule() { 204 // Build the scheduling graph. 205 buildSchedGraph(AA); 206 postprocessDAG(); 207 } 208 209 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, 210 MachineLoopInfo &mli, AliasAnalysis *aa) 211 : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { 212 ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); 213 VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); 214 } 215 216 VLIWPacketizerList::~VLIWPacketizerList() { 217 delete VLIWScheduler; 218 delete ResourceTracker; 219 } 220 221 // End the current packet, bundle packet instructions and reset DFA state. 222 void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, 223 MachineBasicBlock::iterator MI) { 224 LLVM_DEBUG({ 225 if (!CurrentPacketMIs.empty()) { 226 dbgs() << "Finalizing packet:\n"; 227 for (MachineInstr *MI : CurrentPacketMIs) 228 dbgs() << " * " << *MI; 229 } 230 }); 231 if (CurrentPacketMIs.size() > 1) { 232 MachineInstr &MIFirst = *CurrentPacketMIs.front(); 233 finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); 234 } 235 CurrentPacketMIs.clear(); 236 ResourceTracker->clearResources(); 237 LLVM_DEBUG(dbgs() << "End packet\n"); 238 } 239 240 // Bundle machine instructions into packets. 241 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, 242 MachineBasicBlock::iterator BeginItr, 243 MachineBasicBlock::iterator EndItr) { 244 assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); 245 VLIWScheduler->startBlock(MBB); 246 VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, 247 std::distance(BeginItr, EndItr)); 248 VLIWScheduler->schedule(); 249 250 LLVM_DEBUG({ 251 dbgs() << "Scheduling DAG of the packetize region\n"; 252 VLIWScheduler->dump(); 253 }); 254 255 // Generate MI -> SU map. 256 MIToSUnit.clear(); 257 for (SUnit &SU : VLIWScheduler->SUnits) 258 MIToSUnit[SU.getInstr()] = &SU; 259 260 bool LimitPresent = InstrLimit.getPosition(); 261 262 // The main packetizer loop. 263 for (; BeginItr != EndItr; ++BeginItr) { 264 if (LimitPresent) { 265 if (InstrCount >= InstrLimit) { 266 EndItr = BeginItr; 267 break; 268 } 269 InstrCount++; 270 } 271 MachineInstr &MI = *BeginItr; 272 initPacketizerState(); 273 274 // End the current packet if needed. 275 if (isSoloInstruction(MI)) { 276 endPacket(MBB, MI); 277 continue; 278 } 279 280 // Ignore pseudo instructions. 281 if (ignorePseudoInstruction(MI, MBB)) 282 continue; 283 284 SUnit *SUI = MIToSUnit[&MI]; 285 assert(SUI && "Missing SUnit Info!"); 286 287 // Ask DFA if machine resource is available for MI. 288 LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); 289 290 bool ResourceAvail = ResourceTracker->canReserveResources(MI); 291 LLVM_DEBUG({ 292 if (ResourceAvail) 293 dbgs() << " Resources are available for adding MI to packet\n"; 294 else 295 dbgs() << " Resources NOT available\n"; 296 }); 297 if (ResourceAvail && shouldAddToPacket(MI)) { 298 // Dependency check for MI with instructions in CurrentPacketMIs. 299 for (auto MJ : CurrentPacketMIs) { 300 SUnit *SUJ = MIToSUnit[MJ]; 301 assert(SUJ && "Missing SUnit Info!"); 302 303 LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ); 304 // Is it legal to packetize SUI and SUJ together. 305 if (!isLegalToPacketizeTogether(SUI, SUJ)) { 306 LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n"); 307 // Allow packetization if dependency can be pruned. 308 if (!isLegalToPruneDependencies(SUI, SUJ)) { 309 // End the packet if dependency cannot be pruned. 310 LLVM_DEBUG(dbgs() 311 << " Could not prune dependencies for adding MI\n"); 312 endPacket(MBB, MI); 313 break; 314 } 315 LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n"); 316 } 317 } 318 } else { 319 LLVM_DEBUG(if (ResourceAvail) dbgs() 320 << "Resources are available, but instruction should not be " 321 "added to packet\n " 322 << MI); 323 // End the packet if resource is not available, or if the instruction 324 // shoud not be added to the current packet. 325 endPacket(MBB, MI); 326 } 327 328 // Add MI to the current packet. 329 LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); 330 BeginItr = addToPacket(MI); 331 } // For all instructions in the packetization range. 332 333 // End any packet left behind. 334 endPacket(MBB, EndItr); 335 VLIWScheduler->exitRegion(); 336 VLIWScheduler->finishBlock(); 337 } 338 339 bool VLIWPacketizerList::alias(const MachineMemOperand &Op1, 340 const MachineMemOperand &Op2, 341 bool UseTBAA) const { 342 if (!Op1.getValue() || !Op2.getValue()) 343 return true; 344 345 int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset()); 346 int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset; 347 int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset; 348 349 AliasResult AAResult = 350 AA->alias(MemoryLocation(Op1.getValue(), Overlapa, 351 UseTBAA ? Op1.getAAInfo() : AAMDNodes()), 352 MemoryLocation(Op2.getValue(), Overlapb, 353 UseTBAA ? Op2.getAAInfo() : AAMDNodes())); 354 355 return AAResult != NoAlias; 356 } 357 358 bool VLIWPacketizerList::alias(const MachineInstr &MI1, 359 const MachineInstr &MI2, 360 bool UseTBAA) const { 361 if (MI1.memoperands_empty() || MI2.memoperands_empty()) 362 return true; 363 364 for (const MachineMemOperand *Op1 : MI1.memoperands()) 365 for (const MachineMemOperand *Op2 : MI2.memoperands()) 366 if (alias(*Op1, *Op2, UseTBAA)) 367 return true; 368 return false; 369 } 370 371 // Add a DAG mutation object to the ordered list. 372 void VLIWPacketizerList::addMutation( 373 std::unique_ptr<ScheduleDAGMutation> Mutation) { 374 VLIWScheduler->addMutation(std::move(Mutation)); 375 } 376