1 //===--------------------- ResourceManager.h --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// The classes here represent processor resource units and their management 11 /// strategy. These classes are managed by the Scheduler. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H 16 #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H 17 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/MC/MCSchedule.h" 21 #include "llvm/MCA/Instruction.h" 22 #include "llvm/MCA/Support.h" 23 #include "llvm/Support/Compiler.h" 24 25 namespace llvm { 26 namespace mca { 27 28 /// Used to notify the internal state of a processor resource. 29 /// 30 /// A processor resource is available if it is not reserved, and there are 31 /// available slots in the buffer. A processor resource is unavailable if it 32 /// is either reserved, or the associated buffer is full. A processor resource 33 /// with a buffer size of -1 is always available if it is not reserved. 34 /// 35 /// Values of type ResourceStateEvent are returned by method 36 /// ResourceManager::canBeDispatched() 37 /// 38 /// The naming convention for resource state events is: 39 /// * Event names start with prefix RS_ 40 /// * Prefix RS_ is followed by a string describing the actual resource state. 41 enum ResourceStateEvent { 42 RS_BUFFER_AVAILABLE, 43 RS_BUFFER_UNAVAILABLE, 44 RS_RESERVED 45 }; 46 47 /// Resource allocation strategy used by hardware scheduler resources. 48 class LLVM_ABI ResourceStrategy { 49 ResourceStrategy(const ResourceStrategy &) = delete; 50 ResourceStrategy &operator=(const ResourceStrategy &) = delete; 51 52 public: 53 ResourceStrategy() = default; 54 virtual ~ResourceStrategy(); 55 56 /// Selects a processor resource unit from a ReadyMask. 57 virtual uint64_t select(uint64_t ReadyMask) = 0; 58 59 /// Called by the ResourceManager when a processor resource group, or a 60 /// processor resource with multiple units has become unavailable. 61 /// 62 /// The default strategy uses this information to bias its selection logic. used(uint64_t ResourceMask)63 virtual void used(uint64_t ResourceMask) {} 64 }; 65 66 /// Default resource allocation strategy used by processor resource groups and 67 /// processor resources with multiple units. 68 class LLVM_ABI DefaultResourceStrategy final : public ResourceStrategy { 69 /// A Mask of resource unit identifiers. 70 /// 71 /// There is one bit set for every available resource unit. 72 /// It defaults to the value of field ResourceSizeMask in ResourceState. 73 const uint64_t ResourceUnitMask; 74 75 /// A simple round-robin selector for processor resource units. 76 /// Each bit of this mask identifies a sub resource within a group. 77 /// 78 /// As an example, lets assume that this is a default policy for a 79 /// processor resource group composed by the following three units: 80 /// ResourceA -- 0b001 81 /// ResourceB -- 0b010 82 /// ResourceC -- 0b100 83 /// 84 /// Field NextInSequenceMask is used to select the next unit from the set of 85 /// resource units. It defaults to the value of field `ResourceUnitMasks` (in 86 /// this example, it defaults to mask '0b111'). 87 /// 88 /// The round-robin selector would firstly select 'ResourceC', then 89 /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the 90 /// corresponding bit in NextInSequenceMask is cleared. For example, if 91 /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes 92 /// 0xb011. 93 /// 94 /// When NextInSequenceMask becomes zero, it is automatically reset to the 95 /// default value (i.e. ResourceUnitMask). 96 uint64_t NextInSequenceMask; 97 98 /// This field is used to track resource units that are used (i.e. selected) 99 /// by other groups other than the one associated with this strategy object. 100 /// 101 /// In LLVM processor resource groups are allowed to partially (or fully) 102 /// overlap. That means, a same unit may be visible to multiple groups. 103 /// This field keeps track of uses that have originated from outside of 104 /// this group. The idea is to bias the selection strategy, so that resources 105 /// that haven't been used by other groups get prioritized. 106 /// 107 /// The end goal is to (try to) keep the resource distribution as much uniform 108 /// as possible. By construction, this mask only tracks one-level of resource 109 /// usage. Therefore, this strategy is expected to be less accurate when same 110 /// units are used multiple times by other groups within a single round of 111 /// select. 112 /// 113 /// Note: an LRU selector would have a better accuracy at the cost of being 114 /// slightly more expensive (mostly in terms of runtime cost). Methods 115 /// 'select' and 'used', are always in the hot execution path of llvm-mca. 116 /// Therefore, a slow implementation of 'select' would have a negative impact 117 /// on the overall performance of the tool. 118 uint64_t RemovedFromNextInSequence; 119 120 public: DefaultResourceStrategy(uint64_t UnitMask)121 DefaultResourceStrategy(uint64_t UnitMask) 122 : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), 123 RemovedFromNextInSequence(0) {} 124 virtual ~DefaultResourceStrategy() = default; 125 126 uint64_t select(uint64_t ReadyMask) override; 127 void used(uint64_t Mask) override; 128 }; 129 130 /// A processor resource descriptor. 131 /// 132 /// There is an instance of this class for every processor resource defined by 133 /// the machine scheduling model. 134 /// Objects of class ResourceState dynamically track the usage of processor 135 /// resource units. 136 class ResourceState { 137 /// An index to the MCProcResourceDesc entry in the processor model. 138 const unsigned ProcResourceDescIndex; 139 /// A resource mask. This is generated by the tool with the help of 140 /// function `mca::computeProcResourceMasks' (see Support.h). 141 /// 142 /// Field ResourceMask only has one bit set if this resource state describes a 143 /// processor resource unit (i.e. this is not a group). That means, we can 144 /// quickly check if a resource is a group by simply counting the number of 145 /// bits that are set in the mask. 146 /// 147 /// The most significant bit of a mask (MSB) uniquely identifies a resource. 148 /// Remaining bits are used to describe the composition of a group (Group). 149 /// 150 /// Example (little endian): 151 /// Resource | Mask | MSB | Group 152 /// ---------+------------+------------+------------ 153 /// A | 0b000001 | 0b000001 | 0b000000 154 /// | | | 155 /// B | 0b000010 | 0b000010 | 0b000000 156 /// | | | 157 /// C | 0b010000 | 0b010000 | 0b000000 158 /// | | | 159 /// D | 0b110010 | 0b100000 | 0b010010 160 /// 161 /// In this example, resources A, B and C are processor resource units. 162 /// Only resource D is a group resource, and it contains resources B and C. 163 /// That is because MSB(B) and MSB(C) are both contained within Group(D). 164 const uint64_t ResourceMask; 165 166 /// A ProcResource can have multiple units. 167 /// 168 /// For processor resource groups this field is a mask of contained resource 169 /// units. It is obtained from ResourceMask by clearing the highest set bit. 170 /// The number of resource units in a group can be simply computed as the 171 /// population count of this field. 172 /// 173 /// For normal (i.e. non-group) resources, the number of bits set in this mask 174 /// is equivalent to the number of units declared by the processor model (see 175 /// field 'NumUnits' in 'ProcResourceUnits'). 176 uint64_t ResourceSizeMask; 177 178 /// A mask of ready units. 179 uint64_t ReadyMask; 180 181 /// Buffered resources will have this field set to a positive number different 182 /// than zero. A buffered resource behaves like a reservation station 183 /// implementing its own buffer for out-of-order execution. 184 /// 185 /// A BufferSize of 1 is used by scheduler resources that force in-order 186 /// execution. 187 /// 188 /// A BufferSize of 0 is used to model in-order issue/dispatch resources. 189 /// Since in-order issue/dispatch resources don't implement buffers, dispatch 190 /// events coincide with issue events. 191 /// Also, no other instruction ca be dispatched/issue while this resource is 192 /// in use. Only when all the "resource cycles" are consumed (after the issue 193 /// event), a new instruction ca be dispatched. 194 const int BufferSize; 195 196 /// Available slots in the buffer (zero, if this is not a buffered resource). 197 unsigned AvailableSlots; 198 199 /// This field is set if this resource is currently reserved. 200 /// 201 /// Resources can be reserved for a number of cycles. 202 /// Instructions can still be dispatched to reserved resources. However, 203 /// istructions dispatched to a reserved resource cannot be issued to the 204 /// underlying units (i.e. pipelines) until the resource is released. 205 bool Unavailable; 206 207 const bool IsAGroup; 208 209 /// Checks for the availability of unit 'SubResMask' in the group. isSubResourceReady(uint64_t SubResMask)210 bool isSubResourceReady(uint64_t SubResMask) const { 211 return ReadyMask & SubResMask; 212 } 213 214 public: 215 LLVM_ABI ResourceState(const MCProcResourceDesc &Desc, unsigned Index, 216 uint64_t Mask); 217 getProcResourceID()218 unsigned getProcResourceID() const { return ProcResourceDescIndex; } getResourceMask()219 uint64_t getResourceMask() const { return ResourceMask; } getReadyMask()220 uint64_t getReadyMask() const { return ReadyMask; } getBufferSize()221 int getBufferSize() const { return BufferSize; } 222 isBuffered()223 bool isBuffered() const { return BufferSize > 0; } isInOrder()224 bool isInOrder() const { return BufferSize == 1; } 225 226 /// Returns true if this is an in-order dispatch/issue resource. isADispatchHazard()227 bool isADispatchHazard() const { return BufferSize == 0; } isReserved()228 bool isReserved() const { return Unavailable; } 229 setReserved()230 void setReserved() { Unavailable = true; } clearReserved()231 void clearReserved() { Unavailable = false; } 232 233 /// Returs true if this resource is not reserved, and if there are at least 234 /// `NumUnits` available units. 235 LLVM_ABI bool isReady(unsigned NumUnits = 1) const; 236 getNumReadyUnits()237 uint64_t getNumReadyUnits() const { return llvm::popcount(ReadyMask); } 238 isAResourceGroup()239 bool isAResourceGroup() const { return IsAGroup; } 240 containsResource(uint64_t ID)241 bool containsResource(uint64_t ID) const { return ResourceMask & ID; } 242 markSubResourceAsUsed(uint64_t ID)243 void markSubResourceAsUsed(uint64_t ID) { 244 assert(isSubResourceReady(ID)); 245 ReadyMask ^= ID; 246 } 247 releaseSubResource(uint64_t ID)248 void releaseSubResource(uint64_t ID) { 249 assert(!isSubResourceReady(ID)); 250 ReadyMask ^= ID; 251 } 252 getNumUnits()253 unsigned getNumUnits() const { 254 return isAResourceGroup() ? 1U : llvm::popcount(ResourceSizeMask); 255 } 256 257 /// Checks if there is an available slot in the resource buffer. 258 /// 259 /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if 260 /// there is a slot available. 261 /// 262 /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it 263 /// is reserved. 264 /// 265 /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. 266 LLVM_ABI ResourceStateEvent isBufferAvailable() const; 267 268 /// Reserve a buffer slot. 269 /// 270 /// Returns true if the buffer is not full. 271 /// It always returns true if BufferSize is set to zero. reserveBuffer()272 bool reserveBuffer() { 273 if (BufferSize <= 0) 274 return true; 275 276 --AvailableSlots; 277 assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); 278 return AvailableSlots; 279 } 280 281 /// Releases a slot in the buffer. releaseBuffer()282 void releaseBuffer() { 283 // Ignore dispatch hazards or invalid buffer sizes. 284 if (BufferSize <= 0) 285 return; 286 287 ++AvailableSlots; 288 assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); 289 } 290 291 #ifndef NDEBUG 292 void dump() const; 293 #endif 294 }; 295 296 /// A resource unit identifier. 297 /// 298 /// This is used to identify a specific processor resource unit using a pair 299 /// of indices where the 'first' index is a processor resource mask, and the 300 /// 'second' index is an index for a "sub-resource" (i.e. unit). 301 typedef std::pair<uint64_t, uint64_t> ResourceRef; 302 303 // First: a MCProcResourceDesc index identifying a buffered resource. 304 // Second: max number of buffer entries used in this resource. 305 typedef std::pair<unsigned, unsigned> BufferUsageEntry; 306 307 /// A resource manager for processor resource units and groups. 308 /// 309 /// This class owns all the ResourceState objects, and it is responsible for 310 /// acting on requests from a Scheduler by updating the internal state of 311 /// ResourceState objects. 312 /// This class doesn't know about instruction itineraries and functional units. 313 /// In future, it can be extended to support itineraries too through the same 314 /// public interface. 315 class ResourceManager { 316 // Set of resources available on the subtarget. 317 // 318 // There is an instance of ResourceState for every resource declared by the 319 // target scheduling model. 320 // 321 // Elements of this vector are ordered by resource kind. In particular, 322 // resource units take precedence over resource groups. 323 // 324 // The index of a processor resource in this vector depends on the value of 325 // its mask (see the description of field ResourceState::ResourceMask). In 326 // particular, it is computed as the position of the most significant bit set 327 // (MSB) in the mask plus one (since we want to ignore the invalid resource 328 // descriptor at index zero). 329 // 330 // Example (little endian): 331 // 332 // Resource | Mask | MSB | Index 333 // ---------+---------+---------+------- 334 // A | 0b00001 | 0b00001 | 1 335 // | | | 336 // B | 0b00100 | 0b00100 | 3 337 // | | | 338 // C | 0b10010 | 0b10000 | 5 339 // 340 // 341 // The same index is also used to address elements within vector `Strategies` 342 // and vector `Resource2Groups`. 343 std::vector<std::unique_ptr<ResourceState>> Resources; 344 std::vector<std::unique_ptr<ResourceStrategy>> Strategies; 345 346 // Used to quickly identify groups that own a particular resource unit. 347 std::vector<uint64_t> Resource2Groups; 348 349 // A table that maps processor resource IDs to processor resource masks. 350 SmallVector<uint64_t, 8> ProcResID2Mask; 351 352 // A table that maps resource indices to actual processor resource IDs in the 353 // scheduling model. 354 SmallVector<unsigned, 8> ResIndex2ProcResID; 355 356 // Keeps track of which resources are busy, and how many cycles are left 357 // before those become usable again. 358 SmallDenseMap<ResourceRef, unsigned> BusyResources; 359 360 // Set of processor resource units available on the target. 361 uint64_t ProcResUnitMask; 362 363 // Set of processor resource units that are available during this cycle. 364 uint64_t AvailableProcResUnits; 365 366 // Set of processor resources that are currently reserved. 367 uint64_t ReservedResourceGroups; 368 369 // Set of unavailable scheduler buffer resources. This is used internally to 370 // speedup `canBeDispatched()` queries. 371 uint64_t AvailableBuffers; 372 373 // Set of dispatch hazard buffer resources that are currently unavailable. 374 uint64_t ReservedBuffers; 375 376 // Returns the actual resource unit that will be used. 377 ResourceRef selectPipe(uint64_t ResourceID); 378 379 void use(const ResourceRef &RR); 380 void release(const ResourceRef &RR); 381 382 unsigned getNumUnits(uint64_t ResourceID) const; 383 384 // Overrides the selection strategy for the processor resource with the given 385 // mask. 386 LLVM_ABI void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, 387 uint64_t ResourceMask); 388 389 public: 390 LLVM_ABI ResourceManager(const MCSchedModel &SM); 391 virtual ~ResourceManager() = default; 392 393 // Overrides the selection strategy for the resource at index ResourceID in 394 // the MCProcResourceDesc table. setCustomStrategy(std::unique_ptr<ResourceStrategy> S,unsigned ResourceID)395 void setCustomStrategy(std::unique_ptr<ResourceStrategy> S, 396 unsigned ResourceID) { 397 assert(ResourceID < ProcResID2Mask.size() && 398 "Invalid resource index in input!"); 399 return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); 400 } 401 402 // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if 403 // there are enough available slots in the buffers. 404 LLVM_ABI ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; 405 406 // Return the processor resource identifier associated to this Mask. 407 LLVM_ABI unsigned resolveResourceMask(uint64_t Mask) const; 408 409 // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. 410 // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 411 LLVM_ABI void reserveBuffers(uint64_t ConsumedBuffers); 412 413 // Releases a slot from every buffered resource in mask `ConsumedBuffers`. 414 // ConsumedBuffers is a bitmask of previously acquired buffers (using method 415 // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are 416 // not automatically unreserved by this method. 417 LLVM_ABI void releaseBuffers(uint64_t ConsumedBuffers); 418 419 // Reserve a processor resource. A reserved resource is not available for 420 // instruction issue until it is released. 421 LLVM_ABI void reserveResource(uint64_t ResourceID); 422 423 // Release a previously reserved processor resource. 424 LLVM_ABI void releaseResource(uint64_t ResourceID); 425 426 // Returns a zero mask if resources requested by Desc are all available during 427 // this cycle. It returns a non-zero mask value only if there are unavailable 428 // processor resources; each bit set in the mask represents a busy processor 429 // resource unit or a reserved processor resource group. 430 LLVM_ABI uint64_t checkAvailability(const InstrDesc &Desc) const; 431 getProcResUnitMask()432 uint64_t getProcResUnitMask() const { return ProcResUnitMask; } getAvailableProcResUnits()433 uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; } 434 435 using ResourceWithCycles = std::pair<ResourceRef, ReleaseAtCycles>; 436 issueInstruction(const InstrDesc & Desc,SmallVectorImpl<ResourceWithCycles> & Pipes)437 void issueInstruction(const InstrDesc &Desc, 438 SmallVectorImpl<ResourceWithCycles> &Pipes) { 439 if (Desc.HasPartiallyOverlappingGroups) 440 return issueInstructionImpl(Desc, Pipes); 441 442 return fastIssueInstruction(Desc, Pipes); 443 } 444 445 // Selects pipeline resources consumed by an instruction. 446 // This method works under the assumption that used group resources don't 447 // partially overlap. The logic is guaranteed to find a valid resource unit 448 // schedule, no matter in which order individual uses are processed. For that 449 // reason, the vector of resource uses is simply (and quickly) processed in 450 // sequence. The resulting schedule is eventually stored into vector `Pipes`. 451 LLVM_ABI void 452 fastIssueInstruction(const InstrDesc &Desc, 453 SmallVectorImpl<ResourceWithCycles> &Pipes); 454 455 // Selects pipeline resources consumed by an instruction. 456 // This method works under the assumption that used resource groups may 457 // partially overlap. This complicates the selection process, because the 458 // order in which uses are processed matters. The logic internally prioritizes 459 // groups which are more constrained than others. 460 LLVM_ABI void 461 issueInstructionImpl(const InstrDesc &Desc, 462 SmallVectorImpl<ResourceWithCycles> &Pipes); 463 464 LLVM_ABI void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed); 465 466 #ifndef NDEBUG dump()467 void dump() const { 468 for (const std::unique_ptr<ResourceState> &Resource : Resources) 469 Resource->dump(); 470 } 471 #endif 472 }; 473 } // namespace mca 474 } // namespace llvm 475 476 #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H 477