xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--------------------- ResourceManager.h --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// The classes here represent processor resource units and their management
11 /// strategy.  These classes are managed by the Scheduler.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
16 #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/MC/MCSchedule.h"
21 #include "llvm/MCA/Instruction.h"
22 #include "llvm/MCA/Support.h"
23 #include "llvm/Support/Compiler.h"
24 
25 namespace llvm {
26 namespace mca {
27 
28 /// Used to notify the internal state of a processor resource.
29 ///
30 /// A processor resource is available if it is not reserved, and there are
31 /// available slots in the buffer.  A processor resource is unavailable if it
32 /// is either reserved, or the associated buffer is full. A processor resource
33 /// with a buffer size of -1 is always available if it is not reserved.
34 ///
35 /// Values of type ResourceStateEvent are returned by method
36 /// ResourceManager::canBeDispatched()
37 ///
38 /// The naming convention for resource state events is:
39 ///  * Event names start with prefix RS_
40 ///  * Prefix RS_ is followed by a string describing the actual resource state.
41 enum ResourceStateEvent {
42   RS_BUFFER_AVAILABLE,
43   RS_BUFFER_UNAVAILABLE,
44   RS_RESERVED
45 };
46 
47 /// Resource allocation strategy used by hardware scheduler resources.
48 class LLVM_ABI ResourceStrategy {
49   ResourceStrategy(const ResourceStrategy &) = delete;
50   ResourceStrategy &operator=(const ResourceStrategy &) = delete;
51 
52 public:
53   ResourceStrategy() = default;
54   virtual ~ResourceStrategy();
55 
56   /// Selects a processor resource unit from a ReadyMask.
57   virtual uint64_t select(uint64_t ReadyMask) = 0;
58 
59   /// Called by the ResourceManager when a processor resource group, or a
60   /// processor resource with multiple units has become unavailable.
61   ///
62   /// The default strategy uses this information to bias its selection logic.
used(uint64_t ResourceMask)63   virtual void used(uint64_t ResourceMask) {}
64 };
65 
66 /// Default resource allocation strategy used by processor resource groups and
67 /// processor resources with multiple units.
68 class LLVM_ABI DefaultResourceStrategy final : public ResourceStrategy {
69   /// A Mask of resource unit identifiers.
70   ///
71   /// There is one bit set for every available resource unit.
72   /// It defaults to the value of field ResourceSizeMask in ResourceState.
73   const uint64_t ResourceUnitMask;
74 
75   /// A simple round-robin selector for processor resource units.
76   /// Each bit of this mask identifies a sub resource within a group.
77   ///
78   /// As an example, lets assume that this is a default policy for a
79   /// processor resource group composed by the following three units:
80   ///   ResourceA -- 0b001
81   ///   ResourceB -- 0b010
82   ///   ResourceC -- 0b100
83   ///
84   /// Field NextInSequenceMask is used to select the next unit from the set of
85   /// resource units. It defaults to the value of field `ResourceUnitMasks` (in
86   /// this example, it defaults to mask '0b111').
87   ///
88   /// The round-robin selector would firstly select 'ResourceC', then
89   /// 'ResourceB', and eventually 'ResourceA'.  When a resource R is used, the
90   /// corresponding bit in NextInSequenceMask is cleared.  For example, if
91   /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
92   /// 0xb011.
93   ///
94   /// When NextInSequenceMask becomes zero, it is automatically reset to the
95   /// default value (i.e. ResourceUnitMask).
96   uint64_t NextInSequenceMask;
97 
98   /// This field is used to track resource units that are used (i.e. selected)
99   /// by other groups other than the one associated with this strategy object.
100   ///
101   /// In LLVM processor resource groups are allowed to partially (or fully)
102   /// overlap. That means, a same unit may be visible to multiple groups.
103   /// This field keeps track of uses that have originated from outside of
104   /// this group. The idea is to bias the selection strategy, so that resources
105   /// that haven't been used by other groups get prioritized.
106   ///
107   /// The end goal is to (try to) keep the resource distribution as much uniform
108   /// as possible. By construction, this mask only tracks one-level of resource
109   /// usage. Therefore, this strategy is expected to be less accurate when same
110   /// units are used multiple times by other groups within a single round of
111   /// select.
112   ///
113   /// Note: an LRU selector would have a better accuracy at the cost of being
114   /// slightly more expensive (mostly in terms of runtime cost). Methods
115   /// 'select' and 'used', are always in the hot execution path of llvm-mca.
116   /// Therefore, a slow implementation of 'select' would have a negative impact
117   /// on the overall performance of the tool.
118   uint64_t RemovedFromNextInSequence;
119 
120 public:
DefaultResourceStrategy(uint64_t UnitMask)121   DefaultResourceStrategy(uint64_t UnitMask)
122       : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask),
123         RemovedFromNextInSequence(0) {}
124   virtual ~DefaultResourceStrategy() = default;
125 
126   uint64_t select(uint64_t ReadyMask) override;
127   void used(uint64_t Mask) override;
128 };
129 
130 /// A processor resource descriptor.
131 ///
132 /// There is an instance of this class for every processor resource defined by
133 /// the machine scheduling model.
134 /// Objects of class ResourceState dynamically track the usage of processor
135 /// resource units.
136 class ResourceState {
137   /// An index to the MCProcResourceDesc entry in the processor model.
138   const unsigned ProcResourceDescIndex;
139   /// A resource mask. This is generated by the tool with the help of
140   /// function `mca::computeProcResourceMasks' (see Support.h).
141   ///
142   /// Field ResourceMask only has one bit set if this resource state describes a
143   /// processor resource unit (i.e. this is not a group). That means, we can
144   /// quickly check if a resource is a group by simply counting the number of
145   /// bits that are set in the mask.
146   ///
147   /// The most significant bit of a mask (MSB) uniquely identifies a resource.
148   /// Remaining bits are used to describe the composition of a group (Group).
149   ///
150   /// Example (little endian):
151   ///            Resource |  Mask      |  MSB       |  Group
152   ///            ---------+------------+------------+------------
153   ///            A        |  0b000001  |  0b000001  |  0b000000
154   ///                     |            |            |
155   ///            B        |  0b000010  |  0b000010  |  0b000000
156   ///                     |            |            |
157   ///            C        |  0b010000  |  0b010000  |  0b000000
158   ///                     |            |            |
159   ///            D        |  0b110010  |  0b100000  |  0b010010
160   ///
161   /// In this example, resources A, B and C are processor resource units.
162   /// Only resource D is a group resource, and it contains resources B and C.
163   /// That is because MSB(B) and MSB(C) are both contained within Group(D).
164   const uint64_t ResourceMask;
165 
166   /// A ProcResource can have multiple units.
167   ///
168   /// For processor resource groups this field is a mask of contained resource
169   /// units. It is obtained from ResourceMask by clearing the highest set bit.
170   /// The number of resource units in a group can be simply computed as the
171   /// population count of this field.
172   ///
173   /// For normal (i.e. non-group) resources, the number of bits set in this mask
174   /// is equivalent to the number of units declared by the processor model (see
175   /// field 'NumUnits' in 'ProcResourceUnits').
176   uint64_t ResourceSizeMask;
177 
178   /// A mask of ready units.
179   uint64_t ReadyMask;
180 
181   /// Buffered resources will have this field set to a positive number different
182   /// than zero. A buffered resource behaves like a reservation station
183   /// implementing its own buffer for out-of-order execution.
184   ///
185   /// A BufferSize of 1 is used by scheduler resources that force in-order
186   /// execution.
187   ///
188   /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
189   /// Since in-order issue/dispatch resources don't implement buffers, dispatch
190   /// events coincide with issue events.
191   /// Also, no other instruction ca be dispatched/issue while this resource is
192   /// in use. Only when all the "resource cycles" are consumed (after the issue
193   /// event), a new instruction ca be dispatched.
194   const int BufferSize;
195 
196   /// Available slots in the buffer (zero, if this is not a buffered resource).
197   unsigned AvailableSlots;
198 
199   /// This field is set if this resource is currently reserved.
200   ///
201   /// Resources can be reserved for a number of cycles.
202   /// Instructions can still be dispatched to reserved resources. However,
203   /// istructions dispatched to a reserved resource cannot be issued to the
204   /// underlying units (i.e. pipelines) until the resource is released.
205   bool Unavailable;
206 
207   const bool IsAGroup;
208 
209   /// Checks for the availability of unit 'SubResMask' in the group.
isSubResourceReady(uint64_t SubResMask)210   bool isSubResourceReady(uint64_t SubResMask) const {
211     return ReadyMask & SubResMask;
212   }
213 
214 public:
215   LLVM_ABI ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
216                          uint64_t Mask);
217 
getProcResourceID()218   unsigned getProcResourceID() const { return ProcResourceDescIndex; }
getResourceMask()219   uint64_t getResourceMask() const { return ResourceMask; }
getReadyMask()220   uint64_t getReadyMask() const { return ReadyMask; }
getBufferSize()221   int getBufferSize() const { return BufferSize; }
222 
isBuffered()223   bool isBuffered() const { return BufferSize > 0; }
isInOrder()224   bool isInOrder() const { return BufferSize == 1; }
225 
226   /// Returns true if this is an in-order dispatch/issue resource.
isADispatchHazard()227   bool isADispatchHazard() const { return BufferSize == 0; }
isReserved()228   bool isReserved() const { return Unavailable; }
229 
setReserved()230   void setReserved() { Unavailable = true; }
clearReserved()231   void clearReserved() { Unavailable = false; }
232 
233   /// Returs true if this resource is not reserved, and if there are at least
234   /// `NumUnits` available units.
235   LLVM_ABI bool isReady(unsigned NumUnits = 1) const;
236 
getNumReadyUnits()237   uint64_t getNumReadyUnits() const { return llvm::popcount(ReadyMask); }
238 
isAResourceGroup()239   bool isAResourceGroup() const { return IsAGroup; }
240 
containsResource(uint64_t ID)241   bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
242 
markSubResourceAsUsed(uint64_t ID)243   void markSubResourceAsUsed(uint64_t ID) {
244     assert(isSubResourceReady(ID));
245     ReadyMask ^= ID;
246   }
247 
releaseSubResource(uint64_t ID)248   void releaseSubResource(uint64_t ID) {
249     assert(!isSubResourceReady(ID));
250     ReadyMask ^= ID;
251   }
252 
getNumUnits()253   unsigned getNumUnits() const {
254     return isAResourceGroup() ? 1U : llvm::popcount(ResourceSizeMask);
255   }
256 
257   /// Checks if there is an available slot in the resource buffer.
258   ///
259   /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
260   /// there is a slot available.
261   ///
262   /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
263   /// is reserved.
264   ///
265   /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
266   LLVM_ABI ResourceStateEvent isBufferAvailable() const;
267 
268   /// Reserve a buffer slot.
269   ///
270   /// Returns true if the buffer is not full.
271   /// It always returns true if BufferSize is set to zero.
reserveBuffer()272   bool reserveBuffer() {
273     if (BufferSize <= 0)
274       return true;
275 
276     --AvailableSlots;
277     assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
278     return AvailableSlots;
279   }
280 
281   /// Releases a slot in the buffer.
releaseBuffer()282   void releaseBuffer() {
283     // Ignore dispatch hazards or invalid buffer sizes.
284     if (BufferSize <= 0)
285       return;
286 
287     ++AvailableSlots;
288     assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
289   }
290 
291 #ifndef NDEBUG
292   void dump() const;
293 #endif
294 };
295 
296 /// A resource unit identifier.
297 ///
298 /// This is used to identify a specific processor resource unit using a pair
299 /// of indices where the 'first' index is a processor resource mask, and the
300 /// 'second' index is an index for a "sub-resource" (i.e. unit).
301 typedef std::pair<uint64_t, uint64_t> ResourceRef;
302 
303 // First: a MCProcResourceDesc index identifying a buffered resource.
304 // Second: max number of buffer entries used in this resource.
305 typedef std::pair<unsigned, unsigned> BufferUsageEntry;
306 
307 /// A resource manager for processor resource units and groups.
308 ///
309 /// This class owns all the ResourceState objects, and it is responsible for
310 /// acting on requests from a Scheduler by updating the internal state of
311 /// ResourceState objects.
312 /// This class doesn't know about instruction itineraries and functional units.
313 /// In future, it can be extended to support itineraries too through the same
314 /// public interface.
315 class ResourceManager {
316   // Set of resources available on the subtarget.
317   //
318   // There is an instance of ResourceState for every resource declared by the
319   // target scheduling model.
320   //
321   // Elements of this vector are ordered by resource kind. In particular,
322   // resource units take precedence over resource groups.
323   //
324   // The index of a processor resource in this vector depends on the value of
325   // its mask (see the description of field ResourceState::ResourceMask).  In
326   // particular, it is computed as the position of the most significant bit set
327   // (MSB) in the mask plus one (since we want to ignore the invalid resource
328   // descriptor at index zero).
329   //
330   // Example (little endian):
331   //
332   //             Resource | Mask    |  MSB    | Index
333   //             ---------+---------+---------+-------
334   //                 A    | 0b00001 | 0b00001 |   1
335   //                      |         |         |
336   //                 B    | 0b00100 | 0b00100 |   3
337   //                      |         |         |
338   //                 C    | 0b10010 | 0b10000 |   5
339   //
340   //
341   // The same index is also used to address elements within vector `Strategies`
342   // and vector `Resource2Groups`.
343   std::vector<std::unique_ptr<ResourceState>> Resources;
344   std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
345 
346   // Used to quickly identify groups that own a particular resource unit.
347   std::vector<uint64_t> Resource2Groups;
348 
349   // A table that maps processor resource IDs to processor resource masks.
350   SmallVector<uint64_t, 8> ProcResID2Mask;
351 
352   // A table that maps resource indices to actual processor resource IDs in the
353   // scheduling model.
354   SmallVector<unsigned, 8> ResIndex2ProcResID;
355 
356   // Keeps track of which resources are busy, and how many cycles are left
357   // before those become usable again.
358   SmallDenseMap<ResourceRef, unsigned> BusyResources;
359 
360   // Set of processor resource units available on the target.
361   uint64_t ProcResUnitMask;
362 
363   // Set of processor resource units that are available during this cycle.
364   uint64_t AvailableProcResUnits;
365 
366   // Set of processor resources that are currently reserved.
367   uint64_t ReservedResourceGroups;
368 
369   // Set of unavailable scheduler buffer resources. This is used internally to
370   // speedup `canBeDispatched()` queries.
371   uint64_t AvailableBuffers;
372 
373   // Set of dispatch hazard buffer resources that are currently unavailable.
374   uint64_t ReservedBuffers;
375 
376   // Returns the actual resource unit that will be used.
377   ResourceRef selectPipe(uint64_t ResourceID);
378 
379   void use(const ResourceRef &RR);
380   void release(const ResourceRef &RR);
381 
382   unsigned getNumUnits(uint64_t ResourceID) const;
383 
384   // Overrides the selection strategy for the processor resource with the given
385   // mask.
386   LLVM_ABI void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
387                                       uint64_t ResourceMask);
388 
389 public:
390   LLVM_ABI ResourceManager(const MCSchedModel &SM);
391   virtual ~ResourceManager() = default;
392 
393   // Overrides the selection strategy for the resource at index ResourceID in
394   // the MCProcResourceDesc table.
setCustomStrategy(std::unique_ptr<ResourceStrategy> S,unsigned ResourceID)395   void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
396                          unsigned ResourceID) {
397     assert(ResourceID < ProcResID2Mask.size() &&
398            "Invalid resource index in input!");
399     return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
400   }
401 
402   // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
403   // there are enough available slots in the buffers.
404   LLVM_ABI ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const;
405 
406   // Return the processor resource identifier associated to this Mask.
407   LLVM_ABI unsigned resolveResourceMask(uint64_t Mask) const;
408 
409   // Acquires a slot from every buffered resource in mask `ConsumedBuffers`.
410   // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
411   LLVM_ABI void reserveBuffers(uint64_t ConsumedBuffers);
412 
413   // Releases a slot from every buffered resource in mask `ConsumedBuffers`.
414   // ConsumedBuffers is a bitmask of previously acquired buffers (using method
415   // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are
416   // not automatically unreserved by this method.
417   LLVM_ABI void releaseBuffers(uint64_t ConsumedBuffers);
418 
419   // Reserve a processor resource. A reserved resource is not available for
420   // instruction issue until it is released.
421   LLVM_ABI void reserveResource(uint64_t ResourceID);
422 
423   // Release a previously reserved processor resource.
424   LLVM_ABI void releaseResource(uint64_t ResourceID);
425 
426   // Returns a zero mask if resources requested by Desc are all available during
427   // this cycle. It returns a non-zero mask value only if there are unavailable
428   // processor resources; each bit set in the mask represents a busy processor
429   // resource unit or a reserved processor resource group.
430   LLVM_ABI uint64_t checkAvailability(const InstrDesc &Desc) const;
431 
getProcResUnitMask()432   uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
getAvailableProcResUnits()433   uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
434 
435   using ResourceWithCycles = std::pair<ResourceRef, ReleaseAtCycles>;
436 
issueInstruction(const InstrDesc & Desc,SmallVectorImpl<ResourceWithCycles> & Pipes)437   void issueInstruction(const InstrDesc &Desc,
438                         SmallVectorImpl<ResourceWithCycles> &Pipes) {
439     if (Desc.HasPartiallyOverlappingGroups)
440       return issueInstructionImpl(Desc, Pipes);
441 
442     return fastIssueInstruction(Desc, Pipes);
443   }
444 
445   // Selects pipeline resources consumed by an instruction.
446   // This method works under the assumption that used group resources don't
447   // partially overlap. The logic is guaranteed to find a valid resource unit
448   // schedule, no matter in which order individual uses are processed. For that
449   // reason, the vector of resource uses is simply (and quickly) processed in
450   // sequence. The resulting schedule is eventually stored into vector `Pipes`.
451   LLVM_ABI void
452   fastIssueInstruction(const InstrDesc &Desc,
453                        SmallVectorImpl<ResourceWithCycles> &Pipes);
454 
455   // Selects pipeline resources consumed by an instruction.
456   // This method works under the assumption that used resource groups may
457   // partially overlap. This complicates the selection process, because the
458   // order in which uses are processed matters. The logic internally prioritizes
459   // groups which are more constrained than others.
460   LLVM_ABI void
461   issueInstructionImpl(const InstrDesc &Desc,
462                        SmallVectorImpl<ResourceWithCycles> &Pipes);
463 
464   LLVM_ABI void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
465 
466 #ifndef NDEBUG
dump()467   void dump() const {
468     for (const std::unique_ptr<ResourceState> &Resource : Resources)
469       Resource->dump();
470   }
471 #endif
472 };
473 } // namespace mca
474 } // namespace llvm
475 
476 #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
477