//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements the AMDGPU memory model. More information
/// can be found here:
/// https://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));

namespace {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Memory operation flags. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};
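
// Illustrative only (not part of the pass logic): LLVM_MARK_AS_BITMASK_ENUM
// lets these flags be combined and tested with the usual bit operators, e.g.
// an atomic read-modify-write is described as both a load and a store:
//   SIMemOp Op = SIMemOp::LOAD | SIMemOp::STORE;
//   bool ReadsMemory = (Op & SIMemOp::LOAD) != SIMemOp::NONE;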

/// Position to insert a new instruction relative to an existing
/// instruction.
enum class Position {
  BEFORE,
  AFTER
};

/// The atomic synchronization scopes supported by the AMDGPU target.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};

/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operations. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic instructions.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
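
// Illustrative only: the compound masks above reduce address-space queries to
// a single bitwise test. For example, to ask whether an instruction (with a
// hypothetical mask InstrAddrSpace) touches any FLAT-accessible space:
//   bool TouchesFlat =
//       (InstrAddrSpace & SIAtomicAddrSpace::FLAT) != SIAtomicAddrSpace::NONE;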

class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;

  SIMemOpInfo(
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse) {

    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is also no cross address space ordering if the ordering
    // address space is the same as the instruction address space and
    // only contains a single address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }
  }
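
  // Illustrative only (conceptual, the constructor is private to
  // SIMemOpAccess): the normalization above can record weaker values than
  // requested. E.g. an LDS-only sequentially consistent atomic requested at
  // SYSTEM scope:
  //   SIMemOpInfo MOI(AtomicOrdering::SequentiallyConsistent,
  //                   SIAtomicScope::SYSTEM, SIAtomicAddrSpace::LDS,
  //                   SIAtomicAddrSpace::LDS);
  //   // MOI.getScope() == SIAtomicScope::WORKGROUP, and
  //   // MOI.getIsCrossAddressSpaceOrdering() == false.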

public:
  /// \returns Atomic synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces that must be ordered by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns True iff memory ordering of operations on
  /// different address spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is volatile, false otherwise.
  bool isVolatile() const {
    return IsVolatile;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is nontemporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is the last use of its location, false otherwise.
  bool isLastUse() const { return IsLastUse; }

  /// \returns True if the ordering constraint of the machine instruction used
  /// to create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }
};

class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to the LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines
  /// the SI atomic scope it corresponds to, the address spaces it
  /// covers, and whether the memory ordering applies between address
  /// spaces.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \returns A bit set of the address spaces accessed by \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions, using the machine module info \p MMI.
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI);

  /// \returns Load info if \p MI is a load operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
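
// Illustrative only: a typical query pattern in the pass's main loop,
// dispatching on the kind of memory operation an instruction represents:
//   SIMemOpAccess MOA(MMI);
//   if (const auto MOI = MOA.getLoadInfo(MI))
//     Changed |= expandLoad(*MOI, MI);
//   else if (const auto MOI = MOA.getStoreInfo(MI))
//     Changed |= expandStore(*MOI, MI);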

class SICacheControl {
protected:
  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);
2810b57cec5SDimitry Andric
282fe6060f1SDimitry Andric /// Sets named bit \p BitName to "true" if present in instruction \p MI.
283fe6060f1SDimitry Andric /// \returns Returns true if \p MI is modified, false otherwise.
284fe6060f1SDimitry Andric bool enableNamedBit(const MachineBasicBlock::iterator MI,
285fe6060f1SDimitry Andric AMDGPU::CPol::CPol Bit) const;

public:
  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction to bypass any caches up
  /// to the \p Scope memory scope for address spaces \p AddrSpace. Return true
  /// iff the instruction was modified.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction of kind \p Op associated with address
  /// spaces \p AddrSpace to indicate it is volatile and/or
  /// nontemporal/last-use. Return true iff the instruction was modified.
  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const {
    return false;
  }

  /// Inserts any necessary instructions at position \p Pos relative
  /// to instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed. Used
  /// between memory instructions to enforce the order they become visible as
  /// observed by other memory instructions executing in memory scope \p Scope.
  /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
  /// address spaces. Returns true iff any instructions were inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace,
                          SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to memory scope \p Scope.
  /// Returns true iff any instructions were inserted.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have completed and can be observed by
  /// subsequent memory instructions by any thread executing in memory scope \p
  /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
  /// between address spaces. Returns true iff any instructions were inserted.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;

  virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) const {
    return false;
  }
};
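
// Illustrative only: callers obtain the subtarget-appropriate implementation
// through the factory and then drive it generically, e.g. when lowering an
// agent-scope acquire load:
//   std::unique_ptr<SICacheControl> CC = SICacheControl::create(ST);
//   CC->insertWait(MI, SIAtomicScope::AGENT, SIAtomicAddrSpace::GLOBAL,
//                  SIMemOp::LOAD, /*IsCrossAddrSpaceOrdering=*/false,
//                  Position::AFTER);
//   CC->insertAcquire(MI, SIAtomicScope::AGENT, SIAtomicAddrSpace::GLOBAL,
//                     Position::AFTER);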

class SIGfx6CacheControl : public SICacheControl {
protected:
  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::GLC);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SLC);
  }

public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};

class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;
};

class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};

class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
  /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC0);
  }

  /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC1);
  }

  /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::NT);
  }

public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                           MachineBasicBlock::iterator &MI) const override {
    bool Changed = false;
    if (ST.hasForceStoreSC0SC1() &&
        (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
                                    SIAtomicAddrSpace::GLOBAL |
                                    SIAtomicAddrSpace::OTHER)) !=
            SIAtomicAddrSpace::NONE) {
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
    }
    return Changed;
  }
};

class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::DLC);
  }

public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;
};

class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};

class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  // Sets TH policy to \p Value if CPol operand is present in instruction \p
  // MI. \returns True if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;

  // Sets Scope policy to \p Value if CPol operand is present in instruction
  // \p MI. \returns True if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

  // Stores with system scope (SCOPE_SYS) need to wait for:
  // - loads or atomics (returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
  // - non-returning atomics - wait for STORECNT==0
  // TODO: SIInsertWaitcnts will not always be able to remove STORECNT waits
  // since it does not distinguish atomics-with-return from regular stores.
  // There is no need to wait if memory is cached (mtype != UC).
  bool
  insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;

  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
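
// Illustrative only (behavior as assumed from the overrides above): gfx12
// replaces the per-bit cache controls (GLC/SLC/DLC) with a scope field in the
// cache policy operand, so all three cache-bypass hooks reduce to one scope
// update, conceptually:
//   CC->enableLoadCacheBypass(MI, SIAtomicScope::AGENT,
//                             SIAtomicAddrSpace::GLOBAL);
//   // ... is equivalent to setAtomicScope(MI, AGENT, GLOBAL), which rewrites
//   // the CPol operand's scope (e.g. to a device-wide scope value).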

class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// \returns True iff instruction \p MI is an atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if the current function is modified, false
  /// otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  const MachineFunction *MF = MI.getMF();
  const Function &Fn = MF->getFunction();
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  DiagnosticInfoUnsupported BadTag(Fn, Str.str(), MI.getDebugLoc(), DS_Warning);
  Fn.getContext().diagnose(BadTag);
}

/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
/// If this tag isn't present, or if it has no meaningful values, returns \p
/// Default. Otherwise returns all the address spaces concerned by the MMRA.
static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
                                               SIAtomicAddrSpace Default) {
  static constexpr StringLiteral FenceASPrefix = "amdgpu-as";

  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
  if (!MMRA)
    return Default;

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  return (Result != SIAtomicAddrSpace::NONE) ? Result : Default;
}
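
// Illustrative only: at the IR level such a fence might carry MMRA metadata
// naming the address spaces it orders (names per the ASNames table above),
// for example:
//   fence syncscope("workgroup") release, !mmra !0
//   !0 = !{!"amdgpu-as", !"local"}
// which this helper maps to SIAtomicAddrSpace::LDS instead of \p Default.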

} // end anonymous namespace

void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}

std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}

SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}

SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_)
    : MMI(&MMI_) {}

std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(
            MI, "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse);
}
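
// Illustrative only: the merge rules above are conservative across multiple
// memory operands. E.g. for a merged instruction with one nontemporal global
// MMO and one ordinary LDS MMO, the result has IsNonTemporal == false (every
// MMO must be nontemporal), IsVolatile == true if any MMO is volatile, and
// InstrAddrSpace == SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS.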

std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
9010b57cec5SDimitry Andric
902bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
903bdd1243dSDimitry Andric SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
9040b57cec5SDimitry Andric assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
9050b57cec5SDimitry Andric
9060b57cec5SDimitry Andric if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
907bdd1243dSDimitry Andric return std::nullopt;
9080b57cec5SDimitry Andric
9090b57cec5SDimitry Andric AtomicOrdering Ordering =
9100b57cec5SDimitry Andric static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
9110b57cec5SDimitry Andric
9120b57cec5SDimitry Andric SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
9130b57cec5SDimitry Andric auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
9140b57cec5SDimitry Andric if (!ScopeOrNone) {
9150b57cec5SDimitry Andric reportUnsupported(MI, "Unsupported atomic synchronization scope");
916bdd1243dSDimitry Andric return std::nullopt;
9170b57cec5SDimitry Andric }
9180b57cec5SDimitry Andric
9190b57cec5SDimitry Andric SIAtomicScope Scope = SIAtomicScope::NONE;
9200b57cec5SDimitry Andric SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
9210b57cec5SDimitry Andric bool IsCrossAddressSpaceOrdering = false;
9220b57cec5SDimitry Andric std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
92381ad6265SDimitry Andric *ScopeOrNone;
9240b57cec5SDimitry Andric
9250b57cec5SDimitry Andric if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
9260b57cec5SDimitry Andric ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
9270b57cec5SDimitry Andric reportUnsupported(MI, "Unsupported atomic address space");
928bdd1243dSDimitry Andric return std::nullopt;
9290b57cec5SDimitry Andric }
9300b57cec5SDimitry Andric
9310b57cec5SDimitry Andric return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
932fe6060f1SDimitry Andric IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
9330b57cec5SDimitry Andric }
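// Editor's note (illustrative example, not from the original source): an IR
// fence such as
//   fence syncscope("agent") acquire
// reaches this pass as an ATOMIC_FENCE pseudo whose operand 0 holds the
// AtomicOrdering and operand 1 the SyncScope::ID, matching the two
// immediates decoded above.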
9340b57cec5SDimitry Andric
935bdd1243dSDimitry Andric std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
9360b57cec5SDimitry Andric const MachineBasicBlock::iterator &MI) const {
9370b57cec5SDimitry Andric assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
9380b57cec5SDimitry Andric
9390b57cec5SDimitry Andric if (!(MI->mayLoad() && MI->mayStore()))
940bdd1243dSDimitry Andric return std::nullopt;
9410b57cec5SDimitry Andric
9420b57cec5SDimitry Andric // Be conservative if there are no memory operands.
9430b57cec5SDimitry Andric if (MI->getNumMemOperands() == 0)
9440b57cec5SDimitry Andric return SIMemOpInfo();
9450b57cec5SDimitry Andric
9460b57cec5SDimitry Andric return constructFromMIWithMMO(MI);
9470b57cec5SDimitry Andric }
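// Editor's note (summary added for clarity): together with
// getAtomicFenceInfo, the getters above partition maybe-atomic instructions
// by their mayLoad()/mayStore() flags:
//   mayLoad() && !mayStore() -> getLoadInfo
//   !mayLoad() && mayStore() -> getStoreInfo
//   mayLoad() && mayStore()  -> getAtomicCmpxchgOrRmwInfo
// so each memory instruction is claimed by exactly one of them.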
9480b57cec5SDimitry Andric
949e8d8bef9SDimitry Andric SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
9500b57cec5SDimitry Andric TII = ST.getInstrInfo();
9510b57cec5SDimitry Andric IV = getIsaVersion(ST.getCPU());
952e8d8bef9SDimitry Andric InsertCacheInv = !AmdgcnSkipCacheInvalidations;
9530b57cec5SDimitry Andric }
9540b57cec5SDimitry Andric
955fe6060f1SDimitry Andric bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
956fe6060f1SDimitry Andric AMDGPU::CPol::CPol Bit) const {
957fe6060f1SDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
958fe6060f1SDimitry Andric if (!CPol)
959fe6060f1SDimitry Andric return false;
960fe6060f1SDimitry Andric
961fe6060f1SDimitry Andric CPol->setImm(CPol->getImm() | Bit);
962fe6060f1SDimitry Andric return true;
963fe6060f1SDimitry Andric }
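// Editor's note (minimal usage sketch; the concrete wrappers live in class
// declarations not shown here): helpers such as enableGLCBit are assumed to
// be thin wrappers along the lines of
//   bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
//     return enableNamedBit(MI, AMDGPU::CPol::GLC);
//   }
// Instructions without a cpol operand are left unchanged.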
964fe6060f1SDimitry Andric
9650b57cec5SDimitry Andric /* static */
9660b57cec5SDimitry Andric std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
9670b57cec5SDimitry Andric GCNSubtarget::Generation Generation = ST.getGeneration();
96881ad6265SDimitry Andric if (ST.hasGFX940Insts())
96981ad6265SDimitry Andric return std::make_unique<SIGfx940CacheControl>(ST);
970fe6060f1SDimitry Andric if (ST.hasGFX90AInsts())
971fe6060f1SDimitry Andric return std::make_unique<SIGfx90ACacheControl>(ST);
9720b57cec5SDimitry Andric if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
9738bcb0991SDimitry Andric return std::make_unique<SIGfx6CacheControl>(ST);
9740b57cec5SDimitry Andric if (Generation < AMDGPUSubtarget::GFX10)
9758bcb0991SDimitry Andric return std::make_unique<SIGfx7CacheControl>(ST);
97681ad6265SDimitry Andric if (Generation < AMDGPUSubtarget::GFX11)
977e8d8bef9SDimitry Andric return std::make_unique<SIGfx10CacheControl>(ST);
9781db9f3b2SDimitry Andric if (Generation < AMDGPUSubtarget::GFX12)
97981ad6265SDimitry Andric return std::make_unique<SIGfx11CacheControl>(ST);
9801db9f3b2SDimitry Andric return std::make_unique<SIGfx12CacheControl>(ST);
9810b57cec5SDimitry Andric }
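// Editor's note (added for clarity): the checks above run from most to least
// specific -- feature tests (GFX940, GFX90A) before the generation ladder --
// so the first match selects the most capable cache control. Typical use:
//   std::unique_ptr<SICacheControl> CC = SICacheControl::create(ST);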
9820b57cec5SDimitry Andric
9830b57cec5SDimitry Andric bool SIGfx6CacheControl::enableLoadCacheBypass(
9840b57cec5SDimitry Andric const MachineBasicBlock::iterator &MI,
9850b57cec5SDimitry Andric SIAtomicScope Scope,
9860b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
9870b57cec5SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
9880b57cec5SDimitry Andric bool Changed = false;
9890b57cec5SDimitry Andric
9900b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
9910b57cec5SDimitry Andric switch (Scope) {
9920b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
9930b57cec5SDimitry Andric case SIAtomicScope::AGENT:
9944824e7fdSDimitry Andric // Set L1 cache policy to MISS_EVICT.
9954824e7fdSDimitry Andric // Note: there is no L2 cache bypass policy at the ISA level.
9960b57cec5SDimitry Andric Changed |= enableGLCBit(MI);
9970b57cec5SDimitry Andric break;
9980b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
9990b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
10000b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
10010b57cec5SDimitry Andric // No cache to bypass.
10020b57cec5SDimitry Andric break;
10030b57cec5SDimitry Andric default:
10040b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
10050b57cec5SDimitry Andric }
10060b57cec5SDimitry Andric }
10070b57cec5SDimitry Andric
10080b57cec5SDimitry Andric /// The scratch address space does not need the global memory caches
10090b57cec5SDimitry Andric /// to be bypassed as all memory operations by the same thread are
10100b57cec5SDimitry Andric /// sequentially consistent, and no other thread can access scratch
10110b57cec5SDimitry Andric /// memory.
10120b57cec5SDimitry Andric
1013e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
10140b57cec5SDimitry Andric
10150b57cec5SDimitry Andric return Changed;
10160b57cec5SDimitry Andric }
10170b57cec5SDimitry Andric
1018fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableStoreCacheBypass(
1019fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1020fe6060f1SDimitry Andric SIAtomicScope Scope,
1021fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1022fe6060f1SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
1023fe6060f1SDimitry Andric bool Changed = false;
1024fe6060f1SDimitry Andric
1025fe6060f1SDimitry Andric /// The L1 cache is write-through, so it does not need to be bypassed. There is
1026fe6060f1SDimitry Andric /// no bypass control for the L2 cache at the ISA level.
1027fe6060f1SDimitry Andric
1028fe6060f1SDimitry Andric return Changed;
1029fe6060f1SDimitry Andric }
1030fe6060f1SDimitry Andric
1031fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableRMWCacheBypass(
1032fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1033fe6060f1SDimitry Andric SIAtomicScope Scope,
1034fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1035fe6060f1SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
1036fe6060f1SDimitry Andric bool Changed = false;
1037fe6060f1SDimitry Andric
10384824e7fdSDimitry Andric /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
10394824e7fdSDimitry Andric /// bypassed, and the GLC bit is instead used to indicate if they are
10404824e7fdSDimitry Andric /// return or no-return.
10414824e7fdSDimitry Andric /// Note: there is no L2 cache coherent bypass control at the ISA level.
1042fe6060f1SDimitry Andric
1043fe6060f1SDimitry Andric return Changed;
1044fe6060f1SDimitry Andric }
1045fe6060f1SDimitry Andric
1046e8d8bef9SDimitry Andric bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1047e8d8bef9SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1048*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
1049e8d8bef9SDimitry Andric // Only handle load and store, not atomic read-modify-write instructions. The
1050e8d8bef9SDimitry Andric // latter use glc to indicate if the atomic returns a result, so glc must not
1051e8d8bef9SDimitry Andric // be used for cache control.
10520b57cec5SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
1053e8d8bef9SDimitry Andric
1054e8d8bef9SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
1055e8d8bef9SDimitry Andric // instructions. The latter are always marked as volatile, which cannot be
1056e8d8bef9SDimitry Andric // handled sensibly here without pessimizing all atomics. They also do not
1057e8d8bef9SDimitry Andric // support the nontemporal attribute.
1058e8d8bef9SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1059e8d8bef9SDimitry Andric
10600b57cec5SDimitry Andric bool Changed = false;
10610b57cec5SDimitry Andric
1062e8d8bef9SDimitry Andric if (IsVolatile) {
10634824e7fdSDimitry Andric // Set L1 cache policy to be MISS_EVICT for load instructions
10644824e7fdSDimitry Andric // and MISS_LRU for store instructions.
10654824e7fdSDimitry Andric // Note: there is no L2 cache bypass policy at the ISA level.
1066e8d8bef9SDimitry Andric if (Op == SIMemOp::LOAD)
10670b57cec5SDimitry Andric Changed |= enableGLCBit(MI);
1068e8d8bef9SDimitry Andric
1069e8d8bef9SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
1070e8d8bef9SDimitry Andric // operations to be visible outside the program in a global order. Do not
1071e8d8bef9SDimitry Andric // request cross address space as only the global address space can be
1072e8d8bef9SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
1073e8d8bef9SDimitry Andric // address space operations.
1074e8d8bef9SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1075e8d8bef9SDimitry Andric Position::AFTER);
10760b57cec5SDimitry Andric
10770b57cec5SDimitry Andric return Changed;
10780b57cec5SDimitry Andric }
10790b57cec5SDimitry Andric
1080e8d8bef9SDimitry Andric if (IsNonTemporal) {
10814824e7fdSDimitry Andric // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
10824824e7fdSDimitry Andric // for both loads and stores, and the L2 cache policy to STREAM.
1083e8d8bef9SDimitry Andric Changed |= enableGLCBit(MI);
1084e8d8bef9SDimitry Andric Changed |= enableSLCBit(MI);
1085e8d8bef9SDimitry Andric return Changed;
1086e8d8bef9SDimitry Andric }
1087e8d8bef9SDimitry Andric
1088e8d8bef9SDimitry Andric return Changed;
1089e8d8bef9SDimitry Andric }
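// Editor's note (summary added for clarity): on GFX6 the cases above reduce
// to:
//   volatile load      -> GLC, plus a system-scope wait inserted after
//   volatile store     -> no cache bits, plus the same trailing wait
//   nontemporal access -> GLC | SLC (L1 MISS_EVICT, L2 STREAM)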
1090e8d8bef9SDimitry Andric
1091e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
1092e8d8bef9SDimitry Andric SIAtomicScope Scope,
1093e8d8bef9SDimitry Andric SIAtomicAddrSpace AddrSpace,
1094e8d8bef9SDimitry Andric SIMemOp Op,
1095e8d8bef9SDimitry Andric bool IsCrossAddrSpaceOrdering,
1096e8d8bef9SDimitry Andric Position Pos) const {
1097e8d8bef9SDimitry Andric bool Changed = false;
1098e8d8bef9SDimitry Andric
1099e8d8bef9SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
1100e8d8bef9SDimitry Andric DebugLoc DL = MI->getDebugLoc();
1101e8d8bef9SDimitry Andric
1102e8d8bef9SDimitry Andric if (Pos == Position::AFTER)
1103e8d8bef9SDimitry Andric ++MI;
1104e8d8bef9SDimitry Andric
1105e8d8bef9SDimitry Andric bool VMCnt = false;
1106e8d8bef9SDimitry Andric bool LGKMCnt = false;
1107e8d8bef9SDimitry Andric
1108e8d8bef9SDimitry Andric if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1109e8d8bef9SDimitry Andric SIAtomicAddrSpace::NONE) {
1110e8d8bef9SDimitry Andric switch (Scope) {
1111e8d8bef9SDimitry Andric case SIAtomicScope::SYSTEM:
1112e8d8bef9SDimitry Andric case SIAtomicScope::AGENT:
1113e8d8bef9SDimitry Andric VMCnt |= true;
1114e8d8bef9SDimitry Andric break;
1115e8d8bef9SDimitry Andric case SIAtomicScope::WORKGROUP:
1116e8d8bef9SDimitry Andric case SIAtomicScope::WAVEFRONT:
1117e8d8bef9SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1118e8d8bef9SDimitry Andric // The L1 cache keeps all memory operations in order for
1119e8d8bef9SDimitry Andric // wavefronts in the same work-group.
1120e8d8bef9SDimitry Andric break;
1121e8d8bef9SDimitry Andric default:
1122e8d8bef9SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1123e8d8bef9SDimitry Andric }
1124e8d8bef9SDimitry Andric }
1125e8d8bef9SDimitry Andric
1126e8d8bef9SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1127e8d8bef9SDimitry Andric switch (Scope) {
1128e8d8bef9SDimitry Andric case SIAtomicScope::SYSTEM:
1129e8d8bef9SDimitry Andric case SIAtomicScope::AGENT:
1130e8d8bef9SDimitry Andric case SIAtomicScope::WORKGROUP:
1131e8d8bef9SDimitry Andric // If there is no cross address space ordering, then an "S_WAITCNT
1132e8d8bef9SDimitry Andric // lgkmcnt(0)" is not needed, as LDS operations for all waves are executed
1133e8d8bef9SDimitry Andric // in a total global ordering as observed by all waves. It is required if
1134e8d8bef9SDimitry Andric // also synchronizing with global/GDS memory, as LDS operations could be
1135e8d8bef9SDimitry Andric // reordered with respect to later global/GDS memory operations of the
1136e8d8bef9SDimitry Andric // same wave.
1137e8d8bef9SDimitry Andric LGKMCnt |= IsCrossAddrSpaceOrdering;
1138e8d8bef9SDimitry Andric break;
1139e8d8bef9SDimitry Andric case SIAtomicScope::WAVEFRONT:
1140e8d8bef9SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1141e8d8bef9SDimitry Andric // The LDS keeps all memory operations in order for
114281ad6265SDimitry Andric // the same wavefront.
1143e8d8bef9SDimitry Andric break;
1144e8d8bef9SDimitry Andric default:
1145e8d8bef9SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1146e8d8bef9SDimitry Andric }
1147e8d8bef9SDimitry Andric }
1148e8d8bef9SDimitry Andric
1149e8d8bef9SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1150e8d8bef9SDimitry Andric switch (Scope) {
1151e8d8bef9SDimitry Andric case SIAtomicScope::SYSTEM:
1152e8d8bef9SDimitry Andric case SIAtomicScope::AGENT:
1153e8d8bef9SDimitry Andric // If there is no cross address space ordering, then a GDS "S_WAITCNT
1154e8d8bef9SDimitry Andric // lgkmcnt(0)" is not needed, as GDS operations for all waves are executed
1155e8d8bef9SDimitry Andric // in a total global ordering as observed by all waves. It is required if
1156e8d8bef9SDimitry Andric // also synchronizing with global/LDS memory, as GDS operations could be
1157e8d8bef9SDimitry Andric // reordered with respect to later global/LDS memory operations of the
1158e8d8bef9SDimitry Andric // same wave.
1159e8d8bef9SDimitry Andric LGKMCnt |= IsCrossAddrSpaceOrdering;
1160e8d8bef9SDimitry Andric break;
1161e8d8bef9SDimitry Andric case SIAtomicScope::WORKGROUP:
1162e8d8bef9SDimitry Andric case SIAtomicScope::WAVEFRONT:
1163e8d8bef9SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1164e8d8bef9SDimitry Andric // The GDS keeps all memory operations in order for
1165e8d8bef9SDimitry Andric // the same work-group.
1166e8d8bef9SDimitry Andric break;
1167e8d8bef9SDimitry Andric default:
1168e8d8bef9SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1169e8d8bef9SDimitry Andric }
1170e8d8bef9SDimitry Andric }
1171e8d8bef9SDimitry Andric
1172e8d8bef9SDimitry Andric if (VMCnt || LGKMCnt) {
1173e8d8bef9SDimitry Andric unsigned WaitCntImmediate =
1174e8d8bef9SDimitry Andric AMDGPU::encodeWaitcnt(IV,
1175e8d8bef9SDimitry Andric VMCnt ? 0 : getVmcntBitMask(IV),
1176e8d8bef9SDimitry Andric getExpcntBitMask(IV),
1177e8d8bef9SDimitry Andric LGKMCnt ? 0 : getLgkmcntBitMask(IV));
11785f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
11795f757f3fSDimitry Andric .addImm(WaitCntImmediate);
1180e8d8bef9SDimitry Andric Changed = true;
1181e8d8bef9SDimitry Andric }
1182e8d8bef9SDimitry Andric
1183e8d8bef9SDimitry Andric if (Pos == Position::AFTER)
1184e8d8bef9SDimitry Andric --MI;
1185e8d8bef9SDimitry Andric
1186e8d8bef9SDimitry Andric return Changed;
1187e8d8bef9SDimitry Andric }
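// Editor's worked example (a sketch, not from the original source): for a
// cross-address-space release over global and LDS at agent scope, both VMCnt
// and LGKMCnt are set, so the encoded immediate zeroes vmcnt and lgkmcnt and
// keeps expcnt at its full "no wait" bitmask, emitting roughly
//   S_WAITCNT_soft vmcnt(0) lgkmcnt(0)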
1188e8d8bef9SDimitry Andric
1189e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
11900b57cec5SDimitry Andric SIAtomicScope Scope,
11910b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace,
11920b57cec5SDimitry Andric Position Pos) const {
11935ffd83dbSDimitry Andric if (!InsertCacheInv)
11945ffd83dbSDimitry Andric return false;
11955ffd83dbSDimitry Andric
11960b57cec5SDimitry Andric bool Changed = false;
11970b57cec5SDimitry Andric
11980b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
11990b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc();
12000b57cec5SDimitry Andric
12010b57cec5SDimitry Andric if (Pos == Position::AFTER)
12020b57cec5SDimitry Andric ++MI;
12030b57cec5SDimitry Andric
12040b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
12050b57cec5SDimitry Andric switch (Scope) {
12060b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
12070b57cec5SDimitry Andric case SIAtomicScope::AGENT:
12080b57cec5SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
12090b57cec5SDimitry Andric Changed = true;
12100b57cec5SDimitry Andric break;
12110b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
12120b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
12130b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
12140b57cec5SDimitry Andric // No cache to invalidate.
12150b57cec5SDimitry Andric break;
12160b57cec5SDimitry Andric default:
12170b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
12180b57cec5SDimitry Andric }
12190b57cec5SDimitry Andric }
12200b57cec5SDimitry Andric
12210b57cec5SDimitry Andric /// The scratch address space does not need the global memory cache
12220b57cec5SDimitry Andric /// to be flushed as all memory operations by the same thread are
12230b57cec5SDimitry Andric /// sequentially consistent, and no other thread can access scratch
12240b57cec5SDimitry Andric /// memory.
12250b57cec5SDimitry Andric
1226e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
12270b57cec5SDimitry Andric
12280b57cec5SDimitry Andric if (Pos == Position::AFTER)
12290b57cec5SDimitry Andric --MI;
12300b57cec5SDimitry Andric
12310b57cec5SDimitry Andric return Changed;
12320b57cec5SDimitry Andric }
12330b57cec5SDimitry Andric
1234e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
12350b57cec5SDimitry Andric SIAtomicScope Scope,
12360b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace,
12370b57cec5SDimitry Andric bool IsCrossAddrSpaceOrdering,
12380b57cec5SDimitry Andric Position Pos) const {
1239e8d8bef9SDimitry Andric return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1240e8d8bef9SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
12410b57cec5SDimitry Andric }
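// Editor's note (added for clarity): on GFX6 a release is therefore just the
// wait above; no writeback instruction is emitted because the L1 is
// write-through and the L2 has no ISA-level writeback control.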
12420b57cec5SDimitry Andric
1243e8d8bef9SDimitry Andric bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
12440b57cec5SDimitry Andric SIAtomicScope Scope,
12450b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace,
12460b57cec5SDimitry Andric Position Pos) const {
12475ffd83dbSDimitry Andric if (!InsertCacheInv)
12485ffd83dbSDimitry Andric return false;
12495ffd83dbSDimitry Andric
12500b57cec5SDimitry Andric bool Changed = false;
12510b57cec5SDimitry Andric
12520b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
12530b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc();
12540b57cec5SDimitry Andric
12550b57cec5SDimitry Andric const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
12560b57cec5SDimitry Andric
1257e8d8bef9SDimitry Andric const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
12580b57cec5SDimitry Andric ? AMDGPU::BUFFER_WBINVL1
12590b57cec5SDimitry Andric : AMDGPU::BUFFER_WBINVL1_VOL;
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric if (Pos == Position::AFTER)
12620b57cec5SDimitry Andric ++MI;
12630b57cec5SDimitry Andric
12640b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
12650b57cec5SDimitry Andric switch (Scope) {
12660b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
12670b57cec5SDimitry Andric case SIAtomicScope::AGENT:
1268e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
12690b57cec5SDimitry Andric Changed = true;
12700b57cec5SDimitry Andric break;
12710b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
12720b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
12730b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
12740b57cec5SDimitry Andric // No cache to invalidate.
12750b57cec5SDimitry Andric break;
12760b57cec5SDimitry Andric default:
12770b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
12780b57cec5SDimitry Andric }
12790b57cec5SDimitry Andric }
12800b57cec5SDimitry Andric
12810b57cec5SDimitry Andric /// The scratch address space does not need the global memory cache
12820b57cec5SDimitry Andric /// to be flushed as all memory operations by the same thread are
12830b57cec5SDimitry Andric /// sequentially consistent, and no other thread can access scratch
12840b57cec5SDimitry Andric /// memory.
12850b57cec5SDimitry Andric
1286e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
12870b57cec5SDimitry Andric
12880b57cec5SDimitry Andric if (Pos == Position::AFTER)
12890b57cec5SDimitry Andric --MI;
12900b57cec5SDimitry Andric
12910b57cec5SDimitry Andric return Changed;
12920b57cec5SDimitry Andric }
12930b57cec5SDimitry Andric
1294fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableLoadCacheBypass(
1295fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1296fe6060f1SDimitry Andric SIAtomicScope Scope,
1297fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1298fe6060f1SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
1299fe6060f1SDimitry Andric bool Changed = false;
1300fe6060f1SDimitry Andric
1301fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1302fe6060f1SDimitry Andric switch (Scope) {
1303fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1304fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
13054824e7fdSDimitry Andric // Set the L1 cache policy to MISS_LRU.
13064824e7fdSDimitry Andric // Note: there is no L2 cache bypass policy at the ISA level.
1307fe6060f1SDimitry Andric Changed |= enableGLCBit(MI);
1308fe6060f1SDimitry Andric break;
1309fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1310fe6060f1SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
1311fe6060f1SDimitry Andric // different CUs. Therefore we need to bypass the L1, which is per CU.
1312fe6060f1SDimitry Andric // Otherwise, in non-threadgroup split mode, all waves of a work-group are
1313fe6060f1SDimitry Andric // on the same CU, and so the L1 does not need to be bypassed.
1314349cc55cSDimitry Andric if (ST.isTgSplitEnabled())
1315349cc55cSDimitry Andric Changed |= enableGLCBit(MI);
1316fe6060f1SDimitry Andric break;
1317fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1318fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1319fe6060f1SDimitry Andric // No cache to bypass.
1320fe6060f1SDimitry Andric break;
1321fe6060f1SDimitry Andric default:
1322fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1323fe6060f1SDimitry Andric }
1324fe6060f1SDimitry Andric }
1325fe6060f1SDimitry Andric
1326fe6060f1SDimitry Andric /// The scratch address space does not need the global memory caches
1327fe6060f1SDimitry Andric /// to be bypassed as all memory operations by the same thread are
1328fe6060f1SDimitry Andric /// sequentially consistent, and no other thread can access scratch
1329fe6060f1SDimitry Andric /// memory.
1330fe6060f1SDimitry Andric
1331fe6060f1SDimitry Andric /// Other address spaces do not have a cache.
1332fe6060f1SDimitry Andric
1333fe6060f1SDimitry Andric return Changed;
1334fe6060f1SDimitry Andric }
1335fe6060f1SDimitry Andric
1336fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableStoreCacheBypass(
1337fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1338fe6060f1SDimitry Andric SIAtomicScope Scope,
1339fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1340fe6060f1SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
1341fe6060f1SDimitry Andric bool Changed = false;
1342fe6060f1SDimitry Andric
1343fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1344fe6060f1SDimitry Andric switch (Scope) {
1345fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1346fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1347fe6060f1SDimitry Andric /// Do not set glc for store atomic operations as they implicitly write
1348fe6060f1SDimitry Andric /// through the L1 cache.
1349fe6060f1SDimitry Andric break;
1350fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1351fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1352fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1353fe6060f1SDimitry Andric // No cache to bypass. Store atomics implicitly write through the L1
1354fe6060f1SDimitry Andric // cache.
1355fe6060f1SDimitry Andric break;
1356fe6060f1SDimitry Andric default:
1357fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1358fe6060f1SDimitry Andric }
1359fe6060f1SDimitry Andric }
1360fe6060f1SDimitry Andric
1361fe6060f1SDimitry Andric /// The scratch address space does not need the global memory caches
1362fe6060f1SDimitry Andric /// to be bypassed as all memory operations by the same thread are
1363fe6060f1SDimitry Andric /// sequentially consistent, and no other thread can access scratch
1364fe6060f1SDimitry Andric /// memory.
1365fe6060f1SDimitry Andric
1366fe6060f1SDimitry Andric /// Other address spaces do not have a cache.
1367fe6060f1SDimitry Andric
1368fe6060f1SDimitry Andric return Changed;
1369fe6060f1SDimitry Andric }
1370fe6060f1SDimitry Andric
1371fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableRMWCacheBypass(
1372fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1373fe6060f1SDimitry Andric SIAtomicScope Scope,
1374fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1375fe6060f1SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
1376fe6060f1SDimitry Andric bool Changed = false;
1377fe6060f1SDimitry Andric
1378fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1379fe6060f1SDimitry Andric switch (Scope) {
1380fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1381fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1382fe6060f1SDimitry Andric /// Do not set glc for RMW atomic operations as they implicitly bypass
1383fe6060f1SDimitry Andric /// the L1 cache, and the glc bit is instead used to indicate if they are
1384fe6060f1SDimitry Andric /// return or no-return.
1385fe6060f1SDimitry Andric break;
1386fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1387fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1388fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1389fe6060f1SDimitry Andric // No cache to bypass. RMW atomics implicitly bypass the L1 cache.
1390fe6060f1SDimitry Andric break;
1391fe6060f1SDimitry Andric default:
1392fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1393fe6060f1SDimitry Andric }
1394fe6060f1SDimitry Andric }
1395fe6060f1SDimitry Andric
1396fe6060f1SDimitry Andric return Changed;
1397fe6060f1SDimitry Andric }
1398fe6060f1SDimitry Andric
1399fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1400fe6060f1SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1401*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
1402fe6060f1SDimitry Andric // Only handle load and store, not atomic read-modify-write instructions. The
1403fe6060f1SDimitry Andric // latter use glc to indicate if the atomic returns a result, so glc must not
1404fe6060f1SDimitry Andric // be used for cache control.
1405fe6060f1SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
1406fe6060f1SDimitry Andric
1407fe6060f1SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
1408fe6060f1SDimitry Andric // instructions. The latter are always marked as volatile, which cannot be
1409fe6060f1SDimitry Andric // handled sensibly here without pessimizing all atomics. They also do not
1410fe6060f1SDimitry Andric // support the nontemporal attribute.
1411fe6060f1SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1412fe6060f1SDimitry Andric
1413fe6060f1SDimitry Andric bool Changed = false;
1414fe6060f1SDimitry Andric
1415fe6060f1SDimitry Andric if (IsVolatile) {
14164824e7fdSDimitry Andric // Set L1 cache policy to be MISS_EVICT for load instructions
14174824e7fdSDimitry Andric // and MISS_LRU for store instructions.
14184824e7fdSDimitry Andric // Note: there is no L2 cache bypass policy at the ISA level.
1419349cc55cSDimitry Andric if (Op == SIMemOp::LOAD)
1420fe6060f1SDimitry Andric Changed |= enableGLCBit(MI);
1421fe6060f1SDimitry Andric
1422fe6060f1SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
1423fe6060f1SDimitry Andric // operations to be visible outside the program in a global order. Do not
1424fe6060f1SDimitry Andric // request cross address space as only the global address space can be
1425fe6060f1SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
1426fe6060f1SDimitry Andric // address space operations.
1427fe6060f1SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1428fe6060f1SDimitry Andric Position::AFTER);
1429fe6060f1SDimitry Andric
1430fe6060f1SDimitry Andric return Changed;
1431fe6060f1SDimitry Andric }
1432fe6060f1SDimitry Andric
1433fe6060f1SDimitry Andric if (IsNonTemporal) {
14344824e7fdSDimitry Andric // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
14354824e7fdSDimitry Andric // for both loads and stores, and the L2 cache policy to STREAM.
1436fe6060f1SDimitry Andric Changed |= enableGLCBit(MI);
1437fe6060f1SDimitry Andric Changed |= enableSLCBit(MI);
1438fe6060f1SDimitry Andric return Changed;
1439fe6060f1SDimitry Andric }
1440fe6060f1SDimitry Andric
1441fe6060f1SDimitry Andric return Changed;
1442fe6060f1SDimitry Andric }
1443fe6060f1SDimitry Andric
1444fe6060f1SDimitry Andric bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
1445fe6060f1SDimitry Andric SIAtomicScope Scope,
1446fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace,
1447fe6060f1SDimitry Andric SIMemOp Op,
1448fe6060f1SDimitry Andric bool IsCrossAddrSpaceOrdering,
1449fe6060f1SDimitry Andric Position Pos) const {
1450fe6060f1SDimitry Andric if (ST.isTgSplitEnabled()) {
1451fe6060f1SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
1452fe6060f1SDimitry Andric // different CUs. Therefore we need to wait for global or GDS memory
1453fe6060f1SDimitry Andric // operations to complete to ensure they are visible to waves in the other
1454fe6060f1SDimitry Andric // CUs. Otherwise, in non-threadgroup split mode, all waves of a work-group
1455fe6060f1SDimitry Andric // are on the same CU, so there is no need to wait for global memory, as all
1456fe6060f1SDimitry Andric // waves in the work-group access the same L1, nor for GDS, as accesses are
1457fe6060f1SDimitry Andric // ordered on a CU.
1458fe6060f1SDimitry Andric if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1459fe6060f1SDimitry Andric SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1460fe6060f1SDimitry Andric (Scope == SIAtomicScope::WORKGROUP)) {
1461fe6060f1SDimitry Andric // Same as GFX7 using agent scope.
1462fe6060f1SDimitry Andric Scope = SIAtomicScope::AGENT;
1463fe6060f1SDimitry Andric }
1464fe6060f1SDimitry Andric // In threadgroup split mode LDS cannot be allocated so no need to wait for
1465fe6060f1SDimitry Andric // LDS memory operations.
1466fe6060f1SDimitry Andric AddrSpace &= ~SIAtomicAddrSpace::LDS;
1467fe6060f1SDimitry Andric }
1468fe6060f1SDimitry Andric return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
1469fe6060f1SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
1470fe6060f1SDimitry Andric }
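// Editor's note (illustrative example): with threadgroup split enabled, a
// workgroup-scope wait over global | LDS is promoted above to an agent-scope
// wait, LDS is dropped (it cannot be allocated in that mode), and the result
// is handled by the GFX7 logic.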
1471fe6060f1SDimitry Andric
1472fe6060f1SDimitry Andric bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
1473fe6060f1SDimitry Andric SIAtomicScope Scope,
1474fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace,
1475fe6060f1SDimitry Andric Position Pos) const {
1476fe6060f1SDimitry Andric if (!InsertCacheInv)
1477fe6060f1SDimitry Andric return false;
1478fe6060f1SDimitry Andric
1479fe6060f1SDimitry Andric bool Changed = false;
1480fe6060f1SDimitry Andric
1481fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
1482fe6060f1SDimitry Andric DebugLoc DL = MI->getDebugLoc();
1483fe6060f1SDimitry Andric
1484fe6060f1SDimitry Andric if (Pos == Position::AFTER)
1485fe6060f1SDimitry Andric ++MI;
1486fe6060f1SDimitry Andric
1487fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1488fe6060f1SDimitry Andric switch (Scope) {
1489fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1490fe6060f1SDimitry Andric // Ensures that following loads will not see stale remote VMEM data or
1491fe6060f1SDimitry Andric // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
1492fe6060f1SDimitry Andric // CC will never be stale due to the local memory probes.
1493fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
1494fe6060f1SDimitry Andric // Inserting an "S_WAITCNT vmcnt(0)" after is not required because the
1495fe6060f1SDimitry Andric // hardware does not reorder memory operations by the same wave with
1496fe6060f1SDimitry Andric // respect to a preceding "BUFFER_INVL2". The invalidate is guaranteed to
1497fe6060f1SDimitry Andric // remove any cache lines of earlier writes by the same wave and ensures
1498fe6060f1SDimitry Andric // later reads by the same wave will refetch the cache lines.
1499fe6060f1SDimitry Andric Changed = true;
1500fe6060f1SDimitry Andric break;
1501fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1502fe6060f1SDimitry Andric // Same as GFX7.
1503fe6060f1SDimitry Andric break;
1504fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1505fe6060f1SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
1506fe6060f1SDimitry Andric // different CUs. Therefore we need to invalidate the L1, which is per CU.
1507fe6060f1SDimitry Andric // Otherwise, in non-threadgroup split mode, all waves of a work-group are
1508fe6060f1SDimitry Andric // on the same CU, and so the L1 does not need to be invalidated.
1509fe6060f1SDimitry Andric if (ST.isTgSplitEnabled()) {
1510fe6060f1SDimitry Andric // Same as GFX7 using agent scope.
1511fe6060f1SDimitry Andric Scope = SIAtomicScope::AGENT;
1512fe6060f1SDimitry Andric }
1513fe6060f1SDimitry Andric break;
1514fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1515fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1516fe6060f1SDimitry Andric // Same as GFX7.
1517fe6060f1SDimitry Andric break;
1518fe6060f1SDimitry Andric default:
1519fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1520fe6060f1SDimitry Andric }
1521fe6060f1SDimitry Andric }
1522fe6060f1SDimitry Andric
1523fe6060f1SDimitry Andric /// The scratch address space does not need the global memory cache
1524fe6060f1SDimitry Andric /// to be flushed as all memory operations by the same thread are
1525fe6060f1SDimitry Andric /// sequentially consistent, and no other thread can access scratch
1526fe6060f1SDimitry Andric /// memory.
1527fe6060f1SDimitry Andric
1528fe6060f1SDimitry Andric /// Other address spaces do not have a cache.
1529fe6060f1SDimitry Andric
1530fe6060f1SDimitry Andric if (Pos == Position::AFTER)
1531fe6060f1SDimitry Andric --MI;
1532fe6060f1SDimitry Andric
1533fe6060f1SDimitry Andric Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);
1534fe6060f1SDimitry Andric
1535fe6060f1SDimitry Andric return Changed;
1536fe6060f1SDimitry Andric }
1537fe6060f1SDimitry Andric
1538fe6060f1SDimitry Andric bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
1539fe6060f1SDimitry Andric SIAtomicScope Scope,
1540fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace,
1541fe6060f1SDimitry Andric bool IsCrossAddrSpaceOrdering,
1542fe6060f1SDimitry Andric Position Pos) const {
1543fe6060f1SDimitry Andric bool Changed = false;
1544fe6060f1SDimitry Andric
1545fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
15461db9f3b2SDimitry Andric const DebugLoc &DL = MI->getDebugLoc();
1547fe6060f1SDimitry Andric
1548fe6060f1SDimitry Andric if (Pos == Position::AFTER)
1549fe6060f1SDimitry Andric ++MI;
1550fe6060f1SDimitry Andric
1551fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1552fe6060f1SDimitry Andric switch (Scope) {
1553fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1554fe6060f1SDimitry Andric // Inserting an "S_WAITCNT vmcnt(0)" before is not required because the
1555fe6060f1SDimitry Andric // hardware does not reorder memory operations by the same wave with
1556fe6060f1SDimitry Andric // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
1557fe6060f1SDimitry Andric // to initiate writeback of any dirty cache lines of earlier writes by the
1558fe6060f1SDimitry Andric // same wave. An "S_WAITCNT vmcnt(0)" is needed after to ensure the
1559fe6060f1SDimitry Andric // writeback has completed.
156081ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
156181ad6265SDimitry Andric // Set SC bits to indicate system scope.
156281ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
1563fe6060f1SDimitry Andric // This is followed by the same code as GFX7, which will insert the
1564fe6060f1SDimitry Andric // "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2".
1565fe6060f1SDimitry Andric Changed = true;
1566fe6060f1SDimitry Andric break;
1567fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1568fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1569fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1570fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1571fe6060f1SDimitry Andric // Same as GFX7.
1572fe6060f1SDimitry Andric break;
1573fe6060f1SDimitry Andric default:
1574fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1575fe6060f1SDimitry Andric }
1576fe6060f1SDimitry Andric }
1577fe6060f1SDimitry Andric
1578fe6060f1SDimitry Andric if (Pos == Position::AFTER)
1579fe6060f1SDimitry Andric --MI;
1580fe6060f1SDimitry Andric
1581fe6060f1SDimitry Andric Changed |=
1582fe6060f1SDimitry Andric SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
1583fe6060f1SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
1584fe6060f1SDimitry Andric
1585fe6060f1SDimitry Andric return Changed;
1586fe6060f1SDimitry Andric }
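// Editor's note (sequence sketch added for clarity): a system-scope release
// on GFX90A therefore emits, in order, roughly
//   BUFFER_WBL2 (cpol = SC0 | SC1)
//   S_WAITCNT vmcnt(0)   // from the GFX7 insertRelease path
// while agent scope and narrower fall through to the GFX7 behavior alone.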
1587fe6060f1SDimitry Andric
158881ad6265SDimitry Andric bool SIGfx940CacheControl::enableLoadCacheBypass(
158981ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
159081ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
159181ad6265SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
159281ad6265SDimitry Andric bool Changed = false;
159381ad6265SDimitry Andric
159481ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
159581ad6265SDimitry Andric switch (Scope) {
159681ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
159781ad6265SDimitry Andric // Set SC bits to indicate system scope.
159881ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
159981ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
160081ad6265SDimitry Andric break;
160181ad6265SDimitry Andric case SIAtomicScope::AGENT:
160281ad6265SDimitry Andric // Set SC bits to indicate agent scope.
160381ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
160481ad6265SDimitry Andric break;
160581ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
160681ad6265SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
160781ad6265SDimitry Andric // different CUs. Therefore we need to bypass the L1, which is per CU.
160881ad6265SDimitry Andric // Otherwise, in non-threadgroup split mode, all waves of a work-group are
160981ad6265SDimitry Andric // on the same CU, and so the L1 does not need to be bypassed. Setting SC
161081ad6265SDimitry Andric // bits to indicate work-group scope will do this automatically.
161181ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
161281ad6265SDimitry Andric break;
161381ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
161481ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
161581ad6265SDimitry Andric // Leave SC bits unset to indicate wavefront scope.
161681ad6265SDimitry Andric break;
161781ad6265SDimitry Andric default:
161881ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
161981ad6265SDimitry Andric }
162081ad6265SDimitry Andric }
162181ad6265SDimitry Andric
162281ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
162381ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
162481ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
162581ad6265SDimitry Andric /// memory.
162681ad6265SDimitry Andric
162781ad6265SDimitry Andric /// Other address spaces do not have a cache.
162881ad6265SDimitry Andric
162981ad6265SDimitry Andric return Changed;
163081ad6265SDimitry Andric }
163181ad6265SDimitry Andric
163281ad6265SDimitry Andric bool SIGfx940CacheControl::enableStoreCacheBypass(
163381ad6265SDimitry Andric const MachineBasicBlock::iterator &MI,
163481ad6265SDimitry Andric SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
163581ad6265SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
163681ad6265SDimitry Andric bool Changed = false;
163781ad6265SDimitry Andric
163881ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
163981ad6265SDimitry Andric switch (Scope) {
164081ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
164181ad6265SDimitry Andric // Set SC bits to indicate system scope.
164281ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
164381ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
164481ad6265SDimitry Andric break;
164581ad6265SDimitry Andric case SIAtomicScope::AGENT:
164681ad6265SDimitry Andric // Set SC bits to indicate agent scope.
164781ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
164881ad6265SDimitry Andric break;
164981ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
165081ad6265SDimitry Andric // Set SC bits to indicate workgroup scope.
165181ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
165281ad6265SDimitry Andric break;
165381ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
165481ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
165581ad6265SDimitry Andric // Leave SC bits unset to indicate wavefront scope.
165681ad6265SDimitry Andric break;
165781ad6265SDimitry Andric default:
165881ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
165981ad6265SDimitry Andric }
166081ad6265SDimitry Andric }
166181ad6265SDimitry Andric
166281ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
166381ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
166481ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
166581ad6265SDimitry Andric /// memory.
166681ad6265SDimitry Andric
166781ad6265SDimitry Andric /// Other address spaces do not have a cache.
166881ad6265SDimitry Andric
166981ad6265SDimitry Andric return Changed;
167081ad6265SDimitry Andric }
167181ad6265SDimitry Andric
167281ad6265SDimitry Andric bool SIGfx940CacheControl::enableRMWCacheBypass(
167381ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
167481ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
167581ad6265SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
167681ad6265SDimitry Andric bool Changed = false;
167781ad6265SDimitry Andric
167881ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
167981ad6265SDimitry Andric switch (Scope) {
168081ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
168181ad6265SDimitry Andric // Set SC1 bit to indicate system scope.
168281ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
168381ad6265SDimitry Andric break;
168481ad6265SDimitry Andric case SIAtomicScope::AGENT:
168581ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
168681ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
168781ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
168881ad6265SDimitry Andric // RMW atomic operations implicitly bypass the L1 cache and only use SC1
168981ad6265SDimitry Andric // to indicate system or agent scope. The SC0 bit is used to indicate if
169081ad6265SDimitry Andric // they are return or no-return. Leave SC1 bit unset to indicate agent
169181ad6265SDimitry Andric // scope.
169281ad6265SDimitry Andric break;
169381ad6265SDimitry Andric default:
169481ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
169581ad6265SDimitry Andric }
169681ad6265SDimitry Andric }
169781ad6265SDimitry Andric
169881ad6265SDimitry Andric return Changed;
169981ad6265SDimitry Andric }
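// Editor's note (added for clarity): for RMW atomics on GFX940 only SC1 is
// ever set here; e.g. a system-scope atomic RMW gets SC1, while SC0 remains
// reserved for the return/no-return encoding described above.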
170081ad6265SDimitry Andric
170181ad6265SDimitry Andric bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
170281ad6265SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1703*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
170481ad6265SDimitry Andric // Only handle load and store, not atomic read-modify-write instructions. The
170581ad6265SDimitry Andric // latter use glc to indicate if the atomic returns a result, so glc must not
170681ad6265SDimitry Andric // be used for cache control.
170781ad6265SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
170881ad6265SDimitry Andric
170981ad6265SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
171081ad6265SDimitry Andric // instructions. The latter are always marked as volatile, which cannot be
171181ad6265SDimitry Andric // handled sensibly here without pessimizing all atomics. They also do not
171281ad6265SDimitry Andric // support the nontemporal attribute.
171381ad6265SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
171481ad6265SDimitry Andric
171581ad6265SDimitry Andric bool Changed = false;
171681ad6265SDimitry Andric
171781ad6265SDimitry Andric if (IsVolatile) {
171881ad6265SDimitry Andric // Set SC bits to indicate system scope.
171981ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
172081ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
172181ad6265SDimitry Andric
172281ad6265SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
172381ad6265SDimitry Andric // operations to be visible outside the program in a global order. Do not
172481ad6265SDimitry Andric // request cross address space as only the global address space can be
172581ad6265SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
172681ad6265SDimitry Andric // address space operations.
172781ad6265SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
172881ad6265SDimitry Andric Position::AFTER);
172981ad6265SDimitry Andric
173081ad6265SDimitry Andric return Changed;
173181ad6265SDimitry Andric }
173281ad6265SDimitry Andric
173381ad6265SDimitry Andric if (IsNonTemporal) {
173481ad6265SDimitry Andric Changed |= enableNTBit(MI);
173581ad6265SDimitry Andric return Changed;
173681ad6265SDimitry Andric }
173781ad6265SDimitry Andric
173881ad6265SDimitry Andric return Changed;
173981ad6265SDimitry Andric }
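// Editor's note (summary added for clarity): on GFX940 the mapping above is
//   volatile    -> SC0 | SC1, plus a trailing system-scope wait
//   nontemporal -> NT
// replacing the GLC/SLC encodings used by the older targets above.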
174081ad6265SDimitry Andric
174181ad6265SDimitry Andric bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
174281ad6265SDimitry Andric SIAtomicScope Scope,
174381ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace,
174481ad6265SDimitry Andric Position Pos) const {
174581ad6265SDimitry Andric if (!InsertCacheInv)
174681ad6265SDimitry Andric return false;
174781ad6265SDimitry Andric
174881ad6265SDimitry Andric bool Changed = false;
174981ad6265SDimitry Andric
175081ad6265SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
175181ad6265SDimitry Andric DebugLoc DL = MI->getDebugLoc();
175281ad6265SDimitry Andric
175381ad6265SDimitry Andric if (Pos == Position::AFTER)
175481ad6265SDimitry Andric ++MI;
175581ad6265SDimitry Andric
175681ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
175781ad6265SDimitry Andric switch (Scope) {
175881ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
175981ad6265SDimitry Andric // Ensures that following loads will not see stale remote VMEM data or
176081ad6265SDimitry Andric // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
176181ad6265SDimitry Andric // CC will never be stale due to the local memory probes.
176281ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
176381ad6265SDimitry Andric // Set SC bits to indicate system scope.
176481ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
176581ad6265SDimitry Andric // Inserting an "S_WAITCNT vmcnt(0)" after is not required because the
176681ad6265SDimitry Andric // hardware does not reorder memory operations by the same wave with
176781ad6265SDimitry Andric // respect to a preceding "BUFFER_INV". The invalidate is guaranteed to
176881ad6265SDimitry Andric // remove any cache lines of earlier writes by the same wave and ensures
176981ad6265SDimitry Andric // later reads by the same wave will refetch the cache lines.
177081ad6265SDimitry Andric Changed = true;
177181ad6265SDimitry Andric break;
177281ad6265SDimitry Andric case SIAtomicScope::AGENT:
177381ad6265SDimitry Andric // Ensures that following loads will not see stale remote data or local
177481ad6265SDimitry Andric // MTYPE NC global data. Local MTYPE RW and CC memory will never be stale
177581ad6265SDimitry Andric // due to the memory probes.
177681ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
177781ad6265SDimitry Andric // Set SC bits to indicate agent scope.
177881ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC1);
177981ad6265SDimitry Andric // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
178081ad6265SDimitry Andric // does not reorder memory operations with respect to a preceding buffer
178181ad6265SDimitry Andric // invalidate. The invalidate is guaranteed to remove any cache lines of
178281ad6265SDimitry Andric // earlier writes and ensures later reads will refetch the cache lines.
178381ad6265SDimitry Andric Changed = true;
178481ad6265SDimitry Andric break;
178581ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
178681ad6265SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
178781ad6265SDimitry Andric // different CUs. Therefore we need to invalidate the L1, which is per CU.
178881ad6265SDimitry Andric // Otherwise, in non-threadgroup split mode, all waves of a work-group are
178981ad6265SDimitry Andric // on the same CU, and so the L1 does not need to be invalidated.
179081ad6265SDimitry Andric if (ST.isTgSplitEnabled()) {
179181ad6265SDimitry Andric // Ensures L1 is invalidated if in threadgroup split mode. In
179281ad6265SDimitry Andric // non-threadgroup split mode it is a NOP, but there is no point generating
179381ad6265SDimitry Andric // it in that case when we know we are not in that mode.
179481ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
179581ad6265SDimitry Andric // Set SC bits to indicate work-group scope.
179681ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC0);
179781ad6265SDimitry Andric // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
179881ad6265SDimitry Andric // does not reorder memory operations with respect to a preceding buffer
179981ad6265SDimitry Andric // invalidate. The invalidate is guaranteed to remove any cache lines of
180081ad6265SDimitry Andric // earlier writes and ensures later reads will refetch the cache lines.
180181ad6265SDimitry Andric Changed = true;
180281ad6265SDimitry Andric }
180381ad6265SDimitry Andric break;
180481ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
180581ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
180681ad6265SDimitry Andric // Could generate "BUFFER_INV" but it would do nothing as there are no
180781ad6265SDimitry Andric // caches to invalidate.
180881ad6265SDimitry Andric break;
180981ad6265SDimitry Andric default:
181081ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
181181ad6265SDimitry Andric }
181281ad6265SDimitry Andric }
181381ad6265SDimitry Andric
181481ad6265SDimitry Andric /// The scratch address space does not need the global memory cache
181581ad6265SDimitry Andric /// to be flushed as all memory operations by the same thread are
181681ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
181781ad6265SDimitry Andric /// memory.
181881ad6265SDimitry Andric
181981ad6265SDimitry Andric /// Other address spaces do not have a cache.
182081ad6265SDimitry Andric
182181ad6265SDimitry Andric if (Pos == Position::AFTER)
182281ad6265SDimitry Andric --MI;
182381ad6265SDimitry Andric
182481ad6265SDimitry Andric return Changed;
182581ad6265SDimitry Andric }
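
// Illustrative summary of the gfx940 acquire invalidates built above. This
// is a sketch inferred from the cases in insertAcquire, not an ISA
// reference, and the printed operand names are assumed:
//   system scope:     buffer_inv sc0 sc1
//   agent scope:      buffer_inv sc1
//   work-group scope: buffer_inv sc0   (only emitted in TG-split mode)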
182681ad6265SDimitry Andric
182781ad6265SDimitry Andric bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
182881ad6265SDimitry Andric SIAtomicScope Scope,
182981ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace,
183081ad6265SDimitry Andric bool IsCrossAddrSpaceOrdering,
183181ad6265SDimitry Andric Position Pos) const {
183281ad6265SDimitry Andric bool Changed = false;
183381ad6265SDimitry Andric
183481ad6265SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
183581ad6265SDimitry Andric DebugLoc DL = MI->getDebugLoc();
183681ad6265SDimitry Andric
183781ad6265SDimitry Andric if (Pos == Position::AFTER)
183881ad6265SDimitry Andric ++MI;
183981ad6265SDimitry Andric
184081ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
184181ad6265SDimitry Andric switch (Scope) {
184281ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
184381ad6265SDimitry Andric // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
184481ad6265SDimitry Andric // hardware does not reorder memory operations by the same wave with
184581ad6265SDimitry Andric // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
184681ad6265SDimitry Andric // to initiate writeback of any dirty cache lines of earlier writes by the
184781ad6265SDimitry Andric // same wave. An "S_WAITCNT vmcnt(0)" is needed after to ensure the
184881ad6265SDimitry Andric // writeback has completed.
184981ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
185081ad6265SDimitry Andric // Set SC bits to indicate system scope.
185181ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
185281ad6265SDimitry Andric // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
185381ad6265SDimitry Andric // SIAtomicScope::SYSTEM, the following insertWait will generate the
185481ad6265SDimitry Andric // required "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2".
185581ad6265SDimitry Andric Changed = true;
185681ad6265SDimitry Andric break;
185781ad6265SDimitry Andric case SIAtomicScope::AGENT:
185881ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
185981ad6265SDimitry Andric // Set SC bits to indicate agent scope.
186081ad6265SDimitry Andric .addImm(AMDGPU::CPol::SC1);
186181ad6265SDimitry Andric
186281ad6265SDimitry Andric // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
186381ad6265SDimitry Andric // SIAtomicScope::AGENT, the following insertWait will generate the
186481ad6265SDimitry Andric // required "S_WAITCNT vmcnt(0)".
186581ad6265SDimitry Andric Changed = true;
186681ad6265SDimitry Andric break;
186781ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
186881ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
186981ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
187081ad6265SDimitry Andric // Do not generate "BUFFER_WBL2" as there are no caches it would
187181ad6265SDimitry Andric // writeback, and would require an otherwise unnecessary
187281ad6265SDimitry Andric // "S_WAITCNT vmcnt(0)".
187381ad6265SDimitry Andric break;
187481ad6265SDimitry Andric default:
187581ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
187681ad6265SDimitry Andric }
187781ad6265SDimitry Andric }
187881ad6265SDimitry Andric
187981ad6265SDimitry Andric if (Pos == Position::AFTER)
188081ad6265SDimitry Andric --MI;
188181ad6265SDimitry Andric
188281ad6265SDimitry Andric // Insert the S_WAITCNT needed by any "BUFFER_WBL2" emitted above, as well
188381ad6265SDimitry Andric // as any other required S_WAITCNT.
188481ad6265SDimitry Andric Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
188581ad6265SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
188681ad6265SDimitry Andric
188781ad6265SDimitry Andric return Changed;
188881ad6265SDimitry Andric }
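
// For reference, a hedged sketch of what the code above produces for a
// system-scope release on gfx940 (assumed printed syntax):
//   buffer_wbl2 sc0 sc1    ; initiate writeback of dirty L2 lines
//   s_waitcnt vmcnt(0)     ; emitted by the insertWait call above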
188981ad6265SDimitry Andric
18900b57cec5SDimitry Andric bool SIGfx10CacheControl::enableLoadCacheBypass(
18910b57cec5SDimitry Andric const MachineBasicBlock::iterator &MI,
18920b57cec5SDimitry Andric SIAtomicScope Scope,
18930b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
18940b57cec5SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
18950b57cec5SDimitry Andric bool Changed = false;
18960b57cec5SDimitry Andric
18970b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
18980b57cec5SDimitry Andric switch (Scope) {
18990b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
19000b57cec5SDimitry Andric case SIAtomicScope::AGENT:
19014824e7fdSDimitry Andric // Set the L0 and L1 cache policies to MISS_EVICT.
19024824e7fdSDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
19030b57cec5SDimitry Andric Changed |= enableGLCBit(MI);
19040b57cec5SDimitry Andric Changed |= enableDLCBit(MI);
19050b57cec5SDimitry Andric break;
19060b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
19070b57cec5SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
19080b57cec5SDimitry Andric // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
1909e8d8bef9SDimitry Andric // CU mode all waves of a work-group are on the same CU, and so the L0
1910e8d8bef9SDimitry Andric // does not need to be bypassed.
1911349cc55cSDimitry Andric if (!ST.isCuModeEnabled())
1912349cc55cSDimitry Andric Changed |= enableGLCBit(MI);
19130b57cec5SDimitry Andric break;
19140b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
19150b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
19160b57cec5SDimitry Andric // No cache to bypass.
19170b57cec5SDimitry Andric break;
19180b57cec5SDimitry Andric default:
19190b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
19200b57cec5SDimitry Andric }
19210b57cec5SDimitry Andric }
19220b57cec5SDimitry Andric
19230b57cec5SDimitry Andric /// The scratch address space does not need the global memory caches
19240b57cec5SDimitry Andric /// to be bypassed as all memory operations by the same thread are
19250b57cec5SDimitry Andric /// sequentially consistent, and no other thread can access scratch
19260b57cec5SDimitry Andric /// memory.
19270b57cec5SDimitry Andric
1928e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
19290b57cec5SDimitry Andric
19300b57cec5SDimitry Andric return Changed;
19310b57cec5SDimitry Andric }
19320b57cec5SDimitry Andric
1933e8d8bef9SDimitry Andric bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1934e8d8bef9SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1935*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
1936e8d8bef9SDimitry Andric
1937e8d8bef9SDimitry Andric // Only handle load and store, not atomic read-modify-write instructions. The
1938e8d8bef9SDimitry Andric // latter use glc to indicate if the atomic returns a result and so must not
1939e8d8bef9SDimitry Andric // be used for cache control.
19400b57cec5SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
1941e8d8bef9SDimitry Andric
1942e8d8bef9SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
1943e8d8bef9SDimitry Andric // instructions. The latter are always marked as volatile, so they cannot
1944e8d8bef9SDimitry Andric // sensibly be handled here without pessimizing all atomics. They also do
1945e8d8bef9SDimitry Andric // not support the nontemporal attribute.
1946e8d8bef9SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1947e8d8bef9SDimitry Andric
19480b57cec5SDimitry Andric bool Changed = false;
19490b57cec5SDimitry Andric
1950e8d8bef9SDimitry Andric if (IsVolatile) {
19514824e7fdSDimitry Andric // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
19524824e7fdSDimitry Andric // and MISS_LRU for store instructions.
19534824e7fdSDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
1954e8d8bef9SDimitry Andric if (Op == SIMemOp::LOAD) {
1955e8d8bef9SDimitry Andric Changed |= enableGLCBit(MI);
1956e8d8bef9SDimitry Andric Changed |= enableDLCBit(MI);
1957e8d8bef9SDimitry Andric }
1958e8d8bef9SDimitry Andric
1959e8d8bef9SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
1960e8d8bef9SDimitry Andric // operations to be visible outside the program in a global order. Do not
1961e8d8bef9SDimitry Andric // request cross address space as only the global address space can be
1962e8d8bef9SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
1963e8d8bef9SDimitry Andric // address space operations.
1964e8d8bef9SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1965e8d8bef9SDimitry Andric Position::AFTER);
19660b57cec5SDimitry Andric return Changed;
19670b57cec5SDimitry Andric }
19680b57cec5SDimitry Andric
1969e8d8bef9SDimitry Andric if (IsNonTemporal) {
19704824e7fdSDimitry Andric // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
19714824e7fdSDimitry Andric // and L2 cache policy to STREAM.
19724824e7fdSDimitry Andric // For stores setting both GLC and SLC configures L0 and L1 cache policy
19734824e7fdSDimitry Andric // to MISS_EVICT and the L2 cache policy to STREAM.
19744824e7fdSDimitry Andric if (Op == SIMemOp::STORE)
19754824e7fdSDimitry Andric Changed |= enableGLCBit(MI);
1976e8d8bef9SDimitry Andric Changed |= enableSLCBit(MI);
19774824e7fdSDimitry Andric
1978e8d8bef9SDimitry Andric return Changed;
19790b57cec5SDimitry Andric }
19800b57cec5SDimitry Andric
19810b57cec5SDimitry Andric return Changed;
19820b57cec5SDimitry Andric }
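
// A hedged summary of the gfx10 cache-policy bits chosen above (inferred
// from this function only, not an exhaustive ISA reference):
//   volatile load:      glc dlc, followed by "s_waitcnt vmcnt(0)"
//   volatile store:     no bits, followed by "s_waitcnt_vscnt null, 0x0"
//   nontemporal load:   slc
//   nontemporal store:  glc slc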
19830b57cec5SDimitry Andric
19840b57cec5SDimitry Andric bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
19850b57cec5SDimitry Andric SIAtomicScope Scope,
19860b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace,
19870b57cec5SDimitry Andric SIMemOp Op,
19880b57cec5SDimitry Andric bool IsCrossAddrSpaceOrdering,
19890b57cec5SDimitry Andric Position Pos) const {
19900b57cec5SDimitry Andric bool Changed = false;
19910b57cec5SDimitry Andric
19920b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
19930b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc();
19940b57cec5SDimitry Andric
19950b57cec5SDimitry Andric if (Pos == Position::AFTER)
19960b57cec5SDimitry Andric ++MI;
19970b57cec5SDimitry Andric
19980b57cec5SDimitry Andric bool VMCnt = false;
19990b57cec5SDimitry Andric bool VSCnt = false;
20000b57cec5SDimitry Andric bool LGKMCnt = false;
20010b57cec5SDimitry Andric
2002e8d8bef9SDimitry Andric if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2003e8d8bef9SDimitry Andric SIAtomicAddrSpace::NONE) {
20040b57cec5SDimitry Andric switch (Scope) {
20050b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
20060b57cec5SDimitry Andric case SIAtomicScope::AGENT:
20070b57cec5SDimitry Andric if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
20080b57cec5SDimitry Andric VMCnt |= true;
20090b57cec5SDimitry Andric if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
20100b57cec5SDimitry Andric VSCnt |= true;
20110b57cec5SDimitry Andric break;
20120b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
20130b57cec5SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
20140b57cec5SDimitry Andric // the WGP. Therefore need to wait for operations to complete to ensure
20150b57cec5SDimitry Andric // they are visible to waves in the other CU as the L0 is per CU.
20160b57cec5SDimitry Andric // Otherwise, in CU mode, all waves of a work-group are on the same CU,
20170b57cec5SDimitry Andric // which shares the same L0.
2018e8d8bef9SDimitry Andric if (!ST.isCuModeEnabled()) {
20190b57cec5SDimitry Andric if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
20200b57cec5SDimitry Andric VMCnt |= true;
20210b57cec5SDimitry Andric if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
20220b57cec5SDimitry Andric VSCnt |= true;
20230b57cec5SDimitry Andric }
20240b57cec5SDimitry Andric break;
20250b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
20260b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
20270b57cec5SDimitry Andric // The L0 cache keeps all memory operations in order for
20280b57cec5SDimitry Andric // work-items in the same wavefront.
20290b57cec5SDimitry Andric break;
20300b57cec5SDimitry Andric default:
20310b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
20320b57cec5SDimitry Andric }
20330b57cec5SDimitry Andric }
20340b57cec5SDimitry Andric
20350b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
20360b57cec5SDimitry Andric switch (Scope) {
20370b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
20380b57cec5SDimitry Andric case SIAtomicScope::AGENT:
20390b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
2040e8d8bef9SDimitry Andric // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
2041e8d8bef9SDimitry Andric // not needed as LDS operations for all waves are executed in a total
2042e8d8bef9SDimitry Andric // global ordering as observed by all waves. Required if also
2043e8d8bef9SDimitry Andric // synchronizing with global/GDS memory as LDS operations could be
2044e8d8bef9SDimitry Andric // reordered with respect to later global/GDS memory operations of the
2045e8d8bef9SDimitry Andric // same wave.
20460b57cec5SDimitry Andric LGKMCnt |= IsCrossAddrSpaceOrdering;
20470b57cec5SDimitry Andric break;
20480b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
20490b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
20500b57cec5SDimitry Andric // The LDS keeps all memory operations in order for
205181ad6265SDimitry Andric // the same wavefront.
20520b57cec5SDimitry Andric break;
20530b57cec5SDimitry Andric default:
20540b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
20550b57cec5SDimitry Andric }
20560b57cec5SDimitry Andric }
20570b57cec5SDimitry Andric
20580b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
20590b57cec5SDimitry Andric switch (Scope) {
20600b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
20610b57cec5SDimitry Andric case SIAtomicScope::AGENT:
2062e8d8bef9SDimitry Andric // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
2063e8d8bef9SDimitry Andric // is not needed as GDS operations for all waves are executed in a total
2064e8d8bef9SDimitry Andric // global ordering as observed by all waves. Required if also
2065e8d8bef9SDimitry Andric // synchronizing with global/LDS memory as GDS operations could be
2066e8d8bef9SDimitry Andric // reordered with respect to later global/LDS memory operations of the
2067e8d8bef9SDimitry Andric // same wave.
20680b57cec5SDimitry Andric LGKMCnt |= IsCrossAddrSpaceOrdering;
20690b57cec5SDimitry Andric break;
20700b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
20710b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
20720b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
20730b57cec5SDimitry Andric // The GDS keeps all memory operations in order for
20740b57cec5SDimitry Andric // the same work-group.
20750b57cec5SDimitry Andric break;
20760b57cec5SDimitry Andric default:
20770b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
20780b57cec5SDimitry Andric }
20790b57cec5SDimitry Andric }
20800b57cec5SDimitry Andric
20810b57cec5SDimitry Andric if (VMCnt || LGKMCnt) {
20820b57cec5SDimitry Andric unsigned WaitCntImmediate =
20830b57cec5SDimitry Andric AMDGPU::encodeWaitcnt(IV,
20840b57cec5SDimitry Andric VMCnt ? 0 : getVmcntBitMask(IV),
20850b57cec5SDimitry Andric getExpcntBitMask(IV),
20860b57cec5SDimitry Andric LGKMCnt ? 0 : getLgkmcntBitMask(IV));
20875f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
20885f757f3fSDimitry Andric .addImm(WaitCntImmediate);
20890b57cec5SDimitry Andric Changed = true;
20900b57cec5SDimitry Andric }
20910b57cec5SDimitry Andric
20920b57cec5SDimitry Andric if (VSCnt) {
20935f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
20940b57cec5SDimitry Andric .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
20950b57cec5SDimitry Andric .addImm(0);
20960b57cec5SDimitry Andric Changed = true;
20970b57cec5SDimitry Andric }
20980b57cec5SDimitry Andric
20990b57cec5SDimitry Andric if (Pos == Position::AFTER)
21000b57cec5SDimitry Andric --MI;
21010b57cec5SDimitry Andric
21020b57cec5SDimitry Andric return Changed;
21030b57cec5SDimitry Andric }
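
// For example (a sketch based on how the helpers are used above): waiting
// only on vmcnt encodes an immediate with vmcnt=0 and expcnt/lgkmcnt left at
// their no-wait bitmask values, printing as "s_waitcnt vmcnt(0)". Store
// completion uses the separate vscnt counter, hence the distinct
// "s_waitcnt_vscnt null, 0x0" instruction.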
21040b57cec5SDimitry Andric
2105e8d8bef9SDimitry Andric bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
2106e8d8bef9SDimitry Andric SIAtomicScope Scope,
2107e8d8bef9SDimitry Andric SIAtomicAddrSpace AddrSpace,
2108e8d8bef9SDimitry Andric Position Pos) const {
2109e8d8bef9SDimitry Andric if (!InsertCacheInv)
2110e8d8bef9SDimitry Andric return false;
2111e8d8bef9SDimitry Andric
2112e8d8bef9SDimitry Andric bool Changed = false;
2113e8d8bef9SDimitry Andric
2114e8d8bef9SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
2115e8d8bef9SDimitry Andric DebugLoc DL = MI->getDebugLoc();
2116e8d8bef9SDimitry Andric
2117e8d8bef9SDimitry Andric if (Pos == Position::AFTER)
2118e8d8bef9SDimitry Andric ++MI;
2119e8d8bef9SDimitry Andric
2120e8d8bef9SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2121e8d8bef9SDimitry Andric switch (Scope) {
2122e8d8bef9SDimitry Andric case SIAtomicScope::SYSTEM:
2123e8d8bef9SDimitry Andric case SIAtomicScope::AGENT:
2124*0fca6ea1SDimitry Andric // The order of invalidates matters here. We must invalidate "outer in"
2125*0fca6ea1SDimitry Andric // so L1 -> L0 to avoid L0 pulling in stale data from L1 when it is
2126*0fca6ea1SDimitry Andric // invalidated.
2127e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
2128*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
2129e8d8bef9SDimitry Andric Changed = true;
2130e8d8bef9SDimitry Andric break;
2131e8d8bef9SDimitry Andric case SIAtomicScope::WORKGROUP:
2132e8d8bef9SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
2133e8d8bef9SDimitry Andric // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise,
2134e8d8bef9SDimitry Andric // in CU mode, all waves of a work-group are on the same CU, and so the
2135e8d8bef9SDimitry Andric // L0 does not need to be invalidated.
2136e8d8bef9SDimitry Andric if (!ST.isCuModeEnabled()) {
2137e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
2138e8d8bef9SDimitry Andric Changed = true;
2139e8d8bef9SDimitry Andric }
2140e8d8bef9SDimitry Andric break;
2141e8d8bef9SDimitry Andric case SIAtomicScope::WAVEFRONT:
2142e8d8bef9SDimitry Andric case SIAtomicScope::SINGLETHREAD:
2143e8d8bef9SDimitry Andric // No cache to invalidate.
2144e8d8bef9SDimitry Andric break;
2145e8d8bef9SDimitry Andric default:
2146e8d8bef9SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
2147e8d8bef9SDimitry Andric }
2148e8d8bef9SDimitry Andric }
2149e8d8bef9SDimitry Andric
2150e8d8bef9SDimitry Andric /// The scratch address space does not need the global memory cache
2151e8d8bef9SDimitry Andric /// to be flushed as all memory operations by the same thread are
2152e8d8bef9SDimitry Andric /// sequentially consistent, and no other thread can access scratch
2153e8d8bef9SDimitry Andric /// memory.
2154e8d8bef9SDimitry Andric
2155e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
2156e8d8bef9SDimitry Andric
2157e8d8bef9SDimitry Andric if (Pos == Position::AFTER)
2158e8d8bef9SDimitry Andric --MI;
2159e8d8bef9SDimitry Andric
2160e8d8bef9SDimitry Andric return Changed;
2161e8d8bef9SDimitry Andric }
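
// Combined with the wait that expandLoad inserts after the load, an
// agent-scope acquire load on gfx10 is therefore expected to expand roughly
// to the following (illustrative only):
//   flat_load_dword v1, v[0:1] glc dlc
//   s_waitcnt vmcnt(0)
//   buffer_gl1_inv
//   buffer_gl0_inv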
2162e8d8bef9SDimitry Andric
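// Note: unlike the gfx10 variant above, the gfx11 version below does not set
// DLC when bypassing caches for SYSTEM/AGENT loads. On gfx11 the DLC bit is
// repurposed as a MALL NOALLOC hint (see enableVolatileAndOrNonTemporal
// below), so it is no longer a load cache bypass control.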
216381ad6265SDimitry Andric bool SIGfx11CacheControl::enableLoadCacheBypass(
216481ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
216581ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
216681ad6265SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
216781ad6265SDimitry Andric bool Changed = false;
216881ad6265SDimitry Andric
216981ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
217081ad6265SDimitry Andric switch (Scope) {
217181ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
217281ad6265SDimitry Andric case SIAtomicScope::AGENT:
217381ad6265SDimitry Andric // Set the L0 and L1 cache policies to MISS_EVICT.
217481ad6265SDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
217581ad6265SDimitry Andric Changed |= enableGLCBit(MI);
217681ad6265SDimitry Andric break;
217781ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
217881ad6265SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
217981ad6265SDimitry Andric // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
218081ad6265SDimitry Andric // CU mode all waves of a work-group are on the same CU, and so the L0
218181ad6265SDimitry Andric // does not need to be bypassed.
218281ad6265SDimitry Andric if (!ST.isCuModeEnabled())
218381ad6265SDimitry Andric Changed |= enableGLCBit(MI);
218481ad6265SDimitry Andric break;
218581ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
218681ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
218781ad6265SDimitry Andric // No cache to bypass.
218881ad6265SDimitry Andric break;
218981ad6265SDimitry Andric default:
219081ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
219181ad6265SDimitry Andric }
219281ad6265SDimitry Andric }
219381ad6265SDimitry Andric
219481ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
219581ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
219681ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
219781ad6265SDimitry Andric /// memory.
219881ad6265SDimitry Andric
219981ad6265SDimitry Andric /// Other address spaces do not have a cache.
220081ad6265SDimitry Andric
220181ad6265SDimitry Andric return Changed;
220281ad6265SDimitry Andric }
220381ad6265SDimitry Andric
220481ad6265SDimitry Andric bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
220581ad6265SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2206*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
220781ad6265SDimitry Andric
220881ad6265SDimitry Andric // Only handle load and store, not atomic read-modify-write instructions. The
220981ad6265SDimitry Andric // latter use glc to indicate if the atomic returns a result and so must not
221081ad6265SDimitry Andric // be used for cache control.
221181ad6265SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
221281ad6265SDimitry Andric
221381ad6265SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
221481ad6265SDimitry Andric // instructions. The latter are always marked as volatile, so they cannot
221581ad6265SDimitry Andric // sensibly be handled here without pessimizing all atomics. They also do
221681ad6265SDimitry Andric // not support the nontemporal attribute.
221781ad6265SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
221881ad6265SDimitry Andric
221981ad6265SDimitry Andric bool Changed = false;
222081ad6265SDimitry Andric
222181ad6265SDimitry Andric if (IsVolatile) {
222281ad6265SDimitry Andric // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
222381ad6265SDimitry Andric // and MISS_LRU for store instructions.
222481ad6265SDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
222581ad6265SDimitry Andric if (Op == SIMemOp::LOAD)
222681ad6265SDimitry Andric Changed |= enableGLCBit(MI);
222781ad6265SDimitry Andric
222881ad6265SDimitry Andric // Set MALL NOALLOC for load and store instructions.
222981ad6265SDimitry Andric Changed |= enableDLCBit(MI);
223081ad6265SDimitry Andric
223181ad6265SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
223281ad6265SDimitry Andric // operations to be visible outside the program in a global order. Do not
223381ad6265SDimitry Andric // request cross address space as only the global address space can be
223481ad6265SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
223581ad6265SDimitry Andric // address space operations.
223681ad6265SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
223781ad6265SDimitry Andric Position::AFTER);
223881ad6265SDimitry Andric return Changed;
223981ad6265SDimitry Andric }
224081ad6265SDimitry Andric
224181ad6265SDimitry Andric if (IsNonTemporal) {
224281ad6265SDimitry Andric // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
224381ad6265SDimitry Andric // and L2 cache policy to STREAM.
224481ad6265SDimitry Andric // For stores setting both GLC and SLC configures L0 and L1 cache policy
224581ad6265SDimitry Andric // to MISS_EVICT and the L2 cache policy to STREAM.
224681ad6265SDimitry Andric if (Op == SIMemOp::STORE)
224781ad6265SDimitry Andric Changed |= enableGLCBit(MI);
224881ad6265SDimitry Andric Changed |= enableSLCBit(MI);
224981ad6265SDimitry Andric
225081ad6265SDimitry Andric // Set MALL NOALLOC for load and store instructions.
225181ad6265SDimitry Andric Changed |= enableDLCBit(MI);
225281ad6265SDimitry Andric return Changed;
225381ad6265SDimitry Andric }
225481ad6265SDimitry Andric
225581ad6265SDimitry Andric return Changed;
225681ad6265SDimitry Andric }
225781ad6265SDimitry Andric
22587a6dacacSDimitry Andric bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
22597a6dacacSDimitry Andric AMDGPU::CPol::CPol Value) const {
22607a6dacacSDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
22617a6dacacSDimitry Andric if (!CPol)
22627a6dacacSDimitry Andric return false;
22637a6dacacSDimitry Andric
22647a6dacacSDimitry Andric uint64_t NewTH = Value & AMDGPU::CPol::TH;
22657a6dacacSDimitry Andric if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
22667a6dacacSDimitry Andric CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
22677a6dacacSDimitry Andric return true;
22687a6dacacSDimitry Andric }
22697a6dacacSDimitry Andric
22707a6dacacSDimitry Andric return false;
22717a6dacacSDimitry Andric }
22727a6dacacSDimitry Andric
22737a6dacacSDimitry Andric bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
22747a6dacacSDimitry Andric AMDGPU::CPol::CPol Value) const {
22757a6dacacSDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
22767a6dacacSDimitry Andric if (!CPol)
22777a6dacacSDimitry Andric return false;
22787a6dacacSDimitry Andric
22797a6dacacSDimitry Andric uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
22807a6dacacSDimitry Andric if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
22817a6dacacSDimitry Andric CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
22827a6dacacSDimitry Andric return true;
22837a6dacacSDimitry Andric }
22847a6dacacSDimitry Andric
22857a6dacacSDimitry Andric return false;
22867a6dacacSDimitry Andric }
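
// setTH and setScope above share the same read-modify-write pattern on the
// cpol immediate: clear the field, then OR in the new value. A minimal
// standalone sketch of the idea (illustrative only, not part of the pass):
//   uint64_t updateField(uint64_t Imm, uint64_t Mask, uint64_t Val) {
//     return (Imm & ~Mask) | (Val & Mask);
//   }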
22877a6dacacSDimitry Andric
2288*0fca6ea1SDimitry Andric bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
2289*0fca6ea1SDimitry Andric const MachineBasicBlock::iterator MI) const {
2290*0fca6ea1SDimitry Andric // TODO: implement flag for frontend to give us a hint not to insert waits.
2291*0fca6ea1SDimitry Andric
2292*0fca6ea1SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
2293*0fca6ea1SDimitry Andric const DebugLoc &DL = MI->getDebugLoc();
2294*0fca6ea1SDimitry Andric
2295*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(S_WAIT_LOADCNT_soft)).addImm(0);
2296*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(S_WAIT_SAMPLECNT_soft)).addImm(0);
2297*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(S_WAIT_BVHCNT_soft)).addImm(0);
2298*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(S_WAIT_KMCNT_soft)).addImm(0);
2299*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(S_WAIT_STORECNT_soft)).addImm(0);
2300*0fca6ea1SDimitry Andric
2301*0fca6ea1SDimitry Andric return true;
2302*0fca6ea1SDimitry Andric }
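
// The "_soft" wait opcodes used above are, as assumed here, placeholders
// that the later SIInsertWaitcnts pass may strengthen, merge, or relax
// against the waits it computes itself, rather than hard architectural
// waits.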
2303*0fca6ea1SDimitry Andric
23047a6dacacSDimitry Andric bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
23057a6dacacSDimitry Andric SIAtomicScope Scope,
23067a6dacacSDimitry Andric SIAtomicAddrSpace AddrSpace, SIMemOp Op,
23077a6dacacSDimitry Andric bool IsCrossAddrSpaceOrdering,
23087a6dacacSDimitry Andric Position Pos) const {
23097a6dacacSDimitry Andric bool Changed = false;
23107a6dacacSDimitry Andric
23117a6dacacSDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
23127a6dacacSDimitry Andric DebugLoc DL = MI->getDebugLoc();
23137a6dacacSDimitry Andric
23147a6dacacSDimitry Andric bool LOADCnt = false;
23157a6dacacSDimitry Andric bool DSCnt = false;
23167a6dacacSDimitry Andric bool STORECnt = false;
23177a6dacacSDimitry Andric
23187a6dacacSDimitry Andric if (Pos == Position::AFTER)
23197a6dacacSDimitry Andric ++MI;
23207a6dacacSDimitry Andric
23217a6dacacSDimitry Andric if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
23227a6dacacSDimitry Andric SIAtomicAddrSpace::NONE) {
23237a6dacacSDimitry Andric switch (Scope) {
23247a6dacacSDimitry Andric case SIAtomicScope::SYSTEM:
23257a6dacacSDimitry Andric case SIAtomicScope::AGENT:
23267a6dacacSDimitry Andric if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
23277a6dacacSDimitry Andric LOADCnt |= true;
23287a6dacacSDimitry Andric if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
23297a6dacacSDimitry Andric STORECnt |= true;
23307a6dacacSDimitry Andric break;
23317a6dacacSDimitry Andric case SIAtomicScope::WORKGROUP:
23327a6dacacSDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
23337a6dacacSDimitry Andric // the WGP. Therefore need to wait for operations to complete to ensure
23347a6dacacSDimitry Andric // they are visible to waves in the other CU as the L0 is per CU.
23357a6dacacSDimitry Andric // Otherwise, in CU mode, all waves of a work-group are on the same CU,
23367a6dacacSDimitry Andric // which shares the same L0.
23377a6dacacSDimitry Andric if (!ST.isCuModeEnabled()) {
23387a6dacacSDimitry Andric if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
23397a6dacacSDimitry Andric LOADCnt |= true;
23407a6dacacSDimitry Andric if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
23417a6dacacSDimitry Andric STORECnt |= true;
23427a6dacacSDimitry Andric }
23437a6dacacSDimitry Andric break;
23447a6dacacSDimitry Andric case SIAtomicScope::WAVEFRONT:
23457a6dacacSDimitry Andric case SIAtomicScope::SINGLETHREAD:
23467a6dacacSDimitry Andric // The L0 cache keeps all memory operations in order for
23477a6dacacSDimitry Andric // work-items in the same wavefront.
23487a6dacacSDimitry Andric break;
23497a6dacacSDimitry Andric default:
23507a6dacacSDimitry Andric llvm_unreachable("Unsupported synchronization scope");
23517a6dacacSDimitry Andric }
23527a6dacacSDimitry Andric }
23537a6dacacSDimitry Andric
23547a6dacacSDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
23557a6dacacSDimitry Andric switch (Scope) {
23567a6dacacSDimitry Andric case SIAtomicScope::SYSTEM:
23577a6dacacSDimitry Andric case SIAtomicScope::AGENT:
23587a6dacacSDimitry Andric case SIAtomicScope::WORKGROUP:
23597a6dacacSDimitry Andric // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
23607a6dacacSDimitry Andric // not needed as LDS operations for all waves are executed in a total
23617a6dacacSDimitry Andric // global ordering as observed by all waves. Required if also
23627a6dacacSDimitry Andric // synchronizing with global/GDS memory as LDS operations could be
23637a6dacacSDimitry Andric // reordered with respect to later global/GDS memory operations of the
23647a6dacacSDimitry Andric // same wave.
23657a6dacacSDimitry Andric DSCnt |= IsCrossAddrSpaceOrdering;
23667a6dacacSDimitry Andric break;
23677a6dacacSDimitry Andric case SIAtomicScope::WAVEFRONT:
23687a6dacacSDimitry Andric case SIAtomicScope::SINGLETHREAD:
23697a6dacacSDimitry Andric // The LDS keeps all memory operations in order for
23707a6dacacSDimitry Andric // the same wavefront.
23717a6dacacSDimitry Andric break;
23727a6dacacSDimitry Andric default:
23737a6dacacSDimitry Andric llvm_unreachable("Unsupported synchronization scope");
23747a6dacacSDimitry Andric }
23757a6dacacSDimitry Andric }
23767a6dacacSDimitry Andric
23777a6dacacSDimitry Andric if (LOADCnt) {
23787a6dacacSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
23797a6dacacSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
23807a6dacacSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
23817a6dacacSDimitry Andric Changed = true;
23827a6dacacSDimitry Andric }
23837a6dacacSDimitry Andric
23847a6dacacSDimitry Andric if (STORECnt) {
23857a6dacacSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
23867a6dacacSDimitry Andric Changed = true;
23877a6dacacSDimitry Andric }
23887a6dacacSDimitry Andric
23897a6dacacSDimitry Andric if (DSCnt) {
23907a6dacacSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
23917a6dacacSDimitry Andric Changed = true;
23927a6dacacSDimitry Andric }
23937a6dacacSDimitry Andric
23947a6dacacSDimitry Andric if (Pos == Position::AFTER)
23957a6dacacSDimitry Andric --MI;
23967a6dacacSDimitry Andric
23977a6dacacSDimitry Andric return Changed;
23987a6dacacSDimitry Andric }
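
// On gfx12 the legacy vmcnt counter is split into loadcnt, samplecnt and
// bvhcnt, and lgkmcnt is split into dscnt and kmcnt. That is why an acquire
// wait above emits up to three separate load-side waits instead of a single
// "s_waitcnt vmcnt(0)".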
23997a6dacacSDimitry Andric
24001db9f3b2SDimitry Andric bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
24011db9f3b2SDimitry Andric SIAtomicScope Scope,
24021db9f3b2SDimitry Andric SIAtomicAddrSpace AddrSpace,
24031db9f3b2SDimitry Andric Position Pos) const {
24041db9f3b2SDimitry Andric if (!InsertCacheInv)
24051db9f3b2SDimitry Andric return false;
24061db9f3b2SDimitry Andric
24071db9f3b2SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
24081db9f3b2SDimitry Andric DebugLoc DL = MI->getDebugLoc();
24091db9f3b2SDimitry Andric
24101db9f3b2SDimitry Andric /// The scratch address space does not need the global memory cache
24111db9f3b2SDimitry Andric /// to be flushed as all memory operations by the same thread are
24121db9f3b2SDimitry Andric /// sequentially consistent, and no other thread can access scratch
24131db9f3b2SDimitry Andric /// memory.
24141db9f3b2SDimitry Andric
24151db9f3b2SDimitry Andric /// Other address spaces do not have a cache.
24161db9f3b2SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
24171db9f3b2SDimitry Andric return false;
24181db9f3b2SDimitry Andric
24191db9f3b2SDimitry Andric AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
24201db9f3b2SDimitry Andric switch (Scope) {
24211db9f3b2SDimitry Andric case SIAtomicScope::SYSTEM:
24221db9f3b2SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_SYS;
24231db9f3b2SDimitry Andric break;
24241db9f3b2SDimitry Andric case SIAtomicScope::AGENT:
24251db9f3b2SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_DEV;
24261db9f3b2SDimitry Andric break;
24271db9f3b2SDimitry Andric case SIAtomicScope::WORKGROUP:
24281db9f3b2SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
24291db9f3b2SDimitry Andric // the WGP. Therefore we need to invalidate the L0 which is per CU.
24301db9f3b2SDimitry Andric // Otherwise in CU mode all waves of a work-group are on the same CU, and so
24311db9f3b2SDimitry Andric // the L0 does not need to be invalidated.
24321db9f3b2SDimitry Andric if (ST.isCuModeEnabled())
24331db9f3b2SDimitry Andric return false;
24341db9f3b2SDimitry Andric
24351db9f3b2SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_SE;
24361db9f3b2SDimitry Andric break;
24371db9f3b2SDimitry Andric case SIAtomicScope::WAVEFRONT:
24381db9f3b2SDimitry Andric case SIAtomicScope::SINGLETHREAD:
24391db9f3b2SDimitry Andric // No cache to invalidate.
24401db9f3b2SDimitry Andric return false;
24411db9f3b2SDimitry Andric default:
24421db9f3b2SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
24431db9f3b2SDimitry Andric }
24441db9f3b2SDimitry Andric
24451db9f3b2SDimitry Andric if (Pos == Position::AFTER)
24461db9f3b2SDimitry Andric ++MI;
24471db9f3b2SDimitry Andric
24481db9f3b2SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);
24491db9f3b2SDimitry Andric
24501db9f3b2SDimitry Andric if (Pos == Position::AFTER)
24511db9f3b2SDimitry Andric --MI;
24521db9f3b2SDimitry Andric
24531db9f3b2SDimitry Andric return true;
24541db9f3b2SDimitry Andric }
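
// Illustrative result (assumed printed syntax): an agent-scope acquire on
// gfx12 emits a single "global_inv scope:SCOPE_DEV" instead of the
// per-level buffer_gl1_inv/buffer_gl0_inv pair used on gfx10/gfx11.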
24551db9f3b2SDimitry Andric
2456*0fca6ea1SDimitry Andric bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
2457*0fca6ea1SDimitry Andric SIAtomicScope Scope,
2458*0fca6ea1SDimitry Andric SIAtomicAddrSpace AddrSpace,
2459*0fca6ea1SDimitry Andric bool IsCrossAddrSpaceOrdering,
2460*0fca6ea1SDimitry Andric Position Pos) const {
2461*0fca6ea1SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
2462*0fca6ea1SDimitry Andric DebugLoc DL = MI->getDebugLoc();
2463*0fca6ea1SDimitry Andric
2464*0fca6ea1SDimitry Andric // The scratch address space does not need the global memory cache
2465*0fca6ea1SDimitry Andric // writeback as all memory operations by the same thread are
2466*0fca6ea1SDimitry Andric // sequentially consistent, and no other thread can access scratch
2467*0fca6ea1SDimitry Andric // memory.
2468*0fca6ea1SDimitry Andric
2469*0fca6ea1SDimitry Andric // Other address spaces do not have a cache.
2470*0fca6ea1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2471*0fca6ea1SDimitry Andric return false;
2472*0fca6ea1SDimitry Andric
2473*0fca6ea1SDimitry Andric if (Pos == Position::AFTER)
2474*0fca6ea1SDimitry Andric ++MI;
2475*0fca6ea1SDimitry Andric
2476*0fca6ea1SDimitry Andric // GLOBAL_WB is always needed, even for write-through caches, as it
2477*0fca6ea1SDimitry Andric // additionally ensures all operations have reached the desired cache level.
2478*0fca6ea1SDimitry Andric bool SkipWB = false;
2479*0fca6ea1SDimitry Andric AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
2480*0fca6ea1SDimitry Andric switch (Scope) {
2481*0fca6ea1SDimitry Andric case SIAtomicScope::SYSTEM:
2482*0fca6ea1SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_SYS;
2483*0fca6ea1SDimitry Andric break;
2484*0fca6ea1SDimitry Andric case SIAtomicScope::AGENT:
2485*0fca6ea1SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_DEV;
2486*0fca6ea1SDimitry Andric break;
2487*0fca6ea1SDimitry Andric case SIAtomicScope::WORKGROUP:
2488*0fca6ea1SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
2489*0fca6ea1SDimitry Andric // the WGP. Therefore we need to ensure all operations have reached L1,
2490*0fca6ea1SDimitry Andric // hence the SCOPE_SE WB.
2491*0fca6ea1SDimitry Andric // For CU mode, we need operations to reach L0, so the wait is enough -
2492*0fca6ea1SDimitry Andric // there is no way for an operation to report completion without reaching
2493*0fca6ea1SDimitry Andric // at least L0.
2494*0fca6ea1SDimitry Andric if (ST.isCuModeEnabled())
2495*0fca6ea1SDimitry Andric SkipWB = true;
2496*0fca6ea1SDimitry Andric else
2497*0fca6ea1SDimitry Andric ScopeImm = AMDGPU::CPol::SCOPE_SE;
2498*0fca6ea1SDimitry Andric break;
2499*0fca6ea1SDimitry Andric case SIAtomicScope::WAVEFRONT:
2500*0fca6ea1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
2501*0fca6ea1SDimitry Andric // No cache to write back.
2502*0fca6ea1SDimitry Andric return false;
2503*0fca6ea1SDimitry Andric default:
2504*0fca6ea1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
2505*0fca6ea1SDimitry Andric }
2506*0fca6ea1SDimitry Andric
2507*0fca6ea1SDimitry Andric if (!SkipWB)
2508*0fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB)).addImm(ScopeImm);
2509*0fca6ea1SDimitry Andric
2510*0fca6ea1SDimitry Andric if (Pos == Position::AFTER)
2511*0fca6ea1SDimitry Andric --MI;
2512*0fca6ea1SDimitry Andric
2513*0fca6ea1SDimitry Andric // We always have to wait for previous memory operations (load/store) to
2514*0fca6ea1SDimitry Andric // complete, whether we inserted a WB or not. If we inserted a WB (storecnt),
2515*0fca6ea1SDimitry Andric // we of course need to wait for that as well.
2516*0fca6ea1SDimitry Andric insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2517*0fca6ea1SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
2518*0fca6ea1SDimitry Andric
2519*0fca6ea1SDimitry Andric return true;
2520*0fca6ea1SDimitry Andric }
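
// Illustrative result (assumed printed syntax): a system-scope release on
// gfx12 emits "global_wb scope:SCOPE_SYS" followed by the waits produced by
// insertWait, e.g. "s_wait_storecnt 0x0", so the writeback itself is known
// to have completed.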
2521*0fca6ea1SDimitry Andric
25227a6dacacSDimitry Andric bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
25237a6dacacSDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2524*0fca6ea1SDimitry Andric bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
25257a6dacacSDimitry Andric
25267a6dacacSDimitry Andric // Only handle load and store, not atomic read-modify-write instructions.
25277a6dacacSDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
25287a6dacacSDimitry Andric
25297a6dacacSDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
25307a6dacacSDimitry Andric // instructions. The latter are always marked as volatile, so they cannot
25317a6dacacSDimitry Andric // sensibly be handled here without pessimizing all atomics. They also do
25327a6dacacSDimitry Andric // not support the nontemporal attribute.
25337a6dacacSDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
25347a6dacacSDimitry Andric
25357a6dacacSDimitry Andric bool Changed = false;
25367a6dacacSDimitry Andric
2537*0fca6ea1SDimitry Andric if (IsLastUse) {
2538*0fca6ea1SDimitry Andric // Set last-use hint.
2539*0fca6ea1SDimitry Andric Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
2540*0fca6ea1SDimitry Andric } else if (IsNonTemporal) {
25415678d1d9SDimitry Andric // Set non-temporal hint for all cache levels.
25425678d1d9SDimitry Andric Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
25435678d1d9SDimitry Andric }
25445678d1d9SDimitry Andric
25457a6dacacSDimitry Andric if (IsVolatile) {
25467a6dacacSDimitry Andric Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
25477a6dacacSDimitry Andric
2548*0fca6ea1SDimitry Andric if (Op == SIMemOp::STORE)
2549*0fca6ea1SDimitry Andric Changed |= insertWaitsBeforeSystemScopeStore(MI);
2550*0fca6ea1SDimitry Andric
25517a6dacacSDimitry Andric // Ensure operation has completed at system scope to cause all volatile
25527a6dacacSDimitry Andric // operations to be visible outside the program in a global order. Do not
25537a6dacacSDimitry Andric // request cross address space as only the global address space can be
25547a6dacacSDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
25557a6dacacSDimitry Andric // address space operations.
25567a6dacacSDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
25577a6dacacSDimitry Andric Position::AFTER);
25587a6dacacSDimitry Andric }
25597a6dacacSDimitry Andric
25607a6dacacSDimitry Andric return Changed;
25617a6dacacSDimitry Andric }
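
// On gfx12, volatile, nontemporal and last-use handling is thus expressed
// entirely through the cpol operand: the TH_LU/TH_NT temporal hints and a
// SCOPE_SYS scope, rather than the GLC/SLC/DLC bit combinations used on
// earlier targets.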
25627a6dacacSDimitry Andric
2563*0fca6ea1SDimitry Andric bool SIGfx12CacheControl::expandSystemScopeStore(
2564*0fca6ea1SDimitry Andric MachineBasicBlock::iterator &MI) const {
2565*0fca6ea1SDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
2566*0fca6ea1SDimitry Andric if (CPol && ((CPol->getImm() & CPol::SCOPE) == CPol::SCOPE_SYS))
2567*0fca6ea1SDimitry Andric return insertWaitsBeforeSystemScopeStore(MI);
2568*0fca6ea1SDimitry Andric
2569*0fca6ea1SDimitry Andric return false;
2570*0fca6ea1SDimitry Andric }
2571*0fca6ea1SDimitry Andric
2572*0fca6ea1SDimitry Andric bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
2573*0fca6ea1SDimitry Andric SIAtomicScope Scope,
2574*0fca6ea1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
2575*0fca6ea1SDimitry Andric bool Changed = false;
2576*0fca6ea1SDimitry Andric
2577*0fca6ea1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2578*0fca6ea1SDimitry Andric switch (Scope) {
2579*0fca6ea1SDimitry Andric case SIAtomicScope::SYSTEM:
2580*0fca6ea1SDimitry Andric Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
2581*0fca6ea1SDimitry Andric break;
2582*0fca6ea1SDimitry Andric case SIAtomicScope::AGENT:
2583*0fca6ea1SDimitry Andric Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
2584*0fca6ea1SDimitry Andric break;
2585*0fca6ea1SDimitry Andric case SIAtomicScope::WORKGROUP:
2586*0fca6ea1SDimitry Andric // In workgroup mode, SCOPE_SE is needed as waves can execute on
2587*0fca6ea1SDimitry Andric // different CUs that access different L0s.
2588*0fca6ea1SDimitry Andric if (!ST.isCuModeEnabled())
2589*0fca6ea1SDimitry Andric Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
2590*0fca6ea1SDimitry Andric break;
2591*0fca6ea1SDimitry Andric case SIAtomicScope::WAVEFRONT:
2592*0fca6ea1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
2593*0fca6ea1SDimitry Andric // No cache to bypass.
2594*0fca6ea1SDimitry Andric break;
2595*0fca6ea1SDimitry Andric default:
2596*0fca6ea1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
2597*0fca6ea1SDimitry Andric }
2598*0fca6ea1SDimitry Andric }
2599*0fca6ea1SDimitry Andric
2600*0fca6ea1SDimitry Andric // The scratch address space does not need the global memory caches
2601*0fca6ea1SDimitry Andric // to be bypassed as all memory operations by the same thread are
2602*0fca6ea1SDimitry Andric // sequentially consistent, and no other thread can access scratch
2603*0fca6ea1SDimitry Andric // memory.
2604*0fca6ea1SDimitry Andric
2605*0fca6ea1SDimitry Andric // Other address spaces do not have a cache.
2606*0fca6ea1SDimitry Andric
2607*0fca6ea1SDimitry Andric return Changed;
2608*0fca6ea1SDimitry Andric }
2609*0fca6ea1SDimitry Andric
26100b57cec5SDimitry Andric bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
26110b57cec5SDimitry Andric if (AtomicPseudoMIs.empty())
26120b57cec5SDimitry Andric return false;
26130b57cec5SDimitry Andric
26140b57cec5SDimitry Andric for (auto &MI : AtomicPseudoMIs)
26150b57cec5SDimitry Andric MI->eraseFromParent();
26160b57cec5SDimitry Andric
26170b57cec5SDimitry Andric AtomicPseudoMIs.clear();
26180b57cec5SDimitry Andric return true;
26190b57cec5SDimitry Andric }
26200b57cec5SDimitry Andric
26210b57cec5SDimitry Andric bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
26220b57cec5SDimitry Andric MachineBasicBlock::iterator &MI) {
26230b57cec5SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
26240b57cec5SDimitry Andric
26250b57cec5SDimitry Andric bool Changed = false;
26260b57cec5SDimitry Andric
26270b57cec5SDimitry Andric if (MOI.isAtomic()) {
26280b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
26290b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::Acquire ||
26300b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
26310b57cec5SDimitry Andric Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
26320b57cec5SDimitry Andric MOI.getOrderingAddrSpace());
26330b57cec5SDimitry Andric }
26340b57cec5SDimitry Andric
26350b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
26360b57cec5SDimitry Andric Changed |= CC->insertWait(MI, MOI.getScope(),
26370b57cec5SDimitry Andric MOI.getOrderingAddrSpace(),
26380b57cec5SDimitry Andric SIMemOp::LOAD | SIMemOp::STORE,
26390b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
26400b57cec5SDimitry Andric Position::BEFORE);
26410b57cec5SDimitry Andric
26420b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Acquire ||
26430b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
26440b57cec5SDimitry Andric Changed |= CC->insertWait(MI, MOI.getScope(),
26450b57cec5SDimitry Andric MOI.getInstrAddrSpace(),
26460b57cec5SDimitry Andric SIMemOp::LOAD,
26470b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
26480b57cec5SDimitry Andric Position::AFTER);
2649e8d8bef9SDimitry Andric Changed |= CC->insertAcquire(MI, MOI.getScope(),
26500b57cec5SDimitry Andric MOI.getOrderingAddrSpace(),
26510b57cec5SDimitry Andric Position::AFTER);
26520b57cec5SDimitry Andric }
26530b57cec5SDimitry Andric
26540b57cec5SDimitry Andric return Changed;
26550b57cec5SDimitry Andric }
26560b57cec5SDimitry Andric
2657e8d8bef9SDimitry Andric // Atomic instructions already bypass caches to the scope specified by the
2658*0fca6ea1SDimitry Andric // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
2659*0fca6ea1SDimitry Andric // instructions need additional treatment.
2660*0fca6ea1SDimitry Andric Changed |= CC->enableVolatileAndOrNonTemporal(
2661*0fca6ea1SDimitry Andric MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2662*0fca6ea1SDimitry Andric MOI.isNonTemporal(), MOI.isLastUse());
2663*0fca6ea1SDimitry Andric
26640b57cec5SDimitry Andric return Changed;
26650b57cec5SDimitry Andric }
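
// A hedged summary of the atomic-load expansion above, derived from the
// control flow rather than external documentation:
//   monotonic: cache bypass only
//   acquire:   cache bypass, wait after the load, then acquire (invalidate)
//   seq_cst:   as acquire, plus a full load/store wait before the load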
26660b57cec5SDimitry Andric
26670b57cec5SDimitry Andric bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
26680b57cec5SDimitry Andric MachineBasicBlock::iterator &MI) {
26690b57cec5SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
26700b57cec5SDimitry Andric
26710b57cec5SDimitry Andric bool Changed = false;
26720b57cec5SDimitry Andric
26730b57cec5SDimitry Andric if (MOI.isAtomic()) {
2674fe6060f1SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2675fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::Release ||
2676fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2677fe6060f1SDimitry Andric Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
2678fe6060f1SDimitry Andric MOI.getOrderingAddrSpace());
2679fe6060f1SDimitry Andric }
2680fe6060f1SDimitry Andric
26810b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Release ||
26820b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2683e8d8bef9SDimitry Andric Changed |= CC->insertRelease(MI, MOI.getScope(),
26840b57cec5SDimitry Andric MOI.getOrderingAddrSpace(),
26850b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
26860b57cec5SDimitry Andric Position::BEFORE);
26870b57cec5SDimitry Andric
26880b57cec5SDimitry Andric return Changed;
26890b57cec5SDimitry Andric }
26900b57cec5SDimitry Andric
2691e8d8bef9SDimitry Andric // Atomic instructions already bypass caches to the scope specified by the
2692e8d8bef9SDimitry Andric // SyncScope operand. Only non-atomic volatile and nontemporal instructions
2693e8d8bef9SDimitry Andric // need additional treatment.
2694e8d8bef9SDimitry Andric Changed |= CC->enableVolatileAndOrNonTemporal(
2695e8d8bef9SDimitry Andric MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2696e8d8bef9SDimitry Andric MOI.isNonTemporal());
2697*0fca6ea1SDimitry Andric
2698*0fca6ea1SDimitry Andric // GFX12 specific: scope (the desired coherence domain in the cache
2699*0fca6ea1SDimitry Andric // hierarchy) is an instruction field; do not confuse it with atomic scope.
2700*0fca6ea1SDimitry Andric Changed |= CC->expandSystemScopeStore(MI);
27010b57cec5SDimitry Andric return Changed;
27020b57cec5SDimitry Andric }
27030b57cec5SDimitry Andric
27040b57cec5SDimitry Andric bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
27050b57cec5SDimitry Andric MachineBasicBlock::iterator &MI) {
27060b57cec5SDimitry Andric assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
27070b57cec5SDimitry Andric
27080b57cec5SDimitry Andric AtomicPseudoMIs.push_back(MI);
27090b57cec5SDimitry Andric bool Changed = false;
27100b57cec5SDimitry Andric
2711*0fca6ea1SDimitry Andric // Refine fenced address space based on MMRAs.
2712*0fca6ea1SDimitry Andric //
2713*0fca6ea1SDimitry Andric // TODO: Should we support this MMRA on other atomic operations?
2714*0fca6ea1SDimitry Andric auto OrderingAddrSpace =
2715*0fca6ea1SDimitry Andric getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());
2716*0fca6ea1SDimitry Andric
27170b57cec5SDimitry Andric if (MOI.isAtomic()) {
271806c3fb27SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Acquire)
2719*0fca6ea1SDimitry Andric Changed |= CC->insertWait(
2720*0fca6ea1SDimitry Andric MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2721*0fca6ea1SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE);
272206c3fb27SDimitry Andric
272306c3fb27SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Release ||
27240b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
27250b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
27260b57cec5SDimitry Andric /// TODO: This relies on a barrier always generating a waitcnt
27270b57cec5SDimitry Andric /// for LDS to ensure it is not reordered with the completion of
27280b57cec5SDimitry Andric /// the preceding LDS operations. If the barrier had a memory
27290b57cec5SDimitry Andric /// ordering and memory scope, then the library would not need to
27300b57cec5SDimitry Andric /// generate a fence. Could add support in this file for
27310b57cec5SDimitry Andric /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
27320b57cec5SDimitry Andric /// adding an S_WAITCNT before an S_BARRIER.
2733*0fca6ea1SDimitry Andric Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
27340b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
27350b57cec5SDimitry Andric Position::BEFORE);
27360b57cec5SDimitry Andric
2737e8d8bef9SDimitry Andric // TODO: If both release and invalidate are happening, they could be combined
2738fe6060f1SDimitry Andric // to use the single "BUFFER_WBINV*" instruction. This could be done by
2739e8d8bef9SDimitry Andric // reorganizing this code or as part of optimizing SIInsertWaitcnt pass to
2740e8d8bef9SDimitry Andric // track cache invalidate and write back instructions.
2741e8d8bef9SDimitry Andric
27420b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Acquire ||
27430b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
27440b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2745*0fca6ea1SDimitry Andric Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
27460b57cec5SDimitry Andric Position::BEFORE);
27470b57cec5SDimitry Andric
27480b57cec5SDimitry Andric return Changed;
27490b57cec5SDimitry Andric }
27500b57cec5SDimitry Andric
27510b57cec5SDimitry Andric return Changed;
27520b57cec5SDimitry Andric }
27530b57cec5SDimitry Andric
27540b57cec5SDimitry Andric bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
27550b57cec5SDimitry Andric MachineBasicBlock::iterator &MI) {
27560b57cec5SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
27570b57cec5SDimitry Andric
27580b57cec5SDimitry Andric bool Changed = false;
27590b57cec5SDimitry Andric
27600b57cec5SDimitry Andric if (MOI.isAtomic()) {
2761fe6060f1SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2762fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::Acquire ||
2763fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::Release ||
2764fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2765fe6060f1SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2766fe6060f1SDimitry Andric Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
2767fe6060f1SDimitry Andric MOI.getInstrAddrSpace());
2768fe6060f1SDimitry Andric }
2769fe6060f1SDimitry Andric
27700b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Release ||
27710b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
27720b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
27730b57cec5SDimitry Andric MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2774e8d8bef9SDimitry Andric Changed |= CC->insertRelease(MI, MOI.getScope(),
27750b57cec5SDimitry Andric MOI.getOrderingAddrSpace(),
27760b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
27770b57cec5SDimitry Andric Position::BEFORE);
27780b57cec5SDimitry Andric
27790b57cec5SDimitry Andric if (MOI.getOrdering() == AtomicOrdering::Acquire ||
27800b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
27810b57cec5SDimitry Andric MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
27820b57cec5SDimitry Andric MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
27830b57cec5SDimitry Andric MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
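      // A returning atomic must wait on its load component so the loaded
      // value is available; a non-returning atomic only needs its store
      // component to complete.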
27840b57cec5SDimitry Andric Changed |= CC->insertWait(MI, MOI.getScope(),
2785fe6060f1SDimitry Andric MOI.getInstrAddrSpace(),
27860b57cec5SDimitry Andric isAtomicRet(*MI) ? SIMemOp::LOAD :
27870b57cec5SDimitry Andric SIMemOp::STORE,
27880b57cec5SDimitry Andric MOI.getIsCrossAddressSpaceOrdering(),
27890b57cec5SDimitry Andric Position::AFTER);
2790e8d8bef9SDimitry Andric Changed |= CC->insertAcquire(MI, MOI.getScope(),
27910b57cec5SDimitry Andric MOI.getOrderingAddrSpace(),
27920b57cec5SDimitry Andric Position::AFTER);
27930b57cec5SDimitry Andric }
27940b57cec5SDimitry Andric
27950b57cec5SDimitry Andric return Changed;
27960b57cec5SDimitry Andric }
27970b57cec5SDimitry Andric
27980b57cec5SDimitry Andric return Changed;
27990b57cec5SDimitry Andric }
28000b57cec5SDimitry Andric
28010b57cec5SDimitry Andric bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
28020b57cec5SDimitry Andric bool Changed = false;
28030b57cec5SDimitry Andric
2804*0fca6ea1SDimitry Andric const MachineModuleInfo &MMI =
2805*0fca6ea1SDimitry Andric getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2806*0fca6ea1SDimitry Andric
2807*0fca6ea1SDimitry Andric SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>());
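  // Select the cache-control implementation that matches this subtarget's
  // generation.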
28080b57cec5SDimitry Andric CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
28090b57cec5SDimitry Andric
28100b57cec5SDimitry Andric for (auto &MBB : MF) {
28110b57cec5SDimitry Andric for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
28125ffd83dbSDimitry Andric
2813e8d8bef9SDimitry Andric // Unbundle instructions after the post-RA scheduler.
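      // The legalizer inspects individual memory operations, so split any
      // load/store bundle apart and clear the internal-read flags its
      // operands acquired when the bundle was formed.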
2814fe6060f1SDimitry Andric if (MI->isBundle() && MI->mayLoadOrStore()) {
28155ffd83dbSDimitry Andric MachineBasicBlock::instr_iterator II(MI->getIterator());
28165ffd83dbSDimitry Andric for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
28175ffd83dbSDimitry Andric I != E && I->isBundledWithPred(); ++I) {
28185ffd83dbSDimitry Andric I->unbundleFromPred();
28195ffd83dbSDimitry Andric for (MachineOperand &MO : I->operands())
28205ffd83dbSDimitry Andric if (MO.isReg())
28215ffd83dbSDimitry Andric MO.setIsInternalRead(false);
28225ffd83dbSDimitry Andric }
28235ffd83dbSDimitry Andric
28245ffd83dbSDimitry Andric MI->eraseFromParent();
28255ffd83dbSDimitry Andric MI = II->getIterator();
28265ffd83dbSDimitry Andric }
28275ffd83dbSDimitry Andric
28280b57cec5SDimitry Andric if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
28290b57cec5SDimitry Andric continue;
28300b57cec5SDimitry Andric
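      // Dispatch on the kind of memory operation: loads, stores, fences, and
      // cmpxchg/RMW atomics each have their own expansion.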
28310b57cec5SDimitry Andric if (const auto &MOI = MOA.getLoadInfo(MI))
2832bdd1243dSDimitry Andric Changed |= expandLoad(*MOI, MI);
283306c3fb27SDimitry Andric else if (const auto &MOI = MOA.getStoreInfo(MI)) {
2834bdd1243dSDimitry Andric Changed |= expandStore(*MOI, MI);
283506c3fb27SDimitry Andric Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
283606c3fb27SDimitry Andric } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
2837bdd1243dSDimitry Andric Changed |= expandAtomicFence(*MOI, MI);
28380b57cec5SDimitry Andric else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
2839bdd1243dSDimitry Andric Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
28400b57cec5SDimitry Andric }
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric
28430b57cec5SDimitry Andric Changed |= removeAtomicPseudoMIs();
28440b57cec5SDimitry Andric return Changed;
28450b57cec5SDimitry Andric }
28460b57cec5SDimitry Andric
28470b57cec5SDimitry Andric INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
28480b57cec5SDimitry Andric
28490b57cec5SDimitry Andric char SIMemoryLegalizer::ID = 0;
28500b57cec5SDimitry Andric char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
28510b57cec5SDimitry Andric
28520b57cec5SDimitry Andric FunctionPass *llvm::createSIMemoryLegalizerPass() {
28530b57cec5SDimitry Andric return new SIMemoryLegalizer();
28540b57cec5SDimitry Andric }
2855