Lines Matching full:mi
222 /// Reports unsupported message \p Msg for \p MI to LLVM context.
223 void reportUnsupported(const MachineBasicBlock::iterator &MI,
236 /// \returns Info constructed from \p MI, which has at least machine memory
239 constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;
246 /// \returns Load info if \p MI is a load operation, "std::nullopt" otherwise.
248 getLoadInfo(const MachineBasicBlock::iterator &MI) const;
250 /// \returns Store info if \p MI is a store operation, "std::nullopt"
253 getStoreInfo(const MachineBasicBlock::iterator &MI) const;
255 /// \returns Atomic fence info if \p MI is an atomic fence operation,
258 getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;
260 /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
263 getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
282 /// Sets named bit \p BitName to "true" if present in instruction \p MI.
283 /// \returns true if \p MI is modified, false otherwise.
284 bool enableNamedBit(const MachineBasicBlock::iterator MI,
292 /// Update \p MI memory load instruction to bypass any caches up to
295 virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
299 /// Update \p MI memory store instruction to bypass any caches up to
302 virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
306 /// Update \p MI memory read-modify-write instruction to bypass any caches up
309 virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
313 /// Update \p MI memory instruction of kind \p Op associated with address
316 virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
322 virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const { in expandSystemScopeStore()
327 /// to instruction \p MI to ensure memory instructions before \p Pos of kind
333 virtual bool insertWait(MachineBasicBlock::iterator &MI,
341 /// instruction \p MI to ensure any subsequent memory instructions of this
345 virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
351 /// instruction \p MI to ensure previous memory instructions by this thread
356 virtual bool insertRelease(MachineBasicBlock::iterator &MI,
366 MachineBasicBlock::iterator &MI) const { in tryForceStoreSC0SC1()
374 /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
376 bool enableGLCBit(const MachineBasicBlock::iterator &MI) const { in enableGLCBit()
377 return enableNamedBit(MI, AMDGPU::CPol::GLC); in enableGLCBit()
380 /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
382 bool enableSLCBit(const MachineBasicBlock::iterator &MI) const { in enableSLCBit()
383 return enableNamedBit(MI, AMDGPU::CPol::SLC); in enableSLCBit()
390 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
394 bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
398 bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
402 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
407 bool insertWait(MachineBasicBlock::iterator &MI,
414 bool insertAcquire(MachineBasicBlock::iterator &MI,
419 bool insertRelease(MachineBasicBlock::iterator &MI,
431 bool insertAcquire(MachineBasicBlock::iterator &MI,
443 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
447 bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
451 bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
455 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
460 bool insertWait(MachineBasicBlock::iterator &MI,
467 bool insertAcquire(MachineBasicBlock::iterator &MI,
472 bool insertRelease(MachineBasicBlock::iterator &MI,
482 /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
484 bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const { in enableSC0Bit()
485 return enableNamedBit(MI, AMDGPU::CPol::SC0); in enableSC0Bit()
488 /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI
490 bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const { in enableSC1Bit()
491 return enableNamedBit(MI, AMDGPU::CPol::SC1); in enableSC1Bit()
494 /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI
496 bool enableNTBit(const MachineBasicBlock::iterator &MI) const { in enableNTBit()
497 return enableNamedBit(MI, AMDGPU::CPol::NT); in enableNTBit()
504 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
508 bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
512 bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
516 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
521 bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
524 bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
529 MachineBasicBlock::iterator &MI) const override { in tryForceStoreSC0SC1()
536 Changed |= enableSC0Bit(MI); in tryForceStoreSC0SC1()
537 Changed |= enableSC1Bit(MI); in tryForceStoreSC0SC1()
546 /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
548 bool enableDLCBit(const MachineBasicBlock::iterator &MI) const { in enableDLCBit()
549 return enableNamedBit(MI, AMDGPU::CPol::DLC); in enableDLCBit()
556 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
560 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
565 bool insertWait(MachineBasicBlock::iterator &MI,
572 bool insertAcquire(MachineBasicBlock::iterator &MI,
582 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
586 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
594 // Sets TH policy to \p Value if CPol operand is present in instruction \p MI.
595 // \returns true if \p MI is modified, false otherwise.
596 bool setTH(const MachineBasicBlock::iterator MI,
599 // MI. \returns true if \p MI is modified, false otherwise.
600 bool setScope(const MachineBasicBlock::iterator MI,
610 insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;
612 bool setAtomicScope(const MachineBasicBlock::iterator &MI,
618 bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
622 bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
625 bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
630 bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
632 bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
636 bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, in enableLoadCacheBypass() argument
639 return setAtomicScope(MI, Scope, AddrSpace); in enableLoadCacheBypass()
642 bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI, in enableStoreCacheBypass() argument
645 return setAtomicScope(MI, Scope, AddrSpace); in enableStoreCacheBypass()
648 bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI, in enableRMWCacheBypass() argument
651 return setAtomicScope(MI, Scope, AddrSpace); in enableRMWCacheBypass()
664 /// Return true iff instruction \p MI is an atomic instruction that
666 bool isAtomicRet(const MachineInstr &MI) const { in isAtomicRet()
667 return SIInstrInfo::isAtomicRet(MI); in isAtomicRet()
674 /// Expands load operation \p MI. Returns true if instructions are
675 /// added/deleted or \p MI is modified, false otherwise.
677 MachineBasicBlock::iterator &MI);
678 /// Expands store operation \p MI. Returns true if instructions are
679 /// added/deleted or \p MI is modified, false otherwise.
681 MachineBasicBlock::iterator &MI);
682 /// Expands atomic fence operation \p MI. Returns true if
683 /// instructions are added/deleted or \p MI is modified, false otherwise.
685 MachineBasicBlock::iterator &MI);
686 /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
687 /// instructions are added/deleted or \p MI is modified, false otherwise.
689 MachineBasicBlock::iterator &MI);
713 void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) { in diagnoseUnknownMMRAASName() argument
714 const MachineFunction *MF = MI.getMF(); in diagnoseUnknownMMRAASName()
722 DiagnosticInfoUnsupported BadTag(Fn, Str.str(), MI.getDebugLoc(), DS_Warning); in diagnoseUnknownMMRAASName()
726 /// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
729 static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI, in getFenceAddrSpaceMMRA() argument
733 auto MMRA = MMRAMetadata(MI.getMMRAMetadata()); in getFenceAddrSpaceMMRA()
745 diagnoseUnknownMMRAASName(MI, Suffix); in getFenceAddrSpaceMMRA()
753 void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI, in reportUnsupported() argument
755 const Function &Func = MI->getParent()->getParent()->getFunction(); in reportUnsupported()
756 DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc()); in reportUnsupported()
813 const MachineBasicBlock::iterator &MI) const { in constructFromMIWithMMO()
814 assert(MI->getNumMemOperands() > 0); in constructFromMIWithMMO()
826 for (const auto &MMO : MI->memoperands()) { in constructFromMIWithMMO()
837 reportUnsupported(MI, in constructFromMIWithMMO()
857 reportUnsupported(MI, "Unsupported atomic synchronization scope"); in constructFromMIWithMMO()
865 reportUnsupported(MI, "Unsupported atomic address space"); in constructFromMIWithMMO()
875 SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const { in getLoadInfo()
876 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); in getLoadInfo()
878 if (!(MI->mayLoad() && !MI->mayStore())) in getLoadInfo()
882 if (MI->getNumMemOperands() == 0) in getLoadInfo()
885 return constructFromMIWithMMO(MI); in getLoadInfo()
889 SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const { in getStoreInfo()
890 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); in getStoreInfo()
892 if (!(!MI->mayLoad() && MI->mayStore())) in getStoreInfo()
896 if (MI->getNumMemOperands() == 0) in getStoreInfo()
899 return constructFromMIWithMMO(MI); in getStoreInfo()
903 SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const { in getAtomicFenceInfo()
904 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); in getAtomicFenceInfo()
906 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE) in getAtomicFenceInfo()
910 static_cast<AtomicOrdering>(MI->getOperand(0).getImm()); in getAtomicFenceInfo()
912 SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm()); in getAtomicFenceInfo()
915 reportUnsupported(MI, "Unsupported atomic synchronization scope"); in getAtomicFenceInfo()
927 reportUnsupported(MI, "Unsupported atomic address space"); in getAtomicFenceInfo()
936 const MachineBasicBlock::iterator &MI) const { in getAtomicCmpxchgOrRmwInfo()
937 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); in getAtomicCmpxchgOrRmwInfo()
939 if (!(MI->mayLoad() && MI->mayStore())) in getAtomicCmpxchgOrRmwInfo()
943 if (MI->getNumMemOperands() == 0) in getAtomicCmpxchgOrRmwInfo()
946 return constructFromMIWithMMO(MI); in getAtomicCmpxchgOrRmwInfo()
955 bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI, in enableNamedBit() argument
957 MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol); in enableNamedBit()
984 const MachineBasicBlock::iterator &MI, in enableLoadCacheBypass() argument
987 assert(MI->mayLoad() && !MI->mayStore()); in enableLoadCacheBypass()
996 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
1019 const MachineBasicBlock::iterator &MI, in enableStoreCacheBypass() argument
1022 assert(!MI->mayLoad() && MI->mayStore()); in enableStoreCacheBypass()
1032 const MachineBasicBlock::iterator &MI, in enableRMWCacheBypass() argument
1035 assert(MI->mayLoad() && MI->mayStore()); in enableRMWCacheBypass()
1047 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
1052 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
1067 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1074 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
1083 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1084 Changed |= enableSLCBit(MI); in enableVolatileAndOrNonTemporal()
1091 bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, in insertWait() argument
1099 MachineBasicBlock &MBB = *MI->getParent(); in insertWait()
1100 DebugLoc DL = MI->getDebugLoc(); in insertWait()
1103 ++MI; in insertWait()
1178 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft)) in insertWait()
1184 --MI; in insertWait()
1189 bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
1198 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
1199 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
1202 ++MI; in insertAcquire()
1208 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1)); in insertAcquire()
1229 --MI; in insertAcquire()
1234 bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI, in insertRelease() argument
1239 return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, in insertRelease()
1243 bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
1252 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
1253 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
1262 ++MI; in insertAcquire()
1268 BuildMI(MBB, MI, DL, TII->get(InvalidateL1)); in insertAcquire()
1289 --MI; in insertAcquire()
1295 const MachineBasicBlock::iterator &MI, in enableLoadCacheBypass() argument
1298 assert(MI->mayLoad() && !MI->mayStore()); in enableLoadCacheBypass()
1307 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
1315 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
1337 const MachineBasicBlock::iterator &MI, in enableStoreCacheBypass() argument
1340 assert(!MI->mayLoad() && MI->mayStore()); in enableStoreCacheBypass()
1372 const MachineBasicBlock::iterator &MI, in enableRMWCacheBypass() argument
1375 assert(MI->mayLoad() && MI->mayStore()); in enableRMWCacheBypass()
1400 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
1405 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
1420 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1427 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
1436 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1437 Changed |= enableSLCBit(MI); in enableVolatileAndOrNonTemporal()
1444 bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI, in insertWait() argument
1468 return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op, in insertWait()
1472 bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
1481 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
1482 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
1485 ++MI; in insertAcquire()
1493 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2)); in insertAcquire()
1531 --MI; in insertAcquire()
1533 Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos); in insertAcquire()
1538 bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, in insertRelease() argument
1545 MachineBasicBlock &MBB = *MI->getParent(); in insertRelease()
1546 const DebugLoc &DL = MI->getDebugLoc(); in insertRelease()
1549 ++MI; in insertRelease()
1560 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) in insertRelease()
1579 --MI; in insertRelease()
1582 SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace, in insertRelease()
1589 const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, in enableLoadCacheBypass() argument
1591 assert(MI->mayLoad() && !MI->mayStore()); in enableLoadCacheBypass()
1598 Changed |= enableSC0Bit(MI); in enableLoadCacheBypass()
1599 Changed |= enableSC1Bit(MI); in enableLoadCacheBypass()
1603 Changed |= enableSC1Bit(MI); in enableLoadCacheBypass()
1611 Changed |= enableSC0Bit(MI); in enableLoadCacheBypass()
1633 const MachineBasicBlock::iterator &MI, in enableStoreCacheBypass() argument
1635 assert(!MI->mayLoad() && MI->mayStore()); in enableStoreCacheBypass()
1642 Changed |= enableSC0Bit(MI); in enableStoreCacheBypass()
1643 Changed |= enableSC1Bit(MI); in enableStoreCacheBypass()
1647 Changed |= enableSC1Bit(MI); in enableStoreCacheBypass()
1651 Changed |= enableSC0Bit(MI); in enableStoreCacheBypass()
1673 const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, in enableRMWCacheBypass() argument
1675 assert(MI->mayLoad() && MI->mayStore()); in enableRMWCacheBypass()
1682 Changed |= enableSC1Bit(MI); in enableRMWCacheBypass()
1702 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
1707 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
1719 Changed |= enableSC0Bit(MI); in enableVolatileAndOrNonTemporal()
1720 Changed |= enableSC1Bit(MI); in enableVolatileAndOrNonTemporal()
1727 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
1734 Changed |= enableNTBit(MI); in enableVolatileAndOrNonTemporal()
1741 bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
1750 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
1751 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
1754 ++MI; in insertAcquire()
1762 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV)) in insertAcquire()
1776 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV)) in insertAcquire()
1794 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV)) in insertAcquire()
1822 --MI; in insertAcquire()
1827 bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, in insertRelease() argument
1834 MachineBasicBlock &MBB = *MI->getParent(); in insertRelease()
1835 DebugLoc DL = MI->getDebugLoc(); in insertRelease()
1838 ++MI; in insertRelease()
1849 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) in insertRelease()
1858 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)) in insertRelease()
1880 --MI; in insertRelease()
1884 Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, in insertRelease()
1891 const MachineBasicBlock::iterator &MI, in enableLoadCacheBypass() argument
1894 assert(MI->mayLoad() && !MI->mayStore()); in enableLoadCacheBypass()
1903 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
1904 Changed |= enableDLCBit(MI); in enableLoadCacheBypass()
1912 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
1934 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
1940 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
1955 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1956 Changed |= enableDLCBit(MI); in enableVolatileAndOrNonTemporal()
1964 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
1975 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
1976 Changed |= enableSLCBit(MI); in enableVolatileAndOrNonTemporal()
1984 bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI, in insertWait() argument
1992 MachineBasicBlock &MBB = *MI->getParent(); in insertWait()
1993 DebugLoc DL = MI->getDebugLoc(); in insertWait()
1996 ++MI; in insertWait()
2087 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft)) in insertWait()
2093 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft)) in insertWait()
2100 --MI; in insertWait()
2105 bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
2114 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
2115 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
2118 ++MI; in insertAcquire()
2127 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV)); in insertAcquire()
2128 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV)); in insertAcquire()
2137 BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV)); in insertAcquire()
2158 --MI; in insertAcquire()
2164 const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, in enableLoadCacheBypass() argument
2166 assert(MI->mayLoad() && !MI->mayStore()); in enableLoadCacheBypass()
2175 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
2183 Changed |= enableGLCBit(MI); in enableLoadCacheBypass()
2205 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
2211 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
2226 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
2229 Changed |= enableDLCBit(MI); in enableVolatileAndOrNonTemporal()
2236 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
2247 Changed |= enableGLCBit(MI); in enableVolatileAndOrNonTemporal()
2248 Changed |= enableSLCBit(MI); in enableVolatileAndOrNonTemporal()
2251 Changed |= enableDLCBit(MI); in enableVolatileAndOrNonTemporal()
2258 bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI, in setTH() argument
2260 MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol); in setTH()
2273 bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI, in setScope() argument
2275 MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol); in setScope()
2289 const MachineBasicBlock::iterator MI) const { in insertWaitsBeforeSystemScopeStore()
2292 MachineBasicBlock &MBB = *MI->getParent(); in insertWaitsBeforeSystemScopeStore()
2293 const DebugLoc &DL = MI->getDebugLoc(); in insertWaitsBeforeSystemScopeStore()
2295 BuildMI(MBB, MI, DL, TII->get(S_WAIT_LOADCNT_soft)).addImm(0); in insertWaitsBeforeSystemScopeStore()
2296 BuildMI(MBB, MI, DL, TII->get(S_WAIT_SAMPLECNT_soft)).addImm(0); in insertWaitsBeforeSystemScopeStore()
2297 BuildMI(MBB, MI, DL, TII->get(S_WAIT_BVHCNT_soft)).addImm(0); in insertWaitsBeforeSystemScopeStore()
2298 BuildMI(MBB, MI, DL, TII->get(S_WAIT_KMCNT_soft)).addImm(0); in insertWaitsBeforeSystemScopeStore()
2299 BuildMI(MBB, MI, DL, TII->get(S_WAIT_STORECNT_soft)).addImm(0); in insertWaitsBeforeSystemScopeStore()
2304 bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI, in insertWait() argument
2311 MachineBasicBlock &MBB = *MI->getParent(); in insertWait()
2312 DebugLoc DL = MI->getDebugLoc(); in insertWait()
2319 ++MI; in insertWait()
2378 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0); in insertWait()
2379 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0); in insertWait()
2380 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0); in insertWait()
2385 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0); in insertWait()
2390 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0); in insertWait()
2395 --MI; in insertWait()
2400 bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, in insertAcquire() argument
2407 MachineBasicBlock &MBB = *MI->getParent(); in insertAcquire()
2408 DebugLoc DL = MI->getDebugLoc(); in insertAcquire()
2446 ++MI; in insertAcquire()
2448 BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm); in insertAcquire()
2451 --MI; in insertAcquire()
2456 bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI, in insertRelease() argument
2461 MachineBasicBlock &MBB = *MI->getParent(); in insertRelease()
2462 DebugLoc DL = MI->getDebugLoc(); in insertRelease()
2474 ++MI; in insertRelease()
2508 BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB)).addImm(ScopeImm); in insertRelease()
2511 --MI; in insertRelease()
2516 insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, in insertRelease()
2523 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, in enableVolatileAndOrNonTemporal() argument
2527 assert(MI->mayLoad() ^ MI->mayStore()); in enableVolatileAndOrNonTemporal()
2539 Changed |= setTH(MI, AMDGPU::CPol::TH_LU); in enableVolatileAndOrNonTemporal()
2542 Changed |= setTH(MI, AMDGPU::CPol::TH_NT); in enableVolatileAndOrNonTemporal()
2546 Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS); in enableVolatileAndOrNonTemporal()
2549 Changed |= insertWaitsBeforeSystemScopeStore(MI); in enableVolatileAndOrNonTemporal()
2556 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, in enableVolatileAndOrNonTemporal()
2564 MachineBasicBlock::iterator &MI) const { in expandSystemScopeStore()
2565 MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol); in expandSystemScopeStore()
2567 return insertWaitsBeforeSystemScopeStore(MI); in expandSystemScopeStore()
2572 bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI, in setAtomicScope() argument
2580 Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS); in setAtomicScope()
2583 Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV); in setAtomicScope()
2589 Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE); in setAtomicScope()
2614 for (auto &MI : AtomicPseudoMIs) in removeAtomicPseudoMIs() local
2615 MI->eraseFromParent(); in removeAtomicPseudoMIs()
2622 MachineBasicBlock::iterator &MI) { in expandLoad() argument
2623 assert(MI->mayLoad() && !MI->mayStore()); in expandLoad()
2631 Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(), in expandLoad()
2636 Changed |= CC->insertWait(MI, MOI.getScope(), in expandLoad()
2644 Changed |= CC->insertWait(MI, MOI.getScope(), in expandLoad()
2649 Changed |= CC->insertAcquire(MI, MOI.getScope(), in expandLoad()
2661 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(), in expandLoad()
2668 MachineBasicBlock::iterator &MI) { in expandStore() argument
2669 assert(!MI->mayLoad() && MI->mayStore()); in expandStore()
2677 Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(), in expandStore()
2683 Changed |= CC->insertRelease(MI, MOI.getScope(), in expandStore()
2695 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(), in expandStore()
2700 Changed |= CC->expandSystemScopeStore(MI); in expandStore()
2705 MachineBasicBlock::iterator &MI) { in expandAtomicFence() argument
2706 assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE); in expandAtomicFence()
2708 AtomicPseudoMIs.push_back(MI); in expandAtomicFence()
2715 getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace()); in expandAtomicFence()
2720 MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE, in expandAtomicFence()
2733 Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace, in expandAtomicFence()
2745 Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace, in expandAtomicFence()
2755 MachineBasicBlock::iterator &MI) { in expandAtomicCmpxchgOrRmw() argument
2756 assert(MI->mayLoad() && MI->mayStore()); in expandAtomicCmpxchgOrRmw()
2766 Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(), in expandAtomicCmpxchgOrRmw()
2774 Changed |= CC->insertRelease(MI, MOI.getScope(), in expandAtomicCmpxchgOrRmw()
2784 Changed |= CC->insertWait(MI, MOI.getScope(), in expandAtomicCmpxchgOrRmw()
2786 isAtomicRet(*MI) ? SIMemOp::LOAD : in expandAtomicCmpxchgOrRmw()
2790 Changed |= CC->insertAcquire(MI, MOI.getScope(), in expandAtomicCmpxchgOrRmw()
2811 for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { in runOnMachineFunction() local
2814 if (MI->isBundle() && MI->mayLoadOrStore()) { in runOnMachineFunction()
2815 MachineBasicBlock::instr_iterator II(MI->getIterator()); in runOnMachineFunction()
2824 MI->eraseFromParent(); in runOnMachineFunction()
2825 MI = II->getIterator(); in runOnMachineFunction()
2828 if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) in runOnMachineFunction()
2831 if (const auto &MOI = MOA.getLoadInfo(MI)) in runOnMachineFunction()
2832 Changed |= expandLoad(*MOI, MI); in runOnMachineFunction()
2833 else if (const auto &MOI = MOA.getStoreInfo(MI)) { in runOnMachineFunction()
2834 Changed |= expandStore(*MOI, MI); in runOnMachineFunction()
2835 Changed |= CC->tryForceStoreSC0SC1(*MOI, MI); in runOnMachineFunction()
2836 } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) in runOnMachineFunction()
2837 Changed |= expandAtomicFence(*MOI, MI); in runOnMachineFunction()
2838 else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) in runOnMachineFunction()
2839 Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI); in runOnMachineFunction()