//===-- AMDGPUMarkLastScratchLoad.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Mark scratch load/spill instructions which are guaranteed to be the last time
// this scratch slot is used so it can be evicted from caches.
//
// TODO: Handle general stack accesses, not just spilling.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

// Identifier for this pass; it is also handed to the INITIALIZE_PASS_* macros
// at the bottom of the file as the pass argument string.
#define DEBUG_TYPE "amdgpu-mark-last-scratch-load"

267a6dacacSDimitry Andric namespace {
277a6dacacSDimitry Andric
287a6dacacSDimitry Andric class AMDGPUMarkLastScratchLoad : public MachineFunctionPass {
297a6dacacSDimitry Andric private:
307a6dacacSDimitry Andric LiveStacks *LS = nullptr;
317a6dacacSDimitry Andric LiveIntervals *LIS = nullptr;
327a6dacacSDimitry Andric SlotIndexes *SI = nullptr;
337a6dacacSDimitry Andric const SIInstrInfo *SII = nullptr;
347a6dacacSDimitry Andric
357a6dacacSDimitry Andric public:
367a6dacacSDimitry Andric static char ID;
377a6dacacSDimitry Andric
AMDGPUMarkLastScratchLoad()387a6dacacSDimitry Andric AMDGPUMarkLastScratchLoad() : MachineFunctionPass(ID) {
397a6dacacSDimitry Andric initializeAMDGPUMarkLastScratchLoadPass(*PassRegistry::getPassRegistry());
407a6dacacSDimitry Andric }
417a6dacacSDimitry Andric
427a6dacacSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
437a6dacacSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const447a6dacacSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
45*0fca6ea1SDimitry Andric AU.addRequired<SlotIndexesWrapperPass>();
46*0fca6ea1SDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>();
477a6dacacSDimitry Andric AU.addRequired<LiveStacks>();
487a6dacacSDimitry Andric AU.setPreservesAll();
497a6dacacSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
507a6dacacSDimitry Andric }
517a6dacacSDimitry Andric
getPassName() const527a6dacacSDimitry Andric StringRef getPassName() const override {
537a6dacacSDimitry Andric return "AMDGPU Mark Last Scratch Load";
547a6dacacSDimitry Andric }
557a6dacacSDimitry Andric };
567a6dacacSDimitry Andric
577a6dacacSDimitry Andric } // end anonymous namespace
587a6dacacSDimitry Andric
runOnMachineFunction(MachineFunction & MF)597a6dacacSDimitry Andric bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
607a6dacacSDimitry Andric if (skipFunction(MF.getFunction()))
617a6dacacSDimitry Andric return false;
627a6dacacSDimitry Andric
637a6dacacSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
647a6dacacSDimitry Andric if (ST.getGeneration() < AMDGPUSubtarget::GFX12)
657a6dacacSDimitry Andric return false;
667a6dacacSDimitry Andric
677a6dacacSDimitry Andric LS = &getAnalysis<LiveStacks>();
68*0fca6ea1SDimitry Andric LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
69*0fca6ea1SDimitry Andric SI = &getAnalysis<SlotIndexesWrapperPass>().getSI();
707a6dacacSDimitry Andric SII = ST.getInstrInfo();
717a6dacacSDimitry Andric SlotIndexes &Slots = *LIS->getSlotIndexes();
727a6dacacSDimitry Andric
737a6dacacSDimitry Andric const unsigned NumSlots = LS->getNumIntervals();
747a6dacacSDimitry Andric if (NumSlots == 0) {
757a6dacacSDimitry Andric LLVM_DEBUG(dbgs() << "No live slots, skipping\n");
767a6dacacSDimitry Andric return false;
777a6dacacSDimitry Andric }
787a6dacacSDimitry Andric
797a6dacacSDimitry Andric LLVM_DEBUG(dbgs() << LS->getNumIntervals() << " intervals\n");
807a6dacacSDimitry Andric
817a6dacacSDimitry Andric bool Changed = false;
827a6dacacSDimitry Andric
837a6dacacSDimitry Andric for (auto &[SS, LI] : *LS) {
847a6dacacSDimitry Andric for (const LiveRange::Segment &Segment : LI.segments) {
857a6dacacSDimitry Andric
867a6dacacSDimitry Andric // Ignore segments that run to the end of basic block because in this case
877a6dacacSDimitry Andric // slot is still live at the end of it.
887a6dacacSDimitry Andric if (Segment.end.isBlock())
897a6dacacSDimitry Andric continue;
907a6dacacSDimitry Andric
917a6dacacSDimitry Andric const int FrameIndex = Register::stackSlot2Index(LI.reg());
927a6dacacSDimitry Andric MachineInstr *LastLoad = nullptr;
937a6dacacSDimitry Andric
947a6dacacSDimitry Andric MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end);
957a6dacacSDimitry Andric
967a6dacacSDimitry Andric // If there is no instruction at this slot because it was deleted take the
977a6dacacSDimitry Andric // instruction from the next slot.
987a6dacacSDimitry Andric if (!MISegmentEnd) {
997a6dacacSDimitry Andric SlotIndex NextSlot = Slots.getNextNonNullIndex(Segment.end);
1007a6dacacSDimitry Andric MISegmentEnd = SI->getInstructionFromIndex(NextSlot);
1017a6dacacSDimitry Andric }
1027a6dacacSDimitry Andric
1037a6dacacSDimitry Andric MachineInstr *MISegmentStart = SI->getInstructionFromIndex(Segment.start);
1047a6dacacSDimitry Andric MachineBasicBlock *BB = MISegmentEnd->getParent();
1057a6dacacSDimitry Andric
1067a6dacacSDimitry Andric // Start iteration backwards from segment end until the start of basic
1077a6dacacSDimitry Andric // block or start of segment if it is in the same basic block.
1087a6dacacSDimitry Andric auto End = BB->rend();
1097a6dacacSDimitry Andric if (MISegmentStart && MISegmentStart->getParent() == BB)
1107a6dacacSDimitry Andric End = MISegmentStart->getReverseIterator();
1117a6dacacSDimitry Andric
1127a6dacacSDimitry Andric for (auto MI = MISegmentEnd->getReverseIterator(); MI != End; ++MI) {
1137a6dacacSDimitry Andric int LoadFI = 0;
1147a6dacacSDimitry Andric
1157a6dacacSDimitry Andric if (SII->isLoadFromStackSlot(*MI, LoadFI) && LoadFI == FrameIndex) {
1167a6dacacSDimitry Andric LastLoad = &*MI;
1177a6dacacSDimitry Andric break;
1187a6dacacSDimitry Andric }
1197a6dacacSDimitry Andric }
1207a6dacacSDimitry Andric
1217a6dacacSDimitry Andric if (LastLoad && !LastLoad->memoperands_empty()) {
1227a6dacacSDimitry Andric MachineMemOperand *MMO = *LastLoad->memoperands_begin();
1237a6dacacSDimitry Andric MMO->setFlags(MOLastUse);
1247a6dacacSDimitry Andric Changed = true;
1257a6dacacSDimitry Andric LLVM_DEBUG(dbgs() << " Found last load: " << *LastLoad);
1267a6dacacSDimitry Andric }
1277a6dacacSDimitry Andric }
1287a6dacacSDimitry Andric }
1297a6dacacSDimitry Andric
1307a6dacacSDimitry Andric return Changed;
1317a6dacacSDimitry Andric }
1327a6dacacSDimitry Andric
1337a6dacacSDimitry Andric char AMDGPUMarkLastScratchLoad::ID = 0;
1347a6dacacSDimitry Andric
1357a6dacacSDimitry Andric char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoad::ID;
1367a6dacacSDimitry Andric
1377a6dacacSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
1387a6dacacSDimitry Andric "AMDGPU Mark last scratch load", false, false)
139*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
1407a6dacacSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveStacks)
1417a6dacacSDimitry Andric INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
1427a6dacacSDimitry Andric "AMDGPU Mark last scratch load", false, false)
143