//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass inserts writes to the VXRM CSR as needed by vector instructions. // Each instruction that uses VXRM carries an operand that contains its required // VXRM value. This pass tries to optimize placement to avoid redundant writes // to VXRM. // // This is done using 2 dataflow algorithms. The first is a forward data flow // to calculate where a VXRM value is available. The second is a backwards // dataflow to determine where a VXRM value is anticipated. // // Finally, we use the results of these two dataflows to insert VXRM writes // where a value is anticipated, but not available. // // FIXME: This pass does not split critical edges, so there can still be some // redundancy. // // FIXME: If we are willing to have writes that aren't always needed, we could // reduce the number of VXRM writes in some cases. //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h" #include "RISCVSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include using namespace llvm; #define DEBUG_TYPE "riscv-insert-write-vxrm" #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass" namespace { class VXRMInfo { uint8_t VXRMImm = 0; enum : uint8_t { Uninitialized, Static, Unknown, } State = Uninitialized; public: VXRMInfo() {} static VXRMInfo getUnknown() { VXRMInfo Info; Info.setUnknown(); return Info; } bool isValid() const { return State != Uninitialized; } void setUnknown() { State = Unknown; } bool isUnknown() const { return State == Unknown; } bool isStatic() const { return State == Static; } void setVXRMImm(unsigned Imm) { assert(Imm <= 3 && "Unexpected VXRM value"); VXRMImm = Imm; State = Static; } unsigned getVXRMImm() const { assert(isStatic() && VXRMImm <= 3 && "Unexpected state"); return VXRMImm; } bool operator==(const VXRMInfo &Other) const { // Uninitialized is only equal to another Uninitialized. if (State != Other.State) return false; if (isStatic()) return VXRMImm == Other.VXRMImm; assert((isValid() || isUnknown()) && "Unexpected state"); return true; } bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); } // Calculate the VXRMInfo visible to a block assuming this and Other are // both predecessors. VXRMInfo intersect(const VXRMInfo &Other) const { // If the new value isn't valid, ignore it. if (!Other.isValid()) return *this; // If this value isn't valid, this must be the first predecessor, use it. if (!isValid()) return Other; // If either is unknown, the result is unknown. if (isUnknown() || Other.isUnknown()) return VXRMInfo::getUnknown(); // If we have an exact match, return this. if (*this == Other) return *this; // Otherwise the result is unknown. return VXRMInfo::getUnknown(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Support for debugging, callable in GDB: V->dump() LLVM_DUMP_METHOD void dump() const { print(dbgs()); dbgs() << "\n"; } void print(raw_ostream &OS) const { OS << '{'; if (!isValid()) OS << "Uninitialized"; else if (isUnknown()) OS << "Unknown"; else OS << getVXRMImm(); OS << '}'; } #endif }; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_ATTRIBUTE_USED inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) { V.print(OS); return OS; } #endif struct BlockData { // Indicates if the block uses VXRM. Uninitialized means no use. VXRMInfo VXRMUse; // Indicates the VXRM output from the block. Unitialized means transparent. VXRMInfo VXRMOut; // Keeps track of the available VXRM value at the start of the basic bloc. VXRMInfo AvailableIn; // Keeps track of the available VXRM value at the end of the basic block. VXRMInfo AvailableOut; // Keeps track of what VXRM is anticipated at the start of the basic block. VXRMInfo AnticipatedIn; // Keeps track of what VXRM is anticipated at the end of the basic block. VXRMInfo AnticipatedOut; // Keeps track of whether the block is already in the queue. bool InQueue; BlockData() = default; }; class RISCVInsertWriteVXRM : public MachineFunctionPass { const TargetInstrInfo *TII; std::vector BlockInfo; std::queue WorkList; public: static char ID; RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } StringRef getPassName() const override { return RISCV_INSERT_WRITE_VXRM_NAME; } private: bool computeVXRMChanges(const MachineBasicBlock &MBB); void computeAvailable(const MachineBasicBlock &MBB); void computeAnticipated(const MachineBasicBlock &MBB); void emitWriteVXRM(MachineBasicBlock &MBB); }; } // end anonymous namespace char RISCVInsertWriteVXRM::ID = 0; INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, false, false) static bool ignoresVXRM(const MachineInstr &MI) { switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return false; case RISCV::VNCLIP_WI: case RISCV::VNCLIPU_WI: return MI.getOperand(3).getImm() == 0; } } bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { BlockData &BBInfo = BlockInfo[MBB.getNumber()]; bool NeedVXRMWrite = false; for (const MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); if (!BBInfo.VXRMUse.isValid()) BBInfo.VXRMUse.setVXRMImm(NewVXRMImm); BBInfo.VXRMOut.setVXRMImm(NewVXRMImm); NeedVXRMWrite = true; continue; } if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) { if (!BBInfo.VXRMUse.isValid()) BBInfo.VXRMUse.setUnknown(); BBInfo.VXRMOut.setUnknown(); } } return NeedVXRMWrite; } void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) { BlockData &BBInfo = BlockInfo[MBB.getNumber()]; BBInfo.InQueue = false; VXRMInfo Available; if (MBB.pred_empty()) { Available.setUnknown(); } else { for (const MachineBasicBlock *P : MBB.predecessors()) Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut); } // If we don't have any valid available info, wait until we do. if (!Available.isValid()) return; if (Available != BBInfo.AvailableIn) { BBInfo.AvailableIn = Available; LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB) << " changed to " << BBInfo.AvailableIn << "\n"); } if (BBInfo.VXRMOut.isValid()) Available = BBInfo.VXRMOut; if (Available == BBInfo.AvailableOut) return; BBInfo.AvailableOut = Available; LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB) << " changed to " << BBInfo.AvailableOut << "\n"); // Add the successors to the work list so that we can propagate. for (MachineBasicBlock *S : MBB.successors()) { if (!BlockInfo[S->getNumber()].InQueue) { BlockInfo[S->getNumber()].InQueue = true; WorkList.push(S); } } } void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) { BlockData &BBInfo = BlockInfo[MBB.getNumber()]; BBInfo.InQueue = false; VXRMInfo Anticipated; if (MBB.succ_empty()) { Anticipated.setUnknown(); } else { for (const MachineBasicBlock *S : MBB.successors()) Anticipated = Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn); } // If we don't have any valid anticipated info, wait until we do. if (!Anticipated.isValid()) return; if (Anticipated != BBInfo.AnticipatedOut) { BBInfo.AnticipatedOut = Anticipated; LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB) << " changed to " << BBInfo.AnticipatedOut << "\n"); } // If this block reads VXRM, copy it. if (BBInfo.VXRMUse.isValid()) Anticipated = BBInfo.VXRMUse; if (Anticipated == BBInfo.AnticipatedIn) return; BBInfo.AnticipatedIn = Anticipated; LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB) << " changed to " << BBInfo.AnticipatedIn << "\n"); // Add the predecessors to the work list so that we can propagate. for (MachineBasicBlock *P : MBB.predecessors()) { if (!BlockInfo[P->getNumber()].InQueue) { BlockInfo[P->getNumber()].InQueue = true; WorkList.push(P); } } } void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { const BlockData &BBInfo = BlockInfo[MBB.getNumber()]; VXRMInfo Info = BBInfo.AvailableIn; // Flag to indicates we need to insert a VXRM write. We want to delay it as // late as possible in this block. bool PendingInsert = false; // Insert VXRM write if anticipated and not available. if (BBInfo.AnticipatedIn.isStatic()) { // If this is the entry block and the value is anticipated, insert. if (MBB.isEntryBlock()) { PendingInsert = true; } else { // Search for any predecessors that wouldn't satisfy our requirement and // insert a write VXRM if needed. // NOTE: If one predecessor is able to provide the requirement, but // another isn't, it means we have a critical edge. The better placement // would be to split the critical edge. for (MachineBasicBlock *P : MBB.predecessors()) { const BlockData &PInfo = BlockInfo[P->getNumber()]; // If it's available out of the predecessor, then we're ok. if (PInfo.AvailableOut.isStatic() && PInfo.AvailableOut.getVXRMImm() == BBInfo.AnticipatedIn.getVXRMImm()) continue; // If the predecessor anticipates this value for all its succesors, // then a write to VXRM would have already occured before this block is // executed. if (PInfo.AnticipatedOut.isStatic() && PInfo.AnticipatedOut.getVXRMImm() == BBInfo.AnticipatedIn.getVXRMImm()) continue; PendingInsert = true; break; } } Info = BBInfo.AnticipatedIn; } for (MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); if (PendingInsert || !Info.isStatic() || Info.getVXRMImm() != NewVXRMImm) { assert((!PendingInsert || (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) && "Pending VXRM insertion mismatch"); LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs())); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) .addImm(NewVXRMImm); PendingInsert = false; } MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, /*IsImp*/ true)); Info.setVXRMImm(NewVXRMImm); continue; } if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) Info.setUnknown(); } // If all our successors anticipate a value, do the insert. // NOTE: It's possible that not all predecessors of our successor provide the // correct value. This can occur on critical edges. If we don't split the // critical edge we'll also have a write vxrm in the succesor that is // redundant with this one. if (PendingInsert || (BBInfo.AnticipatedOut.isStatic() && (!Info.isStatic() || Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) { assert((!PendingInsert || (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() && Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) && "Pending VXRM insertion mismatch"); LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB) << " changing to " << BBInfo.AnticipatedOut << "\n"); BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(), TII->get(RISCV::WriteVXRMImm)) .addImm(BBInfo.AnticipatedOut.getVXRMImm()); } } bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. const RISCVSubtarget &ST = MF.getSubtarget(); if (!ST.hasVInstructions()) return false; TII = ST.getInstrInfo(); assert(BlockInfo.empty() && "Expect empty block infos"); BlockInfo.resize(MF.getNumBlockIDs()); // Phase 1 - collect block information. bool NeedVXRMChange = false; for (const MachineBasicBlock &MBB : MF) NeedVXRMChange |= computeVXRMChanges(MBB); if (!NeedVXRMChange) { BlockInfo.clear(); return false; } // Phase 2 - Compute available VXRM using a forward walk. for (const MachineBasicBlock &MBB : MF) { WorkList.push(&MBB); BlockInfo[MBB.getNumber()].InQueue = true; } while (!WorkList.empty()) { const MachineBasicBlock &MBB = *WorkList.front(); WorkList.pop(); computeAvailable(MBB); } // Phase 3 - Compute anticipated VXRM using a backwards walk. for (const MachineBasicBlock &MBB : llvm::reverse(MF)) { WorkList.push(&MBB); BlockInfo[MBB.getNumber()].InQueue = true; } while (!WorkList.empty()) { const MachineBasicBlock &MBB = *WorkList.front(); WorkList.pop(); computeAnticipated(MBB); } // Phase 4 - Emit VXRM writes at the earliest place possible. for (MachineBasicBlock &MBB : MF) emitWriteVXRM(MBB); BlockInfo.clear(); return true; } FunctionPass *llvm::createRISCVInsertWriteVXRMPass() { return new RISCVInsertWriteVXRM(); }