//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1: case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4: case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2: case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1: case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4: case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2: case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1: case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4: case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1: case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4: case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}
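// Illustrative sketch of the overall transformation (hand-written with
// schematic operands, not output from an actual compilation): given two
// vector pseudos that agree on their demanded VL/VTYPE state, e.g.
//
//   %v = PseudoVLE32_V_M1 %base, %avl, 5 /* log2(e32) */
//   %w = PseudoVADD_VV_M1 %v, %v, %avl, 5
//
// phase 3 materializes a single configuration instruction ahead of the pair:
//
//   vsetvli a0, a1, e32, m1, ta, ma
//   vle32.v v8, (a2)
//   vadd.vv v9, v8, v8
//
// A second vsetvli before the vadd.vv is avoided because the abstract state
// computed after the vle32.v already satisfies the vadd.vv's requirements.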
/// Get the EEW for a load or store instruction. Return None if MI is not
/// a load or store which ignores SEW.
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  case RISCV::PseudoVLE8_V_M1: case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2: case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4: case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8: case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2: case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4: case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8: case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1: case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2: case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4: case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8: case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2: case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4: case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8: case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1: case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2: case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4: case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8: case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2: case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4: case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8: case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1: case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2: case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4: case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8: case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2: case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4: case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8: case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  case RISCV::PseudoVLE16_V_M1: case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2: case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4: case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8: case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2: case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4: case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1: case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2: case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4: case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8: case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2: case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4: case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1: case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2: case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4: case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8: case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2: case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4: case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1: case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2: case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4: case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8: case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2: case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4: case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  case RISCV::PseudoVLE32_V_M1: case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2: case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4: case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8: case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2: case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1: case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2: case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4: case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8: case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2: case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1: case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2: case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4: case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8: case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2: case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1: case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2: case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4: case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8: case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2: case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  case RISCV::PseudoVLE64_V_M1: case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2: case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4: case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8: case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1: case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2: case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4: case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8: case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1: case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2: case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4: case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8: case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1: case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2: case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4: case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8: case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    return Log2SEW == 0;
  }
  return false;
}

static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
  return (SEW * 8) / LMul;
}
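// Worked example of the fixed-point arithmetic above: for SEW=16 and
// LMUL=1/2, decodeVLMUL yields LMul=2 with Fractional=true, so the scaled
// LMul is 8/2 = 4 and the returned ratio is (16*8)/4 = 32. The same ratio
// results from SEW=32 with LMUL=1, which is exactly why the ratio (rather
// than SEW and LMUL individually) determines VLMAX.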
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1, uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW && RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;
  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;
  if (Used.SEWLMULRatio) {
    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                  RISCVVType::getVLMUL(VType1));
    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                  RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }
  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}
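// Concrete illustration of the demanded-fields relaxation above: a unit
// stride store such as PseudoVSE32_V_M1 encodes its EEW in the opcode, so it
// demands VL and the SEW/LMUL *ratio* but not the exact SEW or LMUL, and as
// a store it demands neither policy bit. A prior "vsetvli ... e64, m2" state
// therefore satisfies it just as well as "e32, m1" would, since both give a
// ratio of 32.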
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return ::getSEWLMULRatio(SEW, VLMul);
  }
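  // Background note (stated here for clarity, not pass-specific logic): per
  // the RVV spec, VLMAX = LMUL * VLEN / SEW, so for a fixed VLEN it depends
  // only on the SEW/LMUL ratio. For example, with VLEN=128 both
  // (SEW=32, LMUL=1) and (SEW=64, LMUL=2) have ratio 32 and hence the same
  // VLMAX of 4. This is what lets getSEWLMULRatio() above stand in for
  // VLMAX comparisons in the methods that follow.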
  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
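  // Worked example for intersect() (hand-constructed): if one predecessor
  // exits with {AVL=%n, e32, m1, ta, ma} and another with
  // {AVL=%n, e64, m2, tu, ma}, the AVLs match and both states have a
  // SEW/LMUL ratio of 32, so the merge keeps the AVL and ratio but sets
  // SEWLMULRatioOnly: successors may rely on VL/VLMAX, but not on the exact
  // SEW, LMUL, or policy bits.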
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
  /// @}
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
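// Background on the form matched above (a summary of the RVV spec, not new
// pass logic): "vsetvli x0, x0, <vtype>" writes VTYPE while keeping the
// current VL, and is only safe when the new VTYPE preserves VLMAX; e.g.
//
//   vsetvli a0, a1, e32, m1, ta, ma   ; VL = min(a1, VLMAX)
//   vsetvli x0, x0, e16, mf2, ta, ma  ; same ratio (32), VL unchanged
//
// This is the cheap "VTYPE-only" transition that insertVSETVLI and the local
// postpass below try to use whenever the SEW/LMUL ratio is unchanged.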
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has a policy argument, use the argument.
  // If there is no policy argument, default to tail agnostic unless the
  // destination is tied to a source, and that source is not undef; in the
  // undef case the user has no visible values to preserve, so there is no
  // meaningful control over the policy values.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the instructions above or below to choose a
  // matching mask policy and thereby reduce vsetvli instructions? The default
  // mask policy is agnostic if the instruction uses the mask policy, and
  // undisturbed otherwise. Because most masked operations are mask
  // undisturbed, we could possibly reduce the vsetvlis between masked and
  // unmasked instruction sequences.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // Although in some cases a mismatched passthru/maskedoff with a policy
    // value does not make sense (ex. tied operand is IMPLICIT_DEF with
    // non-TAMA policy, or tied operand is not IMPLICIT_DEF with TAMA policy),
    // the user has set the policy value explicitly, so the compiler will not
    // fix it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      if (UsesMaskPolicy)
        MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}
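// Illustrative policy selection by computeInfoForInstr (hand-worked, not
// from a real compilation): a pseudo without an explicit policy operand
// whose destination is tied to an IMPLICIT_DEF passthru has no elements to
// preserve, so it gets tail agnostic ("ta"); the same pseudo tied to a live
// passthru must keep the tail elements and gets tail undisturbed ("tu").
// An explicit policy operand, when present, always wins.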
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  // Use the X0, X0 form if the AVL is the same and the SEW+LMUL gives the
  // same VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
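// For reference, the mapping from configuration instruction to the abstract
// AVL state recorded above, shown with example operands:
//
//   vsetivli rd, 8, <vtype>   -> AVLImm = 8
//   vsetvli  rd, a1, <vtype>  -> AVLReg = a1
//   vsetvli  rd, x0, <vtype>  -> AVLReg = x0, i.e. VL = VLMAX
//
// The VL-preserving "vsetvli x0, x0, <vtype>" form carries no AVL at all,
// which is why the assert above rejects it.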
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. VL=0 is uninteresting (as it should have been deleted already),
  // so the states are compatible if we can prove both AVLs are non-zero.
  // Additionally, if writing to an implicit_def operand, we don't need to
  // preserve any other bits and are thus compatible with any larger etype,
  // and can disregard policy bits.
  if (isScalarMoveInstr(MI) && CurInfo.hasNonZeroAVL() &&
      Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user-requested AVL and just use the last
  // one if we can prove both are non-zero. This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}
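// Hand-worked example of the AVL forwarding just above (schematic operands):
// given
//
//   %avl = PseudoVSETIVLI 4, <e32, m1 vtype>    ; %avl = VL = 4
//   ...
//   <vector op requiring AVL=%avl, e32, m1>
//
// the required AVL register is defined by a vsetivli with the same VLMAX and
// an immediate AVL, so the required state can adopt the immediate 4 and drop
// its dependence on %avl entirely.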
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault-first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}
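// Sketch of how the phase 2 iteration above converges (illustrative): for a
// diamond CFG where both arms of the branch end in the same vsetvli state S,
// the join block's Pred intersects S with S and stays S, so no revisit is
// queued. If one arm instead clobbers VTYPE (e.g. via a call), that arm's
// Exit is Unknown, the intersection at the join becomes Unknown, and the
// join block's successors are pushed back onto the worklist until the Exit
// states stop changing.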
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
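// Illustrative CFG for the optimization above (hand-written, schematic
// operands): a loop header whose AVL is a PHI of vl outputs,
//
//   bb.preheader:
//     %vl0 = PseudoVSETVLI %n, <e32, m1 vtype>
//     ...
//   bb.loop:
//     %avl = PHI [ %vl0, %bb.preheader ], [ %vl1, %bb.loop ]
//     <vector op with AVL=%avl, e32, m1>
//     %vl1 = PseudoVSETVLI %m, <e32, m1 vtype>
//     ...
//
// Each incoming value is the VL output of the last vsetvli in its
// predecessor and every VTYPE matches Require, so VL is already correct on
// entry to bb.loop and no vsetvli is needed before its first vector op.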
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning it has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had emitted it, since we know the GPR result of the implicit state
        // change wouldn't be used and the VL/VTYPE registers are correct.
        // Note that we *do* need to model the state as if it changed: while
        // the register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}
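// Worked example for hasFixedResult: with AVL=8, SEW=32, LMUL=1 the request
// needs 8*32 = 256 bits, so on a subtarget whose minimum VLEN is at least
// 256 the configured VL is guaranteed to equal 8 on every implementation.
// With a minimum VLEN of 128 the result would depend on the actual hardware
// vector length, and the PRE below must conservatively skip the transform.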
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a
/// single block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of exec.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI's VTYPE to match MI's
// without changing any of the fields which have been used.
// TODO: Restructure code to allow code reuse between this and isCompatible
// above.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // TODO: Extend this to handle cases where VL does change, but VL
  // has not been used. (e.g. over a vmv.x.s)
  if (!isVLPreservingConfig(MI))
    // Note: `vsetvli x0, x0, vtype' is the canonical instruction
    // for this case. If you find yourself wanting to add other forms
    // to this "unused VTYPE" case, we're probably missing a
    // canonicalization earlier.
    return false;

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *PrevMI = nullptr;
  DemandedFields Used;
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        ToDelete.push_back(PrevMI);
        // Fall through.
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged.
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}
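// Illustrative result of insertReadVL (hand-written, schematic operands): a
// fault-only-first load whose vl output is still used,
//
//   %v, %outvl = PseudoVLE32FF_V_M1 %base, %avl, 5
//   ... uses of %outvl ...
//
// becomes
//
//   %v, dead $x0 = PseudoVLE32FF_V_M1 %base, %avl, 5
//   %outvl = PseudoReadVL
//
// so later passes obtain the trimmed VL from the VL CSR rather than from an
// extra GPR def on the load pseudo.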
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole-based demanded fields transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}