//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return None if MI is not
/// a load or store which ignores SEW.
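/// (These are the unit-stride and strided loads/stores whose EEW is encoded
/// directly in the opcode rather than taken from VTYPE's SEW field.)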
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    return Log2SEW == 0;
  }
  return false;
}

static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
  return (SEW * 8) / LMul;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                  RISCVVType::getVLMUL(VType1));
    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                  RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
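  // For example, a unit-stride EEW=32 load nominally executed under
  // SEW=32/LMUL=1 (ratio 32) behaves identically under SEW=16/LMUL=1/2
  // (also ratio 32): EMUL and VLMAX are unchanged.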
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const {
    return SEWLMULRatioOnly;
  }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return ::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has a policy argument, use it. Otherwise, default to
  // tail agnostic unless the destination is tied to a source and that source
  // is not undef, in which case the user has some control over the tail
  // (policy) values.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the surrounding instructions to choose a matching
  // mask policy and so reduce the number of vsetvli instructions? The default
  // mask policy is agnostic if the instruction uses mask policy, otherwise
  // undisturbed. Because most mask operations are mask undisturbed, we could
  // possibly reduce the vsetvlis between masked and unmasked instruction
  // sequences.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // Although in some cases a mismatched passthru/maskedoff and policy value
    // does not make sense (e.g. a tied operand that is IMPLICIT_DEF with a
    // non-TAMA policy, or a tied operand that is not IMPLICIT_DEF with a TAMA
    // policy), the user has set the policy value explicitly, so the compiler
    // does not fix it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      if (UsesMaskPolicy)
        MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
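  // For example, if the previous state was set by "vsetvli a1, a0, e32, m1"
  // and the new request is e16, mf2 with the same AVL, the SEW/LMUL ratio
  // (and therefore VLMAX and VL) is unchanged, so "vsetvli x0, x0, e16, mf2"
  // can update VTYPE while preserving VL.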
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero. Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove both are non-zero. This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
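  // For example (illustrative), if the AVL register was defined by a
  // "vsetivli %r, 8, e32, m1" and this instruction needs a VTYPE with the
  // same SEW/LMUL ratio, the immediate 8 can be used directly as the AVL
  // here without extending any register live range.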
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct it.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop when it could execute once in the preheader instead.
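/// For example, in a single-block loop whose body needs one fixed VL/VTYPE,
/// the vsetvli would otherwise be emitted at the top of the loop and executed
/// on every iteration; hoisting it into the preheader (the single predecessor
/// where the state is not yet available) lets the loop body run without any
/// vsetvli.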
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI's VTYPE to match MI's
// without changing any of the fields which have been used.
// TODO: Restructure code to allow code reuse between this and isCompatible
// above.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // TODO: Extend this to handle cases where VL does change, but VL
  // has not been used. (e.g. over a vmv.x.s)
  if (!isVLPreservingConfig(MI))
    // Note: `vsetvli x0, x0, vtype' is the canonical instruction
    // for this case. If you find yourself wanting to add other forms
    // to this "unused VTYPE" case, we're probably missing a
    // canonicalization earlier.
    return false;

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *PrevMI = nullptr;
  DemandedFields Used;
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        ToDelete.push_back(PrevMI);
        // fallthrough
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");

  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block-local
  // DSE and peephole-based demanded fields transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace uses of the vl output
  // of the VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}