//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
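/// For example, 'vsetvli x0, x0, e32, m2, ta, ma' rewrites SEW, LMUL, and the
/// policy bits while leaving the current VL in place. (Illustrative; per the
/// spec this form is only legal when the new VTYPE keeps VLMAX unchanged.)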
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
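    // (With no tied source operand there is nothing constraining the
    // inactive lanes, so they may hold anything.)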
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
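  /// Prints, e.g., {VLAny=1, VLZeroness=1, SEW=SEWEqual, LMUL=1,
  /// SEWLMULRatio=1, TailPolicy=1, MaskPolicy=1}.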
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
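    // (An instruction which doesn't use a mask produces the same result
    // whether VTYPE says mask undisturbed or mask agnostic.)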
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
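  // (We track the decoded vlmul/vsew/vta/vma fields rather than the raw
  // VTYPE CSR encoding; encodeVTYPE() below re-packs them on demand.)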
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLReg(Info.getAVLReg());
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && isNonZeroLoadImmediate(*MI))
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
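    // (The merge can only lose information: an exact state degrades to a
    // SEW/LMUL-ratio-only state, which in turn degrades to unknown.)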
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
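    // (The policy operand is the last explicit operand and carries the
    // TAIL_AGNOSTIC/MASK_AGNOSTIC bits, as decoded below.)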
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLReg(RISCV::X0);
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
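    // Writing a VTYPE with the same SEW/LMUL ratio leaves VLMAX, and therefore
    // the current VL, unchanged, so the VL-preserving form is safe.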
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
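/// The check is demand driven: only the VL/VTYPE properties MI actually
/// observes need to match CurInfo.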
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
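// For example, if the previous state was e32/m2 (ratio 32/2 = 16) and the new
// instruction only needs e8, picking e8/mf2 keeps the ratio, and thus VLMAX
// and VL, unchanged.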
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of the cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending the live range of
  // an avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
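  // (Calls and inline assembly may clobber VL/VTYPE arbitrarily.)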
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning it has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
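          // (E.g. an 'ADDI %reg, x0, 42' materializing AVL=42, which doesn't
          // fit a vsetivli's 5-bit immediate; erase it if this was its only
          // remaining use.)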
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvli in a
/// single block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
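  // (An AVL of X0 encodes VLMAX, which needs no defining instruction and is
  // available everywhere.)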
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
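  // (If MI is the VL-preserving 'x0, x0' form it inherits whatever VL PrevMI
  // established, so only the VTYPE compatibility check at the bottom matters.)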
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       MRI))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);
    assert(MRI.isSSA());

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
      if (AVL.getReg().isPhysical())
        return false;
      if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
        return false;
    }
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged.
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
          MI.getOperand(0).setIsDead(false);
          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          if (OldVLReg) {
            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg))
              VLOpDef->eraseFromParent();
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  NumRemovedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
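      // (Any remaining readers now read the PseudoReadVL built above.)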
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}