//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
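/// Such instructions encode their EEW statically in the opcode, and compute
/// an effective LMUL (EMUL) from that EEW and the current SEW/LMUL ratio;
/// e.g. vle32.v always uses an EEW of 32 regardless of the configured SEW.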
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=" << SEW << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
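/// For example, if only SEWLMULRatio is demanded, {e32, m1} and {e16, mf2}
/// are indistinguishable: both have a SEW/LMUL ratio of 32, and therefore
/// the same VLMAX.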
static bool areCompatibleVTYPEs(uint64_t VType1, uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                              RISCVVType::getVLMUL(VType1));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                              RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
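/// The state forms a small lattice: Uninitialized (nothing known yet), a
/// concrete AVL (register or immediate) together with the VTYPE fields, and
/// Unknown (VL/VTYPE were clobbered by something we cannot model, such as a
/// call).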
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      // An AVL of X0 requests VLMAX, which is always non-zero.
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

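  // A quick worked example of the ratio: with VLEN=128, {e32, m2} has
  // SEW/LMUL ratio 32/2 = 16 and {e16, m1} has ratio 16/1 = 16, so both
  // yield VLMAX = 128/16 = 8. Equal ratios imply equal VLMAX for any VLEN.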
  // Check if the VTYPE for these two VSETVLIInfos produces the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used,
                    const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
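    // For example, merging {AVL=%x, e32, m1} with {AVL=%x, e16, mf2}: the
    // VTYPEs differ, but the AVL and SEW/LMUL ratio (and hence VLMAX) agree,
    // so successors can still rely on the value of VL even though only the
    // ratio portion of VTYPE remains known.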
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic, MaskAgnostic;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
    // MaskAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  } else {
    // If there is no tied operand, there shouldn't be a policy operand.
    assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
    // No tied operand; use agnostic policies.
    TailAgnostic = true;
    MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
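  // The X0-source form must still write a real (dead) scratch register:
  // encoding both the destination and the source as X0 would instead be the
  // VL-preserving 'vsetvli x0, x0, vtype' form rather than a request for
  // VLMAX.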
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI);

  if (isScalarMoveInstr(MI)) {
    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we
    // don't need to preserve any other bits and are thus compatible with any
    // larger SEW, and can disregard policy bits. Warning: It's tempting to
    // try doing this for any tail agnostic operation, but we can't as TA
    // requires tail lanes to either be the original value or -1. We are
    // writing unknown bits to the lanes here.
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW()) {
      Used.SEW = false;
      Used.TailPolicy = false;
    }
  }

  if (CurInfo.isCompatible(Used, Require))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
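// For example, vmv.s.x only distinguishes VL == 0 from VL > 0, so when the
// incoming state has a provably equally-zero AVL and the same VLMAX, we keep
// the incoming AVL rather than the one the instruction requested; the
// following vsetvli can then be elided or use a cheaper form.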
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero. This removes a vsetvli entirely
  // if the types match or allows use of the cheaper avl preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different from the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
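  // Calls and inline assembly are conservatively assumed to clobber both
  // registers, as is any explicit def of VL or VTYPE.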
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
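  // A machine PHI lists its incoming (value, predecessor block) operand
  // pairs starting at operand 1, which is why the loop below steps by two.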
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
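/// For example (a sketch of the intended effect, not literal pass output):
/// a single block loop whose body requires, say,
///   vsetivli zero, 4, e32, m1, ta, ma
/// would otherwise pay that state transition on every iteration; when every
/// non-unknown predecessor already agrees on the state and VL provably
/// equals the requested AVL, the transition is established once in the
/// preheader instead.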
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}