//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

// Models the VL/VTYPE state established by a vsetvli (or required by a vector
// instruction): the AVL (either a register or an immediate) plus the VTYPE
// fields. Also acts as the lattice value for the phase-2 dataflow analysis,
// with Uninitialized as top and Unknown as bottom.
class VSETVLIInfo {
  // The AVL operand; which member is active is determined by State below.
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  // Properties of the instruction this info was derived from, used to relax
  // the compatibility checks below.
  uint8_t MaskRegOp : 1;
  uint8_t StoreOp : 1;
  uint8_t ScalarMovOp : 1;
  // Set when two states were merged and only the SEW/LMUL ratio (i.e. VLMAX)
  // is known to be meaningful; the individual VTYPE fields are not.
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
        StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  // True only when the AVL is provably zero (immediate 0).
  bool hasZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() == 0;
    return false;
  }
  // True only when the AVL is provably non-zero: a non-zero immediate, or the
  // X0 register (which as an AVL selects VLMAX).
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  // Two AVLs are the same only if both are registers naming the same register,
  // or both are equal immediates; a reg/imm mix is conservatively different.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  // Decode an encoded VTYPE immediate into the individual fields.
  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
                bool IsStore, bool IsScalarMovOp) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
    MaskRegOp = MRO;
    StoreOp = IsStore;
    ScalarMovOp = IsScalarMovOp;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  // Return SEW/LMUL scaled so that it is an integer even for fractional LMUL.
  // Equal ratios imply equal VLMAX for a given VLEN.
  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    // Convert LMul to a fixed point value with 3 fractional bits.
    LMul = Fractional ? (8 / LMul) : (LMul * 8);

    assert(SEW >= 8 && "Unexpected SEW value");
    return (SEW * 8) / LMul;
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
    // Simple case, see if full VTYPE matches.
    if (hasSameVTYPE(InstrInfo))
      return true;

    if (Strict)
      return false;

    // If this is a mask reg operation, it only cares about VLMAX.
    // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
    // than "InstrInfo".
    // FIXME: The policy bits can probably be ignored for mask reg operations.
    if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
        TailAgnostic == InstrInfo.TailAgnostic &&
        MaskAgnostic == InstrInfo.MaskAgnostic)
      return true;

    return false;
  }

  // Determine whether the vector instructions requirements represented by
  // InstrInfo are compatible with the previous vsetvli instruction represented
  // by this.
  bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || InstrInfo.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (!Strict && InstrInfo.hasAVLReg() &&
        InstrInfo.AVLReg == RISCV::NoRegister) {
      if (SEW == InstrInfo.SEW)
        return true;
    }

    // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
    // So it's compatible when we could make sure that both VL be the same
    // situation.
    if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
        ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
         (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
        hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
      return true;

    // The AVL must match.
    if (!hasSameAVL(InstrInfo))
      return false;

    if (hasCompatibleVTYPE(InstrInfo, Strict))
      return true;

    // Strict matches must ensure a full VTYPE match.
    if (Strict)
      return false;

    // Store instructions don't use the policy fields.
    // TODO: Move into hasCompatibleVTYPE?
    if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
      return true;

    // Anything else is not compatible.
    return false;
  }

  // Determine whether a unit-stride or strided load/store with element width
  // EEW (equal to InstrInfo.SEW) can execute correctly under this VL/VTYPE
  // state, using EMUL = (EEW/SEW)*LMUL to keep VLMAX unchanged.
  bool isCompatibleWithLoadStoreEEW(unsigned EEW,
                                    const VSETVLIInfo &InstrInfo) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");

    if (isUnknown() || hasSEWLMULRatioOnly())
      return false;

    if (!hasSameAVL(InstrInfo))
      return false;

    // Stores can ignore the tail and mask policies.
    if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
                               MaskAgnostic != InstrInfo.MaskAgnostic))
      return false;

    return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
      return hasSameVTYPE(Other);

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    return false;
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors. This is the dataflow meet operation for phase 2.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

  // Calculate the VSETVLIInfo visible at the end of the block assuming this
  // is the predecessor value, and Other is change for this block.
  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
    assert(isValid() && "Can only merge with a valid VSETVLInfo");

    // Nothing changed from the predecessor, keep it.
    if (!Other.isValid())
      return *this;

    // If the change is compatible with the input, we won't create a VSETVLI
    // and should keep the predecessor.
    if (isCompatible(Other, /*Strict*/ true))
      return *this;

    // Otherwise just use whatever is in this block.
    return Other;
  }
};

// Per-basic-block state for the three analysis phases.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() {}
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  // Indexed by MachineBasicBlock number.
  std::vector<BlockData> BlockInfo;
  // Worklist for the phase-2 fixed-point iteration.
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
  bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// Look through a chain of full COPYs to the defining instruction. Returns
// nullptr if the chain leaves the virtual registers (or has no def).
static MachineInstr *elideCopies(MachineInstr *MI,
                                 const MachineRegisterInfo *MRI) {
  while (true) {
    if (!MI->isFullCopy())
      return MI;
    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
      return nullptr;
    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
    if (!MI)
      return nullptr;
  }
}

// Return true for the vmv.s.x/vfmv.s.f pseudos, which only distinguish
// VL == 0 from VL > 0 (see the ScalarMovOp handling in isCompatible).
static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

// Build the VSETVLIInfo (AVL + VTYPE requirements) for a vector pseudo from
// its operands and TSFlags.
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;
  unsigned NumOperands = MI.getNumExplicitOperands();
  bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);

  // Default to tail agnostic unless the destination is tied to a source.
  // Unless the source is undef. In that case the user would have some control
  // over the tail values. Some pseudo instructions force a tail agnostic policy
  // despite having a tied def.
  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
  bool TailAgnostic = true;
  // If the instruction has policy argument, use the argument.
  if (HasPolicy) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    TailAgnostic = Op.getImm() & 0x1;
  }

  unsigned UseOpIdx;
  if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
      MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI) {
      // Look through copies to find the real source of the tied operand.
      UseMI = elideCopies(UseMI, MRI);
      if (UseMI && UseMI->isImplicitDef())
        TailAgnostic = true;
    }
  }

  // Remove the tail policy so we can find the SEW and VL.
  if (HasPolicy)
    --NumOperands;

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  bool MaskRegOp = Log2SEW == 0;
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  // If there are no explicit defs, this is a store instruction which can
  // ignore the tail and mask policies.
  bool StoreOp = MI.getNumExplicitDefs() == 0;
  bool ScalarMovOp = isScalarMoveInstr(MI);

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else
    InstrInfo.setAVLReg(RISCV::NoRegister);
  InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
                     /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);

  return InstrInfo;
}

// Emit a VSET(I)VLI before MI that establishes Info, using PrevInfo (the
// known incoming state) to pick the cheapest form (vsetvli x0, x0 preserves
// the current VL when VLMAX is unchanged).
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, MI, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

// Return true if a VSETVLI must be inserted to satisfy Require given the
// current state CurInfo.
bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) {
  if (CurInfo.isCompatible(Require, /*Strict*/ false))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
654 if (!CurInfo.isUnknown() && Require.hasAVLReg() && 655 Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() && 656 CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) { 657 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { 658 if (DefMI->getOpcode() == RISCV::PseudoVSETVLI || 659 DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 || 660 DefMI->getOpcode() == RISCV::PseudoVSETIVLI) { 661 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 662 if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo)) 663 return false; 664 } 665 } 666 } 667 668 return true; 669 } 670 671 bool canSkipVSETVLIForLoadStore(const MachineInstr &MI, 672 const VSETVLIInfo &Require, 673 const VSETVLIInfo &CurInfo) { 674 unsigned EEW; 675 switch (MI.getOpcode()) { 676 default: 677 return false; 678 case RISCV::PseudoVLE8_V_M1: 679 case RISCV::PseudoVLE8_V_M1_MASK: 680 case RISCV::PseudoVLE8_V_M2: 681 case RISCV::PseudoVLE8_V_M2_MASK: 682 case RISCV::PseudoVLE8_V_M4: 683 case RISCV::PseudoVLE8_V_M4_MASK: 684 case RISCV::PseudoVLE8_V_M8: 685 case RISCV::PseudoVLE8_V_M8_MASK: 686 case RISCV::PseudoVLE8_V_MF2: 687 case RISCV::PseudoVLE8_V_MF2_MASK: 688 case RISCV::PseudoVLE8_V_MF4: 689 case RISCV::PseudoVLE8_V_MF4_MASK: 690 case RISCV::PseudoVLE8_V_MF8: 691 case RISCV::PseudoVLE8_V_MF8_MASK: 692 case RISCV::PseudoVLSE8_V_M1: 693 case RISCV::PseudoVLSE8_V_M1_MASK: 694 case RISCV::PseudoVLSE8_V_M2: 695 case RISCV::PseudoVLSE8_V_M2_MASK: 696 case RISCV::PseudoVLSE8_V_M4: 697 case RISCV::PseudoVLSE8_V_M4_MASK: 698 case RISCV::PseudoVLSE8_V_M8: 699 case RISCV::PseudoVLSE8_V_M8_MASK: 700 case RISCV::PseudoVLSE8_V_MF2: 701 case RISCV::PseudoVLSE8_V_MF2_MASK: 702 case RISCV::PseudoVLSE8_V_MF4: 703 case RISCV::PseudoVLSE8_V_MF4_MASK: 704 case RISCV::PseudoVLSE8_V_MF8: 705 case RISCV::PseudoVLSE8_V_MF8_MASK: 706 case RISCV::PseudoVSE8_V_M1: 707 case RISCV::PseudoVSE8_V_M1_MASK: 708 case RISCV::PseudoVSE8_V_M2: 709 case RISCV::PseudoVSE8_V_M2_MASK: 710 case 
RISCV::PseudoVSE8_V_M4: 711 case RISCV::PseudoVSE8_V_M4_MASK: 712 case RISCV::PseudoVSE8_V_M8: 713 case RISCV::PseudoVSE8_V_M8_MASK: 714 case RISCV::PseudoVSE8_V_MF2: 715 case RISCV::PseudoVSE8_V_MF2_MASK: 716 case RISCV::PseudoVSE8_V_MF4: 717 case RISCV::PseudoVSE8_V_MF4_MASK: 718 case RISCV::PseudoVSE8_V_MF8: 719 case RISCV::PseudoVSE8_V_MF8_MASK: 720 case RISCV::PseudoVSSE8_V_M1: 721 case RISCV::PseudoVSSE8_V_M1_MASK: 722 case RISCV::PseudoVSSE8_V_M2: 723 case RISCV::PseudoVSSE8_V_M2_MASK: 724 case RISCV::PseudoVSSE8_V_M4: 725 case RISCV::PseudoVSSE8_V_M4_MASK: 726 case RISCV::PseudoVSSE8_V_M8: 727 case RISCV::PseudoVSSE8_V_M8_MASK: 728 case RISCV::PseudoVSSE8_V_MF2: 729 case RISCV::PseudoVSSE8_V_MF2_MASK: 730 case RISCV::PseudoVSSE8_V_MF4: 731 case RISCV::PseudoVSSE8_V_MF4_MASK: 732 case RISCV::PseudoVSSE8_V_MF8: 733 case RISCV::PseudoVSSE8_V_MF8_MASK: 734 EEW = 8; 735 break; 736 case RISCV::PseudoVLE16_V_M1: 737 case RISCV::PseudoVLE16_V_M1_MASK: 738 case RISCV::PseudoVLE16_V_M2: 739 case RISCV::PseudoVLE16_V_M2_MASK: 740 case RISCV::PseudoVLE16_V_M4: 741 case RISCV::PseudoVLE16_V_M4_MASK: 742 case RISCV::PseudoVLE16_V_M8: 743 case RISCV::PseudoVLE16_V_M8_MASK: 744 case RISCV::PseudoVLE16_V_MF2: 745 case RISCV::PseudoVLE16_V_MF2_MASK: 746 case RISCV::PseudoVLE16_V_MF4: 747 case RISCV::PseudoVLE16_V_MF4_MASK: 748 case RISCV::PseudoVLSE16_V_M1: 749 case RISCV::PseudoVLSE16_V_M1_MASK: 750 case RISCV::PseudoVLSE16_V_M2: 751 case RISCV::PseudoVLSE16_V_M2_MASK: 752 case RISCV::PseudoVLSE16_V_M4: 753 case RISCV::PseudoVLSE16_V_M4_MASK: 754 case RISCV::PseudoVLSE16_V_M8: 755 case RISCV::PseudoVLSE16_V_M8_MASK: 756 case RISCV::PseudoVLSE16_V_MF2: 757 case RISCV::PseudoVLSE16_V_MF2_MASK: 758 case RISCV::PseudoVLSE16_V_MF4: 759 case RISCV::PseudoVLSE16_V_MF4_MASK: 760 case RISCV::PseudoVSE16_V_M1: 761 case RISCV::PseudoVSE16_V_M1_MASK: 762 case RISCV::PseudoVSE16_V_M2: 763 case RISCV::PseudoVSE16_V_M2_MASK: 764 case RISCV::PseudoVSE16_V_M4: 765 case 
RISCV::PseudoVSE16_V_M4_MASK: 766 case RISCV::PseudoVSE16_V_M8: 767 case RISCV::PseudoVSE16_V_M8_MASK: 768 case RISCV::PseudoVSE16_V_MF2: 769 case RISCV::PseudoVSE16_V_MF2_MASK: 770 case RISCV::PseudoVSE16_V_MF4: 771 case RISCV::PseudoVSE16_V_MF4_MASK: 772 case RISCV::PseudoVSSE16_V_M1: 773 case RISCV::PseudoVSSE16_V_M1_MASK: 774 case RISCV::PseudoVSSE16_V_M2: 775 case RISCV::PseudoVSSE16_V_M2_MASK: 776 case RISCV::PseudoVSSE16_V_M4: 777 case RISCV::PseudoVSSE16_V_M4_MASK: 778 case RISCV::PseudoVSSE16_V_M8: 779 case RISCV::PseudoVSSE16_V_M8_MASK: 780 case RISCV::PseudoVSSE16_V_MF2: 781 case RISCV::PseudoVSSE16_V_MF2_MASK: 782 case RISCV::PseudoVSSE16_V_MF4: 783 case RISCV::PseudoVSSE16_V_MF4_MASK: 784 EEW = 16; 785 break; 786 case RISCV::PseudoVLE32_V_M1: 787 case RISCV::PseudoVLE32_V_M1_MASK: 788 case RISCV::PseudoVLE32_V_M2: 789 case RISCV::PseudoVLE32_V_M2_MASK: 790 case RISCV::PseudoVLE32_V_M4: 791 case RISCV::PseudoVLE32_V_M4_MASK: 792 case RISCV::PseudoVLE32_V_M8: 793 case RISCV::PseudoVLE32_V_M8_MASK: 794 case RISCV::PseudoVLE32_V_MF2: 795 case RISCV::PseudoVLE32_V_MF2_MASK: 796 case RISCV::PseudoVLSE32_V_M1: 797 case RISCV::PseudoVLSE32_V_M1_MASK: 798 case RISCV::PseudoVLSE32_V_M2: 799 case RISCV::PseudoVLSE32_V_M2_MASK: 800 case RISCV::PseudoVLSE32_V_M4: 801 case RISCV::PseudoVLSE32_V_M4_MASK: 802 case RISCV::PseudoVLSE32_V_M8: 803 case RISCV::PseudoVLSE32_V_M8_MASK: 804 case RISCV::PseudoVLSE32_V_MF2: 805 case RISCV::PseudoVLSE32_V_MF2_MASK: 806 case RISCV::PseudoVSE32_V_M1: 807 case RISCV::PseudoVSE32_V_M1_MASK: 808 case RISCV::PseudoVSE32_V_M2: 809 case RISCV::PseudoVSE32_V_M2_MASK: 810 case RISCV::PseudoVSE32_V_M4: 811 case RISCV::PseudoVSE32_V_M4_MASK: 812 case RISCV::PseudoVSE32_V_M8: 813 case RISCV::PseudoVSE32_V_M8_MASK: 814 case RISCV::PseudoVSE32_V_MF2: 815 case RISCV::PseudoVSE32_V_MF2_MASK: 816 case RISCV::PseudoVSSE32_V_M1: 817 case RISCV::PseudoVSSE32_V_M1_MASK: 818 case RISCV::PseudoVSSE32_V_M2: 819 case RISCV::PseudoVSSE32_V_M2_MASK: 820 
case RISCV::PseudoVSSE32_V_M4: 821 case RISCV::PseudoVSSE32_V_M4_MASK: 822 case RISCV::PseudoVSSE32_V_M8: 823 case RISCV::PseudoVSSE32_V_M8_MASK: 824 case RISCV::PseudoVSSE32_V_MF2: 825 case RISCV::PseudoVSSE32_V_MF2_MASK: 826 EEW = 32; 827 break; 828 case RISCV::PseudoVLE64_V_M1: 829 case RISCV::PseudoVLE64_V_M1_MASK: 830 case RISCV::PseudoVLE64_V_M2: 831 case RISCV::PseudoVLE64_V_M2_MASK: 832 case RISCV::PseudoVLE64_V_M4: 833 case RISCV::PseudoVLE64_V_M4_MASK: 834 case RISCV::PseudoVLE64_V_M8: 835 case RISCV::PseudoVLE64_V_M8_MASK: 836 case RISCV::PseudoVLSE64_V_M1: 837 case RISCV::PseudoVLSE64_V_M1_MASK: 838 case RISCV::PseudoVLSE64_V_M2: 839 case RISCV::PseudoVLSE64_V_M2_MASK: 840 case RISCV::PseudoVLSE64_V_M4: 841 case RISCV::PseudoVLSE64_V_M4_MASK: 842 case RISCV::PseudoVLSE64_V_M8: 843 case RISCV::PseudoVLSE64_V_M8_MASK: 844 case RISCV::PseudoVSE64_V_M1: 845 case RISCV::PseudoVSE64_V_M1_MASK: 846 case RISCV::PseudoVSE64_V_M2: 847 case RISCV::PseudoVSE64_V_M2_MASK: 848 case RISCV::PseudoVSE64_V_M4: 849 case RISCV::PseudoVSE64_V_M4_MASK: 850 case RISCV::PseudoVSE64_V_M8: 851 case RISCV::PseudoVSE64_V_M8_MASK: 852 case RISCV::PseudoVSSE64_V_M1: 853 case RISCV::PseudoVSSE64_V_M1_MASK: 854 case RISCV::PseudoVSSE64_V_M2: 855 case RISCV::PseudoVSSE64_V_M2_MASK: 856 case RISCV::PseudoVSSE64_V_M4: 857 case RISCV::PseudoVSSE64_V_M4_MASK: 858 case RISCV::PseudoVSSE64_V_M8: 859 case RISCV::PseudoVSSE64_V_M8_MASK: 860 EEW = 64; 861 break; 862 } 863 864 return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require); 865 } 866 867 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { 868 bool HadVectorOp = false; 869 870 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 871 for (const MachineInstr &MI : MBB) { 872 // If this is an explicit VSETVLI or VSETIVLI, update our state. 
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      HadVectorOp = true;
      BBInfo.Change = getInfoForVSETVLI(MI);
      continue;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      HadVectorOp = true;

      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);

      if (!BBInfo.Change.isValid()) {
        BBInfo.Change = NewInfo;
      } else {
        // If this instruction isn't compatible with the previous VL/VTYPE
        // we need to insert a VSETVLI.
        // If this is a unit-stride or strided load/store, we may be able to use
        // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
        // NOTE: We only do this if the vtype we're comparing against was
        // created in this block. We need the first and third phase to treat
        // the store the same way.
        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
            needVSETVLI(NewInfo, BBInfo.Change))
          BBInfo.Change = NewInfo;
      }
    }

    // If this is something that updates VL/VTYPE that we don't know about, set
    // the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
      BBInfo.Change = VSETVLIInfo::getUnknown();
    }
  }

  // Initial exit state is whatever change we found in the block.
  BBInfo.Exit = BBInfo.Change;

  return HadVectorOp;
}

// Phase 2: recompute the incoming (Pred) and outgoing (Exit) VL/VTYPE state
// for MBB from its predecessors' Exit values, requeueing successors whenever
// the Exit value changes (worklist fixed-point iteration).
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  BBInfo.Pred = InInfo;

  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  // PHI operands come in (value, predecessor-block) pairs starting at index 1.
  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() ||
        !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
                   DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
                   DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

// Phase 3: walk the block and insert the VSET(I)VLI instructions required by
// each vector operation, using the phase-2 predecessor state to avoid a
// vsetvli before the first vector instruction where possible.
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo;
  // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
  MachineInstr *PrevVSETVLIMI = nullptr;

  for (MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      // Conservatively, mark the VL and VTYPE as live.
1015 assert(MI.getOperand(3).getReg() == RISCV::VL && 1016 MI.getOperand(4).getReg() == RISCV::VTYPE && 1017 "Unexpected operands where VL and VTYPE should be"); 1018 MI.getOperand(3).setIsDead(false); 1019 MI.getOperand(4).setIsDead(false); 1020 CurInfo = getInfoForVSETVLI(MI); 1021 PrevVSETVLIMI = &MI; 1022 continue; 1023 } 1024 1025 uint64_t TSFlags = MI.getDesc().TSFlags; 1026 if (RISCVII::hasSEWOp(TSFlags)) { 1027 VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); 1028 if (RISCVII::hasVLOp(TSFlags)) { 1029 unsigned Offset = 2; 1030 if (RISCVII::hasVecPolicyOp(TSFlags)) 1031 Offset = 3; 1032 MachineOperand &VLOp = 1033 MI.getOperand(MI.getNumExplicitOperands() - Offset); 1034 if (VLOp.isReg()) { 1035 // Erase the AVL operand from the instruction. 1036 VLOp.setReg(RISCV::NoRegister); 1037 VLOp.setIsKill(false); 1038 } 1039 MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, 1040 /*isImp*/ true)); 1041 } 1042 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, 1043 /*isImp*/ true)); 1044 1045 if (!CurInfo.isValid()) { 1046 // We haven't found any vector instructions or VL/VTYPE changes yet, 1047 // use the predecessor information. 1048 assert(BlockInfo[MBB.getNumber()].Pred.isValid() && 1049 "Expected a valid predecessor state."); 1050 if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && 1051 needVSETVLIPHI(NewInfo, MBB)) { 1052 insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred); 1053 CurInfo = NewInfo; 1054 } 1055 } else { 1056 // If this instruction isn't compatible with the previous VL/VTYPE 1057 // we need to insert a VSETVLI. 1058 // If this is a unit-stride or strided load/store, we may be able to use 1059 // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype. 1060 // NOTE: We can't use predecessor information for the store. We must 1061 // treat it the same as the first phase so that we produce the correct 1062 // vl/vtype for succesor blocks. 
1063 if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) && 1064 needVSETVLI(NewInfo, CurInfo)) { 1065 // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it 1066 // with current VL/VTYPE. 1067 bool NeedInsertVSETVLI = true; 1068 if (PrevVSETVLIMI) { 1069 bool HasSameAVL = 1070 CurInfo.hasSameAVL(NewInfo) || 1071 (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() && 1072 NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg()); 1073 // If these two VSETVLI have the same AVL and the same VLMAX, 1074 // we could merge these two VSETVLI. 1075 if (HasSameAVL && 1076 CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) { 1077 PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); 1078 NeedInsertVSETVLI = false; 1079 } 1080 if (isScalarMoveInstr(MI) && 1081 ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) || 1082 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) && 1083 NewInfo.hasSameVLMAX(CurInfo)) { 1084 PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()); 1085 NeedInsertVSETVLI = false; 1086 } 1087 } 1088 if (NeedInsertVSETVLI) 1089 insertVSETVLI(MBB, MI, NewInfo, CurInfo); 1090 CurInfo = NewInfo; 1091 } 1092 } 1093 PrevVSETVLIMI = nullptr; 1094 } 1095 1096 // If this is something updates VL/VTYPE that we don't know about, set 1097 // the state to unknown. 1098 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || 1099 MI.modifiesRegister(RISCV::VTYPE)) { 1100 CurInfo = VSETVLIInfo::getUnknown(); 1101 PrevVSETVLIMI = nullptr; 1102 } 1103 1104 // If we reach the end of the block and our current info doesn't match the 1105 // expected info, insert a vsetvli to correct. 
1106 if (MI.isTerminator()) { 1107 const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; 1108 if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && 1109 CurInfo != ExitInfo) { 1110 insertVSETVLI(MBB, MI, ExitInfo, CurInfo); 1111 CurInfo = ExitInfo; 1112 } 1113 } 1114 } 1115 } 1116 1117 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { 1118 // Skip if the vector extension is not enabled. 1119 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 1120 if (!ST.hasVInstructions()) 1121 return false; 1122 1123 TII = ST.getInstrInfo(); 1124 MRI = &MF.getRegInfo(); 1125 1126 assert(BlockInfo.empty() && "Expect empty block infos"); 1127 BlockInfo.resize(MF.getNumBlockIDs()); 1128 1129 bool HaveVectorOp = false; 1130 1131 // Phase 1 - determine how VL/VTYPE are affected by the each block. 1132 for (const MachineBasicBlock &MBB : MF) 1133 HaveVectorOp |= computeVLVTYPEChanges(MBB); 1134 1135 // If we didn't find any instructions that need VSETVLI, we're done. 1136 if (HaveVectorOp) { 1137 // Phase 2 - determine the exit VL/VTYPE from each block. We add all 1138 // blocks to the list here, but will also add any that need to be revisited 1139 // during Phase 2 processing. 1140 for (const MachineBasicBlock &MBB : MF) { 1141 WorkList.push(&MBB); 1142 BlockInfo[MBB.getNumber()].InQueue = true; 1143 } 1144 while (!WorkList.empty()) { 1145 const MachineBasicBlock &MBB = *WorkList.front(); 1146 WorkList.pop(); 1147 computeIncomingVLVTYPE(MBB); 1148 } 1149 1150 // Phase 3 - add any vsetvli instructions needed in the block. Use the 1151 // Phase 2 information to avoid adding vsetvlis before the first vector 1152 // instruction in the block if the VL/VTYPE is satisfied by its 1153 // predecessors. 1154 for (MachineBasicBlock &MBB : MF) 1155 emitVSETVLIs(MBB); 1156 } 1157 1158 BlockInfo.clear(); 1159 1160 return HaveVectorOp; 1161 } 1162 1163 /// Returns an instance of the Insert VSETVLI pass. 
1164 FunctionPass *llvm::createRISCVInsertVSETVLIPass() { 1165 return new RISCVInsertVSETVLI(); 1166 } 1167