//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
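
// Note (illustrative): the three config pseudos above correspond, roughly,
// to the following forms after lowering:
//   PseudoVSETVLI   rd, rs1,  vtypei   ->  vsetvli  rd, rs1, vtypei
//   PseudoVSETVLIX0 rd, x0,   vtypei   ->  vsetvli  rd, x0, vtypei
//   PseudoVSETIVLI  rd, uimm, vtypei   ->  vsetivli rd, uimm, vtypei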

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
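
// Note (illustrative): these unit-stride and strided loads/stores encode
// their element width (EEW) in the opcode, and the hardware computes
// EMUL = (EEW / SEW) * LMUL. For example, a vle32.v executed under e16,mf2
// uses EMUL = (32/16) * 1/2 = 1, the same EMUL as under e32,m1, which is why
// only the SEW/LMUL ratio matters for them (see getDemanded below).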

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
  // are solely IMPLICIT_DEFs), the pass through lanes are undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 2,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0                // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() const {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif
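
// A worked example (illustrative) of the SEW encoding above: vmv.s.x with an
// undefined passthrough writes only element 0 and leaves every other lane
// undefined, so executing it at any SEW greater than or equal to the
// requested one writes a superset of the required bits; such an instruction
// demands only SEWGreaterThanOrEqual rather than SEWEqual.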

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  if (Used.SEW == DemandedFields::SEWEqual &&
      RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
    return false;

  if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
      RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }
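
  // Example (illustrative): a mask-register op such as vmand.mm operates on
  // the first VL bits of a single mask register regardless of SEW/LMUL, so
  // any two vtypes with the same SEW/LMUL ratio (and hence the same VLMAX)
  // are interchangeable for it; only the ratio is left demanded above.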

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && MI->getOpcode() == RISCV::ADDI &&
          MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
          MI->getOperand(1).getReg() == RISCV::X0 &&
          MI->getOperand(2).getImm() != 0)
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }
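
  // Note (illustrative): AVL equality here is structural, not semantic. Two
  // distinct virtual registers are treated as different AVLs even if they
  // could be proven to hold the same value at runtime; the only value
  // tracking done is the zero/non-zero reasoning in hasNonZeroAVL above.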

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }
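
  // Worked example (illustrative): if the current state came from
  // 'vsetivli x0, 4, e32,m1,ta,ma' and the next instruction is a vmv.s.x
  // with an undefined passthrough requiring AVL=1 at e32, then Used demands
  // only VL zeroness and SEW >= 32; 4 and 1 are equally non-zero and the
  // SEWs match, so isCompatible returns true and no vsetvli is needed.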

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
  /// @}
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
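
// Note (illustrative): intersect is the meet operator of the dataflow
// lattice used by phase 2. The lattice is shallow - Uninitialized, a known
// AVL/VTYPE state, an SEW/LMUL-ratio-only state, and Unknown - and each
// merge only moves a state downward, which bounds how many times the
// worklist in computeIncomingVLVTYPE can revisit a block.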

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
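
/// Compute the state requested by MI: the AVL taken from MI's VL operand
/// (if present), and the VTYPE fields (SEW, LMUL, tail/mask policy) implied
/// by MI's operands and TSFlags.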
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
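
// Note (illustrative) on the 'x0, x0' form used below: 'vsetvli x0, x0,
// vtypei' updates vtype while leaving vl unchanged, which is only safe when
// the transition provably keeps VLMAX (and therefore vl) the same; the
// checks below establish exactly that before choosing this cheaper form.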

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }
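
  // Example (illustrative): for 'vslidedown.vi v8, v9, 2' with AVL=1 and an
  // undefined merge operand, a prior 'vsetivli x0, 4, e32,m1,ta,ma' state is
  // acceptable: the prior VL of 4 is equally non-zero, and the relaxations
  // above drop the exact-AVL and tail policy requirements, so no new vsetvli
  // is needed.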

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike
  // for vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero. This removes a vsetvli entirely
  // if the types match or allows use of cheaper avl preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }
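
  // Example (illustrative): with a prior 'vsetivli x0, 4, e32,m1,ta,ma' and
  // a vmv.s.x requesting AVL=1 at e64,m2 (same SEW/LMUL ratio, so same
  // VLMAX), the block above keeps the AVL of 4; both AVLs are non-zero,
  // which is all a scalar move observes, and keeping the old AVL lets
  // insertVSETVLI emit the vl-preserving 'vsetvli x0, x0, ...' form.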

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}
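
// Propagate the phase 2 dataflow for one block: recompute the entry state as
// the intersection of all predecessor exit states, rerun the block's
// transfer function, and enqueue successors whenever the exit state changes.
// Since intersect only moves states down the lattice, this converges.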
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to
  // revisit any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
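
// Sketch (illustrative) of the case needVSETVLIPHI recognizes:
//
//   bb.preheader:
//     %vl1 = PseudoVSETVLI %avl1, <vtype>   ; last config in the block
//     ...
//   bb.loop:
//     %avl = PHI [ %vl1, %bb.preheader ], [ %vl2, %bb.loop ]
//     ... vector op using %avl with the same <vtype> ...
//     %vl2 = PseudoVSETVLI %avl2, <vtype>
//
// Each PHI input is the GPR result of the vsetvli that also defines its
// predecessor's exit state, so VL/VTYPE are already correct on entry and no
// vsetvli is needed before the vector op.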

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}
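
// Worked example (illustrative): with a guaranteed minimum VLEN of 128 and a
// state of AVL=8, e32,m2, the request covers 8 * 32 = 256 bits while the
// register group holds at least 128 * 2 = 256 bits, so VL == AVL on every
// implementation and hasFixedResult returns true.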

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a
/// single block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of exec.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
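
// Example (illustrative) of what doLocalPostpass below can do: in
//
//   vsetivli zero, 4, e8,m1,ta,ma    ; nothing in between reads vl/vtype
//   vsetivli zero, 4, e32,m1,ta,ma
//   vadd.vv ...
//
// nothing observes the first configuration, so the backward scan finds its
// demanded fields empty and deletes it.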

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        // Don't delete a vsetvli if its result might be used.
        Register NextVRefDef = NextMI->getOperand(0).getReg();
        if (NextVRefDef == RISCV::X0 ||
            (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef)))
          ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}