//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "RISCV.h" 28 #include "RISCVSubtarget.h" 29 #include "llvm/CodeGen/LiveIntervals.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include <queue> 32 using namespace llvm; 33 34 #define DEBUG_TYPE "riscv-insert-vsetvli" 35 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass" 36 37 static cl::opt<bool> DisableInsertVSETVLPHIOpt( 38 "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden, 39 cl::desc("Disable looking through phis when inserting vsetvlis.")); 40 41 static cl::opt<bool> UseStrictAsserts( 42 "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden, 43 cl::desc("Enable strict assertion checking for the dataflow algorithm")); 44 45 namespace { 46 47 static unsigned getVLOpNum(const MachineInstr &MI) { 48 return RISCVII::getVLOpNum(MI.getDesc()); 49 } 50 51 static unsigned getSEWOpNum(const MachineInstr &MI) { 52 return RISCVII::getSEWOpNum(MI.getDesc()); 53 } 54 55 static bool isVectorConfigInstr(const MachineInstr &MI) { 56 return MI.getOpcode() == RISCV::PseudoVSETVLI || 57 MI.getOpcode() == RISCV::PseudoVSETVLIX0 || 58 MI.getOpcode() == RISCV::PseudoVSETIVLI; 59 } 60 61 /// Return true if this is 'vsetvli x0, x0, vtype' which preserves 62 /// VL and only sets VTYPE. 
63 static bool isVLPreservingConfig(const MachineInstr &MI) { 64 if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) 65 return false; 66 assert(RISCV::X0 == MI.getOperand(1).getReg()); 67 return RISCV::X0 == MI.getOperand(0).getReg(); 68 } 69 70 static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) { 71 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 72 default: 73 return false; 74 case RISCV::VFMV_S_F: 75 case RISCV::VFMV_V_F: 76 return true; 77 } 78 } 79 80 static bool isScalarExtractInstr(const MachineInstr &MI) { 81 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 82 default: 83 return false; 84 case RISCV::VMV_X_S: 85 case RISCV::VFMV_F_S: 86 return true; 87 } 88 } 89 90 static bool isScalarInsertInstr(const MachineInstr &MI) { 91 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 92 default: 93 return false; 94 case RISCV::VMV_S_X: 95 case RISCV::VFMV_S_F: 96 return true; 97 } 98 } 99 100 static bool isScalarSplatInstr(const MachineInstr &MI) { 101 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 102 default: 103 return false; 104 case RISCV::VMV_V_I: 105 case RISCV::VMV_V_X: 106 case RISCV::VFMV_V_F: 107 return true; 108 } 109 } 110 111 static bool isVSlideInstr(const MachineInstr &MI) { 112 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 113 default: 114 return false; 115 case RISCV::VSLIDEDOWN_VX: 116 case RISCV::VSLIDEDOWN_VI: 117 case RISCV::VSLIDEUP_VX: 118 case RISCV::VSLIDEUP_VI: 119 return true; 120 } 121 } 122 123 /// Get the EEW for a load or store instruction. Return std::nullopt if MI is 124 /// not a load or store which ignores SEW. 
125 static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { 126 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 127 default: 128 return std::nullopt; 129 case RISCV::VLE8_V: 130 case RISCV::VLSE8_V: 131 case RISCV::VSE8_V: 132 case RISCV::VSSE8_V: 133 return 8; 134 case RISCV::VLE16_V: 135 case RISCV::VLSE16_V: 136 case RISCV::VSE16_V: 137 case RISCV::VSSE16_V: 138 return 16; 139 case RISCV::VLE32_V: 140 case RISCV::VLSE32_V: 141 case RISCV::VSE32_V: 142 case RISCV::VSSE32_V: 143 return 32; 144 case RISCV::VLE64_V: 145 case RISCV::VLSE64_V: 146 case RISCV::VSE64_V: 147 case RISCV::VSSE64_V: 148 return 64; 149 } 150 } 151 152 static bool isNonZeroLoadImmediate(MachineInstr &MI) { 153 return MI.getOpcode() == RISCV::ADDI && 154 MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && 155 MI.getOperand(1).getReg() == RISCV::X0 && 156 MI.getOperand(2).getImm() != 0; 157 } 158 159 /// Return true if this is an operation on mask registers. Note that 160 /// this includes both arithmetic/logical ops and load/store (vlm/vsm). 161 static bool isMaskRegOp(const MachineInstr &MI) { 162 if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) 163 return false; 164 const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); 165 // A Log2SEW of 0 is an operation on mask registers only. 166 return Log2SEW == 0; 167 } 168 169 /// Return true if the inactive elements in the result are entirely undefined. 170 /// Note that this is different from "agnostic" as defined by the vector 171 /// specification. Agnostic requires each lane to either be undisturbed, or 172 /// take the value -1; no other value is allowed. 173 static bool hasUndefinedMergeOp(const MachineInstr &MI, 174 const MachineRegisterInfo &MRI) { 175 176 unsigned UseOpIdx; 177 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) 178 // If there is no passthrough operand, then the pass through 179 // lanes are undefined. 
180 return true; 181 182 // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose 183 // operands are solely IMPLICIT_DEFS, then the pass through lanes are 184 // undefined. 185 const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 186 if (UseMO.getReg() == RISCV::NoRegister) 187 return true; 188 189 if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { 190 if (UseMI->isImplicitDef()) 191 return true; 192 193 if (UseMI->isRegSequence()) { 194 for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) { 195 MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg()); 196 if (!SourceMI || !SourceMI->isImplicitDef()) 197 return false; 198 } 199 return true; 200 } 201 } 202 return false; 203 } 204 205 /// Which subfields of VL or VTYPE have values we need to preserve? 206 struct DemandedFields { 207 // Some unknown property of VL is used. If demanded, must preserve entire 208 // value. 209 bool VLAny = false; 210 // Only zero vs non-zero is used. If demanded, can change non-zero values. 211 bool VLZeroness = false; 212 // What properties of SEW we need to preserve. 213 enum : uint8_t { 214 SEWEqual = 3, // The exact value of SEW needs to be preserved. 215 SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater 216 // than or equal to the original value. 217 SEWGreaterThanOrEqualAndLessThan64 = 218 1, // SEW can be changed as long as it's greater 219 // than or equal to the original value, but must be less 220 // than 64. 221 SEWNone = 0 // We don't need to preserve SEW at all. 
222 } SEW = SEWNone; 223 bool LMUL = false; 224 bool SEWLMULRatio = false; 225 bool TailPolicy = false; 226 bool MaskPolicy = false; 227 228 // Return true if any part of VTYPE was used 229 bool usedVTYPE() const { 230 return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy; 231 } 232 233 // Return true if any property of VL was used 234 bool usedVL() { 235 return VLAny || VLZeroness; 236 } 237 238 // Mark all VTYPE subfields and properties as demanded 239 void demandVTYPE() { 240 SEW = SEWEqual; 241 LMUL = true; 242 SEWLMULRatio = true; 243 TailPolicy = true; 244 MaskPolicy = true; 245 } 246 247 // Mark all VL properties as demanded 248 void demandVL() { 249 VLAny = true; 250 VLZeroness = true; 251 } 252 253 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 254 /// Support for debugging, callable in GDB: V->dump() 255 LLVM_DUMP_METHOD void dump() const { 256 print(dbgs()); 257 dbgs() << "\n"; 258 } 259 260 /// Implement operator<<. 261 void print(raw_ostream &OS) const { 262 OS << "{"; 263 OS << "VLAny=" << VLAny << ", "; 264 OS << "VLZeroness=" << VLZeroness << ", "; 265 OS << "SEW="; 266 switch (SEW) { 267 case SEWEqual: 268 OS << "SEWEqual"; 269 break; 270 case SEWGreaterThanOrEqual: 271 OS << "SEWGreaterThanOrEqual"; 272 break; 273 case SEWGreaterThanOrEqualAndLessThan64: 274 OS << "SEWGreaterThanOrEqualAndLessThan64"; 275 break; 276 case SEWNone: 277 OS << "SEWNone"; 278 break; 279 }; 280 OS << ", "; 281 OS << "LMUL=" << LMUL << ", "; 282 OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; 283 OS << "TailPolicy=" << TailPolicy << ", "; 284 OS << "MaskPolicy=" << MaskPolicy; 285 OS << "}"; 286 } 287 #endif 288 }; 289 290 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 291 LLVM_ATTRIBUTE_USED 292 inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { 293 DF.print(OS); 294 return OS; 295 } 296 #endif 297 298 /// Return true if moving from CurVType to NewVType is 299 /// indistinguishable from the perspective of an instruction (or set 300 
/// of instructions) which use only the Used subfields and properties. 301 static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, 302 const DemandedFields &Used) { 303 switch (Used.SEW) { 304 case DemandedFields::SEWNone: 305 break; 306 case DemandedFields::SEWEqual: 307 if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) 308 return false; 309 break; 310 case DemandedFields::SEWGreaterThanOrEqual: 311 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) 312 return false; 313 break; 314 case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: 315 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || 316 RISCVVType::getSEW(NewVType) >= 64) 317 return false; 318 break; 319 } 320 321 if (Used.LMUL && 322 RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) 323 return false; 324 325 if (Used.SEWLMULRatio) { 326 auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), 327 RISCVVType::getVLMUL(CurVType)); 328 auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), 329 RISCVVType::getVLMUL(NewVType)); 330 if (Ratio1 != Ratio2) 331 return false; 332 } 333 334 if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != 335 RISCVVType::isTailAgnostic(NewVType)) 336 return false; 337 if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != 338 RISCVVType::isMaskAgnostic(NewVType)) 339 return false; 340 return true; 341 } 342 343 /// Return the fields and properties demanded by the provided instruction. 344 DemandedFields getDemanded(const MachineInstr &MI, 345 const MachineRegisterInfo *MRI, 346 const RISCVSubtarget *ST) { 347 // Warning: This function has to work on both the lowered (i.e. post 348 // emitVSETVLIs) and pre-lowering forms. The main implication of this is 349 // that it can't use the value of a SEW, VL, or Policy operand as they might 350 // be stale after lowering. 351 352 // Most instructions don't use any of these subfeilds. 
353 DemandedFields Res; 354 // Start conservative if registers are used 355 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL)) 356 Res.demandVL(); 357 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE)) 358 Res.demandVTYPE(); 359 // Start conservative on the unlowered form too 360 uint64_t TSFlags = MI.getDesc().TSFlags; 361 if (RISCVII::hasSEWOp(TSFlags)) { 362 Res.demandVTYPE(); 363 if (RISCVII::hasVLOp(TSFlags)) 364 Res.demandVL(); 365 366 // Behavior is independent of mask policy. 367 if (!RISCVII::usesMaskPolicy(TSFlags)) 368 Res.MaskPolicy = false; 369 } 370 371 // Loads and stores with implicit EEW do not demand SEW or LMUL directly. 372 // They instead demand the ratio of the two which is used in computing 373 // EMUL, but which allows us the flexibility to change SEW and LMUL 374 // provided we don't change the ratio. 375 // Note: We assume that the instructions initial SEW is the EEW encoded 376 // in the opcode. This is asserted when constructing the VSETVLIInfo. 377 if (getEEWForLoadStore(MI)) { 378 Res.SEW = DemandedFields::SEWNone; 379 Res.LMUL = false; 380 } 381 382 // Store instructions don't use the policy fields. 383 if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) { 384 Res.TailPolicy = false; 385 Res.MaskPolicy = false; 386 } 387 388 // If this is a mask reg operation, it only cares about VLMAX. 389 // TODO: Possible extensions to this logic 390 // * Probably ok if available VLMax is larger than demanded 391 // * The policy bits can probably be ignored.. 392 if (isMaskRegOp(MI)) { 393 Res.SEW = DemandedFields::SEWNone; 394 Res.LMUL = false; 395 } 396 397 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. 
398 if (isScalarInsertInstr(MI)) { 399 Res.LMUL = false; 400 Res.SEWLMULRatio = false; 401 Res.VLAny = false; 402 // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't 403 // need to preserve any other bits and are thus compatible with any larger, 404 // etype and can disregard policy bits. Warning: It's tempting to try doing 405 // this for any tail agnostic operation, but we can't as TA requires 406 // tail lanes to either be the original value or -1. We are writing 407 // unknown bits to the lanes here. 408 if (hasUndefinedMergeOp(MI, *MRI)) { 409 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64()) 410 Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; 411 else 412 Res.SEW = DemandedFields::SEWGreaterThanOrEqual; 413 Res.TailPolicy = false; 414 } 415 } 416 417 // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW. 418 if (isScalarExtractInstr(MI)) { 419 assert(!RISCVII::hasVLOp(TSFlags)); 420 Res.LMUL = false; 421 Res.SEWLMULRatio = false; 422 Res.TailPolicy = false; 423 Res.MaskPolicy = false; 424 } 425 426 return Res; 427 } 428 429 /// Defines the abstract state with which the forward dataflow models the 430 /// values of the VL and VTYPE registers after insertion. 431 class VSETVLIInfo { 432 union { 433 Register AVLReg; 434 unsigned AVLImm; 435 }; 436 437 enum : uint8_t { 438 Uninitialized, 439 AVLIsReg, 440 AVLIsImm, 441 Unknown, 442 } State = Uninitialized; 443 444 // Fields from VTYPE. 
445 RISCVII::VLMUL VLMul = RISCVII::LMUL_1; 446 uint8_t SEW = 0; 447 uint8_t TailAgnostic : 1; 448 uint8_t MaskAgnostic : 1; 449 uint8_t SEWLMULRatioOnly : 1; 450 451 public: 452 VSETVLIInfo() 453 : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), 454 SEWLMULRatioOnly(false) {} 455 456 static VSETVLIInfo getUnknown() { 457 VSETVLIInfo Info; 458 Info.setUnknown(); 459 return Info; 460 } 461 462 bool isValid() const { return State != Uninitialized; } 463 void setUnknown() { State = Unknown; } 464 bool isUnknown() const { return State == Unknown; } 465 466 void setAVLReg(Register Reg) { 467 AVLReg = Reg; 468 State = AVLIsReg; 469 } 470 471 void setAVLImm(unsigned Imm) { 472 AVLImm = Imm; 473 State = AVLIsImm; 474 } 475 476 bool hasAVLImm() const { return State == AVLIsImm; } 477 bool hasAVLReg() const { return State == AVLIsReg; } 478 Register getAVLReg() const { 479 assert(hasAVLReg()); 480 return AVLReg; 481 } 482 unsigned getAVLImm() const { 483 assert(hasAVLImm()); 484 return AVLImm; 485 } 486 487 void setAVL(VSETVLIInfo Info) { 488 assert(Info.isValid()); 489 if (Info.isUnknown()) 490 setUnknown(); 491 else if (Info.hasAVLReg()) 492 setAVLReg(Info.getAVLReg()); 493 else { 494 assert(Info.hasAVLImm()); 495 setAVLImm(Info.getAVLImm()); 496 } 497 } 498 499 unsigned getSEW() const { return SEW; } 500 RISCVII::VLMUL getVLMUL() const { return VLMul; } 501 bool getTailAgnostic() const { return TailAgnostic; } 502 bool getMaskAgnostic() const { return MaskAgnostic; } 503 504 bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const { 505 if (hasAVLImm()) 506 return getAVLImm() > 0; 507 if (hasAVLReg()) { 508 if (getAVLReg() == RISCV::X0) 509 return true; 510 if (MachineInstr *MI = MRI.getVRegDef(getAVLReg()); 511 MI && isNonZeroLoadImmediate(*MI)) 512 return true; 513 return false; 514 } 515 return false; 516 } 517 518 bool hasEquallyZeroAVL(const VSETVLIInfo &Other, 519 const MachineRegisterInfo &MRI) const { 520 if (hasSameAVL(Other)) 521 return true; 522 return 
(hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI)); 523 } 524 525 bool hasSameAVL(const VSETVLIInfo &Other) const { 526 if (hasAVLReg() && Other.hasAVLReg()) 527 return getAVLReg() == Other.getAVLReg(); 528 529 if (hasAVLImm() && Other.hasAVLImm()) 530 return getAVLImm() == Other.getAVLImm(); 531 532 return false; 533 } 534 535 void setVTYPE(unsigned VType) { 536 assert(isValid() && !isUnknown() && 537 "Can't set VTYPE for uninitialized or unknown"); 538 VLMul = RISCVVType::getVLMUL(VType); 539 SEW = RISCVVType::getSEW(VType); 540 TailAgnostic = RISCVVType::isTailAgnostic(VType); 541 MaskAgnostic = RISCVVType::isMaskAgnostic(VType); 542 } 543 void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) { 544 assert(isValid() && !isUnknown() && 545 "Can't set VTYPE for uninitialized or unknown"); 546 VLMul = L; 547 SEW = S; 548 TailAgnostic = TA; 549 MaskAgnostic = MA; 550 } 551 552 void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; } 553 554 unsigned encodeVTYPE() const { 555 assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && 556 "Can't encode VTYPE for uninitialized or unknown"); 557 return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); 558 } 559 560 bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } 561 562 bool hasSameVTYPE(const VSETVLIInfo &Other) const { 563 assert(isValid() && Other.isValid() && 564 "Can't compare invalid VSETVLIInfos"); 565 assert(!isUnknown() && !Other.isUnknown() && 566 "Can't compare VTYPE in unknown state"); 567 assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && 568 "Can't compare when only LMUL/SEW ratio is valid."); 569 return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == 570 std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, 571 Other.MaskAgnostic); 572 } 573 574 unsigned getSEWLMULRatio() const { 575 assert(isValid() && !isUnknown() && 576 "Can't use VTYPE for uninitialized or unknown"); 577 return RISCVVType::getSEWLMULRatio(SEW, VLMul); 578 } 579 580 // Check if the VTYPE for 
these two VSETVLIInfos produce the same VLMAX. 581 // Note that having the same VLMAX ensures that both share the same 582 // function from AVL to VL; that is, they must produce the same VL value 583 // for any given AVL value. 584 bool hasSameVLMAX(const VSETVLIInfo &Other) const { 585 assert(isValid() && Other.isValid() && 586 "Can't compare invalid VSETVLIInfos"); 587 assert(!isUnknown() && !Other.isUnknown() && 588 "Can't compare VTYPE in unknown state"); 589 return getSEWLMULRatio() == Other.getSEWLMULRatio(); 590 } 591 592 bool hasCompatibleVTYPE(const DemandedFields &Used, 593 const VSETVLIInfo &Require) const { 594 return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used); 595 } 596 597 // Determine whether the vector instructions requirements represented by 598 // Require are compatible with the previous vsetvli instruction represented 599 // by this. MI is the instruction whose requirements we're considering. 600 bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require, 601 const MachineRegisterInfo &MRI) const { 602 assert(isValid() && Require.isValid() && 603 "Can't compare invalid VSETVLIInfos"); 604 assert(!Require.SEWLMULRatioOnly && 605 "Expected a valid VTYPE for instruction!"); 606 // Nothing is compatible with Unknown. 607 if (isUnknown() || Require.isUnknown()) 608 return false; 609 610 // If only our VLMAX ratio is valid, then this isn't compatible. 611 if (SEWLMULRatioOnly) 612 return false; 613 614 if (Used.VLAny && !hasSameAVL(Require)) 615 return false; 616 617 if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI)) 618 return false; 619 620 return hasCompatibleVTYPE(Used, Require); 621 } 622 623 bool operator==(const VSETVLIInfo &Other) const { 624 // Uninitialized is only equal to another Uninitialized. 625 if (!isValid()) 626 return !Other.isValid(); 627 if (!Other.isValid()) 628 return !isValid(); 629 630 // Unknown is only equal to another Unknown. 
631 if (isUnknown()) 632 return Other.isUnknown(); 633 if (Other.isUnknown()) 634 return isUnknown(); 635 636 if (!hasSameAVL(Other)) 637 return false; 638 639 // If the SEWLMULRatioOnly bits are different, then they aren't equal. 640 if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly) 641 return false; 642 643 // If only the VLMAX is valid, check that it is the same. 644 if (SEWLMULRatioOnly) 645 return hasSameVLMAX(Other); 646 647 // If the full VTYPE is valid, check that it is the same. 648 return hasSameVTYPE(Other); 649 } 650 651 bool operator!=(const VSETVLIInfo &Other) const { 652 return !(*this == Other); 653 } 654 655 // Calculate the VSETVLIInfo visible to a block assuming this and Other are 656 // both predecessors. 657 VSETVLIInfo intersect(const VSETVLIInfo &Other) const { 658 // If the new value isn't valid, ignore it. 659 if (!Other.isValid()) 660 return *this; 661 662 // If this value isn't valid, this must be the first predecessor, use it. 663 if (!isValid()) 664 return Other; 665 666 // If either is unknown, the result is unknown. 667 if (isUnknown() || Other.isUnknown()) 668 return VSETVLIInfo::getUnknown(); 669 670 // If we have an exact, match return this. 671 if (*this == Other) 672 return *this; 673 674 // Not an exact match, but maybe the AVL and VLMAX are the same. If so, 675 // return an SEW/LMUL ratio only value. 676 if (hasSameAVL(Other) && hasSameVLMAX(Other)) { 677 VSETVLIInfo MergeInfo = *this; 678 MergeInfo.SEWLMULRatioOnly = true; 679 return MergeInfo; 680 } 681 682 // Otherwise the result is unknown. 683 return VSETVLIInfo::getUnknown(); 684 } 685 686 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 687 /// Support for debugging, callable in GDB: V->dump() 688 LLVM_DUMP_METHOD void dump() const { 689 print(dbgs()); 690 dbgs() << "\n"; 691 } 692 693 /// Implement operator<<. 
694 /// @{ 695 void print(raw_ostream &OS) const { 696 OS << "{"; 697 if (!isValid()) 698 OS << "Uninitialized"; 699 if (isUnknown()) 700 OS << "unknown"; 701 if (hasAVLReg()) 702 OS << "AVLReg=" << (unsigned)AVLReg; 703 if (hasAVLImm()) 704 OS << "AVLImm=" << (unsigned)AVLImm; 705 OS << ", " 706 << "VLMul=" << (unsigned)VLMul << ", " 707 << "SEW=" << (unsigned)SEW << ", " 708 << "TailAgnostic=" << (bool)TailAgnostic << ", " 709 << "MaskAgnostic=" << (bool)MaskAgnostic << ", " 710 << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; 711 } 712 #endif 713 }; 714 715 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 716 LLVM_ATTRIBUTE_USED 717 inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { 718 V.print(OS); 719 return OS; 720 } 721 #endif 722 723 struct BlockData { 724 // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this 725 // block. Calculated in Phase 2. 726 VSETVLIInfo Exit; 727 728 // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor 729 // blocks. Calculated in Phase 2, and used by Phase 3. 730 VSETVLIInfo Pred; 731 732 // Keeps track of whether the block is already in the queue. 
733 bool InQueue = false; 734 735 BlockData() = default; 736 }; 737 738 class RISCVInsertVSETVLI : public MachineFunctionPass { 739 const RISCVSubtarget *ST; 740 const TargetInstrInfo *TII; 741 MachineRegisterInfo *MRI; 742 743 std::vector<BlockData> BlockInfo; 744 std::queue<const MachineBasicBlock *> WorkList; 745 746 public: 747 static char ID; 748 749 RISCVInsertVSETVLI() : MachineFunctionPass(ID) {} 750 bool runOnMachineFunction(MachineFunction &MF) override; 751 752 void getAnalysisUsage(AnalysisUsage &AU) const override { 753 AU.setPreservesCFG(); 754 MachineFunctionPass::getAnalysisUsage(AU); 755 } 756 757 StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } 758 759 private: 760 bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, 761 const VSETVLIInfo &CurInfo) const; 762 bool needVSETVLIPHI(const VSETVLIInfo &Require, 763 const MachineBasicBlock &MBB) const; 764 void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, 765 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); 766 void insertVSETVLI(MachineBasicBlock &MBB, 767 MachineBasicBlock::iterator InsertPt, DebugLoc DL, 768 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); 769 770 void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const; 771 void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const; 772 bool computeVLVTYPEChanges(const MachineBasicBlock &MBB, 773 VSETVLIInfo &Info) const; 774 void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); 775 void emitVSETVLIs(MachineBasicBlock &MBB); 776 void doLocalPostpass(MachineBasicBlock &MBB); 777 void doPRE(MachineBasicBlock &MBB); 778 void insertReadVL(MachineBasicBlock &MBB); 779 }; 780 781 } // end anonymous namespace 782 783 char RISCVInsertVSETVLI::ID = 0; 784 785 INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, 786 false, false) 787 788 // Return a VSETVLIInfo representing the changes made by this VSETVLI or 789 // VSETIVLI instruction. 
790 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { 791 VSETVLIInfo NewInfo; 792 if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { 793 NewInfo.setAVLImm(MI.getOperand(1).getImm()); 794 } else { 795 assert(MI.getOpcode() == RISCV::PseudoVSETVLI || 796 MI.getOpcode() == RISCV::PseudoVSETVLIX0); 797 Register AVLReg = MI.getOperand(1).getReg(); 798 assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && 799 "Can't handle X0, X0 vsetvli yet"); 800 NewInfo.setAVLReg(AVLReg); 801 } 802 NewInfo.setVTYPE(MI.getOperand(2).getImm()); 803 804 return NewInfo; 805 } 806 807 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, 808 const MachineRegisterInfo *MRI) { 809 VSETVLIInfo InstrInfo; 810 811 bool TailAgnostic = true; 812 bool MaskAgnostic = true; 813 if (!hasUndefinedMergeOp(MI, *MRI)) { 814 // Start with undisturbed. 815 TailAgnostic = false; 816 MaskAgnostic = false; 817 818 // If there is a policy operand, use it. 819 if (RISCVII::hasVecPolicyOp(TSFlags)) { 820 const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); 821 uint64_t Policy = Op.getImm(); 822 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 823 "Invalid Policy Value"); 824 TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; 825 MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; 826 } 827 828 // Some pseudo instructions force a tail agnostic policy despite having a 829 // tied def. 830 if (RISCVII::doesForceTailAgnostic(TSFlags)) 831 TailAgnostic = true; 832 833 if (!RISCVII::usesMaskPolicy(TSFlags)) 834 MaskAgnostic = true; 835 } 836 837 RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); 838 839 unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); 840 // A Log2SEW of 0 is an operation on mask registers only. 841 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 842 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 843 844 if (RISCVII::hasVLOp(TSFlags)) { 845 const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 846 if (VLOp.isImm()) { 847 int64_t Imm = VLOp.getImm(); 848 // Conver the VLMax sentintel to X0 register. 849 if (Imm == RISCV::VLMaxSentinel) 850 InstrInfo.setAVLReg(RISCV::X0); 851 else 852 InstrInfo.setAVLImm(Imm); 853 } else { 854 InstrInfo.setAVLReg(VLOp.getReg()); 855 } 856 } else { 857 assert(isScalarExtractInstr(MI)); 858 InstrInfo.setAVLReg(RISCV::NoRegister); 859 } 860 #ifndef NDEBUG 861 if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) { 862 assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); 863 } 864 #endif 865 InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); 866 867 // If AVL is defined by a vsetvli with the same VLMAX, we can replace the 868 // AVL operand with the AVL of the defining vsetvli. We avoid general 869 // register AVLs to avoid extending live ranges without being sure we can 870 // kill the original source reg entirely. 
// If the AVL is a virtual register defined by a vector config instruction
// with the same VLMAX, and that instruction's own AVL is an immediate or X0,
// take over its AVL.
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}

/// Insert a vector config instruction establishing \p Info immediately before
/// \p MI, using \p MI's debug location. Forwards to the iterator-based
/// overload below.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

/// Emit, at \p InsertPt in \p MBB, the vector config pseudo that establishes
/// \p Info. \p PrevInfo is the state in effect before the insertion point and
/// is only used to select a cheaper encoding. The forms are tried in this
/// order:
///   1. PseudoVSETVLIX0 with X0 as both destination and AVL (VL is added as
///      an implicit operand) when PrevInfo is valid and known and either
///      has the same AVL and VLMAX as Info, or Info's AVL is a virtual
///      register defined by a vector config instruction whose AVL and VLMAX
///      match PrevInfo.
///   2. PseudoVSETIVLI when Info's AVL is an immediate.
///   3. When Info's AVL register is NoRegister: the X0, X0 form if PrevInfo is
///      valid, known and has the same VLMAX; otherwise PseudoVSETIVLI with an
///      AVL of 1.
///   4. Otherwise PseudoVSETVLI with the AVL register and X0 as destination,
///      or PseudoVSETVLIX0 with a newly created virtual GPR destination when
///      the AVL register is X0.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  // Immediate AVL: use the immediate form and discard the VL result in X0.
  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  // A virtual AVL register is constrained to the GPRNoX0 class before being
  // used as the AVL operand of PseudoVSETVLI.
  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

/// Return true if \p LMUL decodes to a fractional multiplier or to exactly 1.
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
///
/// \p Require must be the state computeInfoForInstr produces for \p MI (this
/// is asserted). A VSETVLI is always reported as needed when \p CurInfo is
/// invalid, unknown, or only carries a SEW/LMUL ratio.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // Start from the fields MI demands; the special cases below relax them.
  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
  // semantically the same as vmv.s.x. This is particularly useful since we
  // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
  // in its place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  // The current state already satisfies everything MI (still) demands.
  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    // Pick the LMUL that, combined with the new SEW, keeps PrevInfo's
    // SEW/LMUL ratio; NewInfo's LMUL is kept if no such LMUL is returned.
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    // Mark LMUL as demanded so that transferBefore takes the LMUL from the
    // info returned here.
    Demanded.LMUL = true;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  // Instructions without a SEW operand leave the state untouched.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  // Nothing to do if the incoming state is already acceptable for MI.
  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  // With no usable incoming state, start from exactly what MI requests.
  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  // Each VTYPE field comes from IncomingInfo when it is demanded and is kept
  // from the current Info otherwise.
  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  // An explicit vector config instruction fully determines the new state.
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

// Walk MBB starting from the block's recorded entry state
// (BlockInfo[...].Pred), applying transferBefore/transferAfter to every
// instruction. On return, Info holds the state at the end of the block.
// Returns true if the block contains a vector config instruction or an
// instruction with a SEW operand.
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

// Worklist step of the dataflow for MBB: recompute the block's entry state
// from its predecessors' exit states. If the entry state changed, recompute
// the exit state, and if that changed too, queue every successor that is not
// already in the work list.
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
// Returns true if the vsetvli is still needed, false if it can be skipped.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  // Operands from index 1 on are read as (incoming register, predecessor
  // block) pairs.
  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

// Replay the dataflow over MBB from its recorded entry state and materialize
// it: insert a vsetvli before each instruction with a SEW operand for which
// transferBefore changed the state (unless the PHI check proves it
// unnecessary at the start of the block), mark VL/VTYPE as live on explicit
// vector config instructions, and replace the AVL operand of vector
// instructions with implicit uses of VL and VTYPE. At the end, the computed
// state is checked against (or, without strict asserts, corrected to) the
// block's recorded exit state.
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Look up the defining instruction before the operand is cleared.
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    // Same conditions under which transferAfter resets the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  // With strict asserts enabled, a mismatch is reported and then asserted on
  // instead of being patched up.
  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  // Only blocks whose entry state is unknown are candidates.
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  // Look for exactly one predecessor with an unknown exit state while all
  // other predecessors agree on a single exit state.
  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    // The vsetvli is inserted before the terminators, so the AVL must not be
    // defined by one of them.
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo.  We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.  We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    // Count instructions that leave the new state unchanged against those
    // that leave the old state unchanged.
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence.  All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Merge the demanded fields of B into A: the boolean fields are OR'd together
// and SEW takes the larger of the two values.
static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if the AVL operand MO is treated as non-zero: the register X0,
// a register whose defining instruction satisfies isNonZeroLoadImmediate, or
// a non-zero immediate. Any other register yields false.
static bool isNonZeroAVL(const MachineOperand &MO,
                         const MachineRegisterInfo &MRI) {
  if (MO.isReg()) {
    if (MO.getReg() == RISCV::X0)
      return true;
    if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
        MI && isNonZeroLoadImmediate(*MI))
      return true;
    return false;
  }
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
          !isNonZeroAVL(PrevMI.getOperand(1), MRI))
        return false;
    }

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;
  }

  // Both instructions need an immediate VTYPE operand (operand 2) to compare.
  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

// Block-local cleanup of vector config instructions. The block is walked
// bottom-up while tracking which VL/VTYPE fields are demanded between a
// config instruction and the next one below it (NextMI). A config
// instruction is deleted when none of VL/VTYPE is used before NextMI;
// otherwise, when canMutatePriorConfig allows it, it is rewritten to NextMI's
// configuration and NextMI is deleted instead. Deletions are deferred until
// the walk is finished.
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    // Non-config instructions only contribute their demanded fields.
    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      continue;
    }

    // The VL result counts as used unless it is written to X0 or to a virtual
    // register without non-debug uses.
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          // Take over NextMI's destination, AVL operand and opcode.
          MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
          MI.getOperand(0).setIsDead(false);
          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
          // Erase the add-immediate that defined the old AVL register if it
          // has no remaining non-debug uses.
          if (OldVLReg) {
            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg))
              VLOpDef->eraseFromParent();
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    // MI becomes the config instruction seen by earlier instructions; restart
    // the demanded set from what MI itself demands.
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

// For every fault-first load in MBB whose vl output (operand 1) has non-debug
// uses, insert a PseudoReadVL defining that register directly after the load.
// The load's vl output operand is then redirected to X0 in all cases.
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    // Advance first so that I points past MI and is a valid insertion point.
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

// Pass entry point. Does nothing and returns false when the subtarget has no
// vector instructions or no block contains a vector operation; otherwise runs
// the three phases followed by PRE, the local postpass and PseudoReadVL
// insertion, and returns true.
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");

  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow.  We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}