//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
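
// Illustrative assembly sketch (an assumption, not taken from a specific
// test): the VL-preserving form below rewrites only VTYPE while keeping VL,
// which works here because the SEW/LMUL ratio (and thus VLMAX) is unchanged:
//   vsetvli a1, a0, e32, m1, ta, ma   ; sets both VL and VTYPE
//   vsetvli x0, x0, e16, mf2, ta, ma  ; same ratio; VL is preserved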
/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthrough
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of
  // a SEW, VL, or Policy operand even though it might not be the exact value
  // in the VL or VTYPE, since we only care about what the instruction
  // originally demanded.
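
  // Illustrative example (an assumption about specific opcodes, not asserted
  // anywhere): a plain vadd.vv demands VL and the full VTYPE, while a
  // vmv.x.s is narrowed by the rules below until only its SEW remains
  // demanded.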

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* merge op can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
    // semantically the same as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
    // do need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x).
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless
  // it was a PHI node. In that case getAVLVNInfo()->def will point to the
  // block boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }
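
  // Example (illustrative): two infos can name the same virtual register but
  // carry different VNInfos (the register was redefined between the two
  // uses); they are then distinct lattice values even though the register
  // number matches.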

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
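
// Example (illustrative): with two predecessors exiting as {AVL=%x, e32, m1}
// and {AVL=%x, e16, mf2}, the meet via intersect() above keeps the common AVL
// and SEW/LMUL ratio (32) but marks the result SEWLMULRatioOnly; a third,
// incompatible predecessor would drive the entry state to Unknown.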

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all
  // predecessor blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

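// Worked example (illustrative): with VLEN=128, SEW=32 and LMUL=mf2
// (fractional 2), VLMAX = (128 / 2) / 32 = 2; with LMUL=m2 instead,
// VLMAX = (128 * 2) / 32 = 8.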
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick an arbitrary value for state tracking purposes; it will be
    // ignored via the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use the X0, X0 form if the AVL is the same and the SEW+LMUL gives the
    // same VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's
    // easily extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
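// Example (illustrative): if the previous state was e64/m2 (ratio 32) and the
// incoming instruction wants e32 but demands neither LMUL nor the ratio, we
// pick e32/m1 so that VLMAX is unchanged.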
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set
  // the AVL if the zeroness differs. This removes a vsetvli entirely if the
  // types match, or allows use of the cheaper AVL-preserving variant if
  // VLMAX doesn't change. If VLMAX might change, we couldn't use the
  // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
  // extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed
      // later if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to
  // revisit any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
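// Example (illustrative): in a loop where the AVL is
//   %avl = PHI [%vl1, %preheader], [%vl2, %latch]
// and %vl1 and %vl2 are the VL outputs of vsetvlis whose exit states already
// match the required VTYPE, re-executing a vsetvli at the header would be
// redundant.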
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been
            // emitted as an ADDI. However, the ADDI might not have been used
            // in the vsetvli, or a vsetvli might not have been emitted, so
            // it may be dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.
/// Specifically, this is geared to catch the common case of a fixed length
/// vsetvl in a single block loop when it could execute once in the preheader
/// instead.
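/// Example (illustrative): a vsetvli that phase 3 would otherwise emit at the
/// head of a single-block loop is instead emitted once at the end of the
/// preheader, so the loop body no longer re-executes it on every iteration.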
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit
  // of the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We cannot
  //    make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of
// the fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates
    // PrevMI. For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
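
// Example (illustrative MIR sketch): in
//   %x = PseudoVSETVLI %a, <e32, m1>   ; %x dead, no VL/VTYPE readers below
//   $x0 = PseudoVSETVLI %a, <e16, mf2>
// everything the first vsetvli defines is overwritten before being observed,
// so the backward walk in coalesceVSETVLIs can delete it.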
  auto afterDroppedAVLUse = [&](Register OldVLReg) {
    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg)) {
      if (LIS) {
        LIS->removeInterval(OldVLReg);
        LIS->RemoveMachineInstrFromMaps(*VLOpDef);
      }
      VLOpDef->eraseFromParent();
    }
  };

  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          if (OldVLReg && OldVLReg.isVirtual())
            afterDroppedAVLUse(OldVLReg);

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*MI);
    Register OldAVLReg;
    if (MI->getOperand(1).isReg())
      OldAVLReg = MI->getOperand(1).getReg();
    MI->eraseFromParent();
    if (OldAVLReg && OldAVLReg.isVirtual())
      afterDroppedAVLUse(OldAVLReg);
  }
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
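        // Hedged sketch of the rewrite (the pseudo opcode spelling is
        // assumed, not taken from a real test): the fault-first load's vl
        // result
        //   %dest, %vl = PseudoVLE32FF_V_M1 ...
        // is instead produced by a trailing read of the VL CSR
        //   %dest, $x0 = PseudoVLE32FF_V_M1 ...
        //   %vl = PseudoReadVL
        // so %vl's live range must now begin at the PseudoReadVL.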
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
          DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    coalesceVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and use it to replace the vl
  // output of the VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

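// A hedged usage sketch (the pass-config hook below is recalled from the
// RISC-V backend, not defined in this file): the factory that follows is
// what the target's pipeline calls to schedule this pass before register
// allocation, e.g.
//
//   void RISCVPassConfig::addPreRegAlloc() {
//     addPass(createRISCVInsertVSETVLIPass());
//   }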
/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}