1 //===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass inserts writes to the VXRM CSR as needed by vector instructions. 10 // Each instruction that uses VXRM carries an operand that contains its required 11 // VXRM value. This pass tries to optimize placement to avoid redundant writes 12 // to VXRM. 13 // 14 // This is done using 2 dataflow algorithms. The first is a forward data flow 15 // to calculate where a VXRM value is available. The second is a backwards 16 // dataflow to determine where a VXRM value is anticipated. 17 // 18 // Finally, we use the results of these two dataflows to insert VXRM writes 19 // where a value is anticipated, but not available. 20 // 21 // FIXME: This pass does not split critical edges, so there can still be some 22 // redundancy. 23 // 24 // FIXME: If we are willing to have writes that aren't always needed, we could 25 // reduce the number of VXRM writes in some cases. 26 //===----------------------------------------------------------------------===// 27 28 #include "MCTargetDesc/RISCVBaseInfo.h" 29 #include "RISCV.h" 30 #include "RISCVSubtarget.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include <queue> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "riscv-insert-write-vxrm" 37 #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass" 38 39 namespace { 40 41 class VXRMInfo { 42 uint8_t VXRMImm = 0; 43 44 enum : uint8_t { 45 Uninitialized, 46 Static, 47 Unknown, 48 } State = Uninitialized; 49 50 public: 51 VXRMInfo() {} 52 53 static VXRMInfo getUnknown() { 54 VXRMInfo Info; 55 Info.setUnknown(); 56 return Info; 57 } 58 59 bool isValid() const { return State != Uninitialized; } 60 void setUnknown() { State = Unknown; } 61 bool isUnknown() const { return State == Unknown; } 62 63 bool isStatic() const { return State == Static; } 64 65 void setVXRMImm(unsigned Imm) { 66 assert(Imm <= 3 && "Unexpected VXRM value"); 67 VXRMImm = Imm; 68 State = Static; 69 } 70 unsigned getVXRMImm() const { 71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state"); 72 return VXRMImm; 73 } 74 75 bool operator==(const VXRMInfo &Other) const { 76 // Uninitialized is only equal to another Uninitialized. 77 if (State != Other.State) 78 return false; 79 80 if (isStatic()) 81 return VXRMImm == Other.VXRMImm; 82 83 assert((isValid() || isUnknown()) && "Unexpected state"); 84 return true; 85 } 86 87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); } 88 89 // Calculate the VXRMInfo visible to a block assuming this and Other are 90 // both predecessors. 91 VXRMInfo intersect(const VXRMInfo &Other) const { 92 // If the new value isn't valid, ignore it. 93 if (!Other.isValid()) 94 return *this; 95 96 // If this value isn't valid, this must be the first predecessor, use it. 97 if (!isValid()) 98 return Other; 99 100 // If either is unknown, the result is unknown. 101 if (isUnknown() || Other.isUnknown()) 102 return VXRMInfo::getUnknown(); 103 104 // If we have an exact match, return this. 105 if (*this == Other) 106 return *this; 107 108 // Otherwise the result is unknown. 109 return VXRMInfo::getUnknown(); 110 } 111 112 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 113 /// Support for debugging, callable in GDB: V->dump() 114 LLVM_DUMP_METHOD void dump() const { 115 print(dbgs()); 116 dbgs() << "\n"; 117 } 118 119 void print(raw_ostream &OS) const { 120 OS << '{'; 121 if (!isValid()) 122 OS << "Uninitialized"; 123 else if (isUnknown()) 124 OS << "Unknown"; 125 else 126 OS << getVXRMImm(); 127 OS << '}'; 128 } 129 #endif 130 }; 131 132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 133 LLVM_ATTRIBUTE_USED 134 inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) { 135 V.print(OS); 136 return OS; 137 } 138 #endif 139 140 struct BlockData { 141 // Indicates if the block uses VXRM. Uninitialized means no use. 142 VXRMInfo VXRMUse; 143 144 // Indicates the VXRM output from the block. Unitialized means transparent. 145 VXRMInfo VXRMOut; 146 147 // Keeps track of the available VXRM value at the start of the basic bloc. 148 VXRMInfo AvailableIn; 149 150 // Keeps track of the available VXRM value at the end of the basic block. 151 VXRMInfo AvailableOut; 152 153 // Keeps track of what VXRM is anticipated at the start of the basic block. 154 VXRMInfo AnticipatedIn; 155 156 // Keeps track of what VXRM is anticipated at the end of the basic block. 157 VXRMInfo AnticipatedOut; 158 159 // Keeps track of whether the block is already in the queue. 160 bool InQueue; 161 162 BlockData() = default; 163 }; 164 165 class RISCVInsertWriteVXRM : public MachineFunctionPass { 166 const TargetInstrInfo *TII; 167 168 std::vector<BlockData> BlockInfo; 169 std::queue<const MachineBasicBlock *> WorkList; 170 171 public: 172 static char ID; 173 174 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {} 175 176 bool runOnMachineFunction(MachineFunction &MF) override; 177 178 void getAnalysisUsage(AnalysisUsage &AU) const override { 179 AU.setPreservesCFG(); 180 MachineFunctionPass::getAnalysisUsage(AU); 181 } 182 183 StringRef getPassName() const override { 184 return RISCV_INSERT_WRITE_VXRM_NAME; 185 } 186 187 private: 188 bool computeVXRMChanges(const MachineBasicBlock &MBB); 189 void computeAvailable(const MachineBasicBlock &MBB); 190 void computeAnticipated(const MachineBasicBlock &MBB); 191 void emitWriteVXRM(MachineBasicBlock &MBB); 192 }; 193 194 } // end anonymous namespace 195 196 char RISCVInsertWriteVXRM::ID = 0; 197 198 INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, 199 false, false) 200 201 static bool ignoresVXRM(const MachineInstr &MI) { 202 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 203 default: 204 return false; 205 case RISCV::VNCLIP_WI: 206 case RISCV::VNCLIPU_WI: 207 return MI.getOperand(3).getImm() == 0; 208 } 209 } 210 211 bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { 212 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 213 214 bool NeedVXRMWrite = false; 215 for (const MachineInstr &MI : MBB) { 216 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 217 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 218 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 219 220 if (!BBInfo.VXRMUse.isValid()) 221 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm); 222 223 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm); 224 NeedVXRMWrite = true; 225 continue; 226 } 227 228 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) { 229 if (!BBInfo.VXRMUse.isValid()) 230 BBInfo.VXRMUse.setUnknown(); 231 232 BBInfo.VXRMOut.setUnknown(); 233 } 234 } 235 236 return NeedVXRMWrite; 237 } 238 239 void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) { 240 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 241 242 BBInfo.InQueue = false; 243 244 VXRMInfo Available; 245 if (MBB.pred_empty()) { 246 Available.setUnknown(); 247 } else { 248 for (const MachineBasicBlock *P : MBB.predecessors()) 249 Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut); 250 } 251 252 // If we don't have any valid available info, wait until we do. 253 if (!Available.isValid()) 254 return; 255 256 if (Available != BBInfo.AvailableIn) { 257 BBInfo.AvailableIn = Available; 258 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB) 259 << " changed to " << BBInfo.AvailableIn << "\n"); 260 } 261 262 if (BBInfo.VXRMOut.isValid()) 263 Available = BBInfo.VXRMOut; 264 265 if (Available == BBInfo.AvailableOut) 266 return; 267 268 BBInfo.AvailableOut = Available; 269 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB) 270 << " changed to " << BBInfo.AvailableOut << "\n"); 271 272 // Add the successors to the work list so that we can propagate. 273 for (MachineBasicBlock *S : MBB.successors()) { 274 if (!BlockInfo[S->getNumber()].InQueue) { 275 BlockInfo[S->getNumber()].InQueue = true; 276 WorkList.push(S); 277 } 278 } 279 } 280 281 void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) { 282 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 283 284 BBInfo.InQueue = false; 285 286 VXRMInfo Anticipated; 287 if (MBB.succ_empty()) { 288 Anticipated.setUnknown(); 289 } else { 290 for (const MachineBasicBlock *S : MBB.successors()) 291 Anticipated = 292 Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn); 293 } 294 295 // If we don't have any valid anticipated info, wait until we do. 296 if (!Anticipated.isValid()) 297 return; 298 299 if (Anticipated != BBInfo.AnticipatedOut) { 300 BBInfo.AnticipatedOut = Anticipated; 301 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB) 302 << " changed to " << BBInfo.AnticipatedOut << "\n"); 303 } 304 305 // If this block reads VXRM, copy it. 306 if (BBInfo.VXRMUse.isValid()) 307 Anticipated = BBInfo.VXRMUse; 308 309 if (Anticipated == BBInfo.AnticipatedIn) 310 return; 311 312 BBInfo.AnticipatedIn = Anticipated; 313 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB) 314 << " changed to " << BBInfo.AnticipatedIn << "\n"); 315 316 // Add the predecessors to the work list so that we can propagate. 317 for (MachineBasicBlock *P : MBB.predecessors()) { 318 if (!BlockInfo[P->getNumber()].InQueue) { 319 BlockInfo[P->getNumber()].InQueue = true; 320 WorkList.push(P); 321 } 322 } 323 } 324 325 void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { 326 const BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 327 328 VXRMInfo Info = BBInfo.AvailableIn; 329 330 // Flag to indicates we need to insert a VXRM write. We want to delay it as 331 // late as possible in this block. 332 bool PendingInsert = false; 333 334 // Insert VXRM write if anticipated and not available. 335 if (BBInfo.AnticipatedIn.isStatic()) { 336 // If this is the entry block and the value is anticipated, insert. 337 if (MBB.isEntryBlock()) { 338 PendingInsert = true; 339 } else { 340 // Search for any predecessors that wouldn't satisfy our requirement and 341 // insert a write VXRM if needed. 342 // NOTE: If one predecessor is able to provide the requirement, but 343 // another isn't, it means we have a critical edge. The better placement 344 // would be to split the critical edge. 345 for (MachineBasicBlock *P : MBB.predecessors()) { 346 const BlockData &PInfo = BlockInfo[P->getNumber()]; 347 // If it's available out of the predecessor, then we're ok. 348 if (PInfo.AvailableOut.isStatic() && 349 PInfo.AvailableOut.getVXRMImm() == 350 BBInfo.AnticipatedIn.getVXRMImm()) 351 continue; 352 // If the predecessor anticipates this value for all its succesors, 353 // then a write to VXRM would have already occured before this block is 354 // executed. 355 if (PInfo.AnticipatedOut.isStatic() && 356 PInfo.AnticipatedOut.getVXRMImm() == 357 BBInfo.AnticipatedIn.getVXRMImm()) 358 continue; 359 PendingInsert = true; 360 break; 361 } 362 } 363 364 Info = BBInfo.AnticipatedIn; 365 } 366 367 for (MachineInstr &MI : MBB) { 368 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 369 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 370 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 371 372 if (PendingInsert || !Info.isStatic() || 373 Info.getVXRMImm() != NewVXRMImm) { 374 assert((!PendingInsert || 375 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) && 376 "Pending VXRM insertion mismatch"); 377 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs())); 378 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) 379 .addImm(NewVXRMImm); 380 PendingInsert = false; 381 } 382 383 MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, 384 /*IsImp*/ true)); 385 Info.setVXRMImm(NewVXRMImm); 386 continue; 387 } 388 389 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) 390 Info.setUnknown(); 391 } 392 393 // If all our successors anticipate a value, do the insert. 394 // NOTE: It's possible that not all predecessors of our successor provide the 395 // correct value. This can occur on critical edges. If we don't split the 396 // critical edge we'll also have a write vxrm in the succesor that is 397 // redundant with this one. 398 if (PendingInsert || 399 (BBInfo.AnticipatedOut.isStatic() && 400 (!Info.isStatic() || 401 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) { 402 assert((!PendingInsert || 403 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() && 404 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) && 405 "Pending VXRM insertion mismatch"); 406 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB) 407 << " changing to " << BBInfo.AnticipatedOut << "\n"); 408 BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(), 409 TII->get(RISCV::WriteVXRMImm)) 410 .addImm(BBInfo.AnticipatedOut.getVXRMImm()); 411 } 412 } 413 414 bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) { 415 // Skip if the vector extension is not enabled. 416 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 417 if (!ST.hasVInstructions()) 418 return false; 419 420 TII = ST.getInstrInfo(); 421 422 assert(BlockInfo.empty() && "Expect empty block infos"); 423 BlockInfo.resize(MF.getNumBlockIDs()); 424 425 // Phase 1 - collect block information. 426 bool NeedVXRMChange = false; 427 for (const MachineBasicBlock &MBB : MF) 428 NeedVXRMChange |= computeVXRMChanges(MBB); 429 430 if (!NeedVXRMChange) { 431 BlockInfo.clear(); 432 return false; 433 } 434 435 // Phase 2 - Compute available VXRM using a forward walk. 436 for (const MachineBasicBlock &MBB : MF) { 437 WorkList.push(&MBB); 438 BlockInfo[MBB.getNumber()].InQueue = true; 439 } 440 while (!WorkList.empty()) { 441 const MachineBasicBlock &MBB = *WorkList.front(); 442 WorkList.pop(); 443 computeAvailable(MBB); 444 } 445 446 // Phase 3 - Compute anticipated VXRM using a backwards walk. 447 for (const MachineBasicBlock &MBB : llvm::reverse(MF)) { 448 WorkList.push(&MBB); 449 BlockInfo[MBB.getNumber()].InQueue = true; 450 } 451 while (!WorkList.empty()) { 452 const MachineBasicBlock &MBB = *WorkList.front(); 453 WorkList.pop(); 454 computeAnticipated(MBB); 455 } 456 457 // Phase 4 - Emit VXRM writes at the earliest place possible. 458 for (MachineBasicBlock &MBB : MF) 459 emitWriteVXRM(MBB); 460 461 BlockInfo.clear(); 462 463 return true; 464 } 465 466 FunctionPass *llvm::createRISCVInsertWriteVXRMPass() { 467 return new RISCVInsertWriteVXRM(); 468 } 469