1 //===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass inserts writes to the VXRM CSR as needed by vector instructions. 10 // Each instruction that uses VXRM carries an operand that contains its required 11 // VXRM value. This pass tries to optimize placement to avoid redundant writes 12 // to VXRM. 13 // 14 // This is done using 2 dataflow algorithms. The first is a forward data flow 15 // to calculate where a VXRM value is available. The second is a backwards 16 // dataflow to determine where a VXRM value is anticipated. 17 // 18 // Finally, we use the results of these two dataflows to insert VXRM writes 19 // where a value is anticipated, but not available. 20 // 21 // FIXME: This pass does not split critical edges, so there can still be some 22 // redundancy. 23 // 24 // FIXME: If we are willing to have writes that aren't always needed, we could 25 // reduce the number of VXRM writes in some cases. 26 //===----------------------------------------------------------------------===// 27 28 #include "MCTargetDesc/RISCVBaseInfo.h" 29 #include "RISCV.h" 30 #include "RISCVSubtarget.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include <queue> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "riscv-insert-write-vxrm" 37 #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass" 38 39 namespace { 40 41 class VXRMInfo { 42 uint8_t VXRMImm = 0; 43 44 enum : uint8_t { 45 Uninitialized, 46 Static, 47 Unknown, 48 } State = Uninitialized; 49 50 public: 51 VXRMInfo() {} 52 53 static VXRMInfo getUnknown() { 54 VXRMInfo Info; 55 Info.setUnknown(); 56 return Info; 57 } 58 59 bool isValid() const { return State != Uninitialized; } 60 void setUnknown() { State = Unknown; } 61 bool isUnknown() const { return State == Unknown; } 62 63 bool isStatic() const { return State == Static; } 64 65 void setVXRMImm(unsigned Imm) { 66 assert(Imm <= 3 && "Unexpected VXRM value"); 67 VXRMImm = Imm; 68 State = Static; 69 } 70 unsigned getVXRMImm() const { 71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state"); 72 return VXRMImm; 73 } 74 75 bool operator==(const VXRMInfo &Other) const { 76 // Uninitialized is only equal to another Uninitialized. 77 if (State != Other.State) 78 return false; 79 80 if (isStatic()) 81 return VXRMImm == Other.VXRMImm; 82 83 assert((isValid() || isUnknown()) && "Unexpected state"); 84 return true; 85 } 86 87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); } 88 89 // Calculate the VXRMInfo visible to a block assuming this and Other are 90 // both predecessors. 91 VXRMInfo intersect(const VXRMInfo &Other) const { 92 // If the new value isn't valid, ignore it. 93 if (!Other.isValid()) 94 return *this; 95 96 // If this value isn't valid, this must be the first predecessor, use it. 97 if (!isValid()) 98 return Other; 99 100 // If either is unknown, the result is unknown. 101 if (isUnknown() || Other.isUnknown()) 102 return VXRMInfo::getUnknown(); 103 104 // If we have an exact match, return this. 105 if (*this == Other) 106 return *this; 107 108 // Otherwise the result is unknown. 109 return VXRMInfo::getUnknown(); 110 } 111 112 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 113 /// Support for debugging, callable in GDB: V->dump() 114 LLVM_DUMP_METHOD void dump() const { 115 print(dbgs()); 116 dbgs() << "\n"; 117 } 118 119 void print(raw_ostream &OS) const { 120 OS << '{'; 121 if (!isValid()) 122 OS << "Uninitialized"; 123 else if (isUnknown()) 124 OS << "Unknown"; 125 else 126 OS << getVXRMImm(); 127 OS << '}'; 128 } 129 #endif 130 }; 131 132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 133 LLVM_ATTRIBUTE_USED 134 inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) { 135 V.print(OS); 136 return OS; 137 } 138 #endif 139 140 struct BlockData { 141 // Indicates if the block uses VXRM. Uninitialized means no use. 142 VXRMInfo VXRMUse; 143 144 // Indicates the VXRM output from the block. Unitialized means transparent. 145 VXRMInfo VXRMOut; 146 147 // Keeps track of the available VXRM value at the start of the basic bloc. 148 VXRMInfo AvailableIn; 149 150 // Keeps track of the available VXRM value at the end of the basic block. 151 VXRMInfo AvailableOut; 152 153 // Keeps track of what VXRM is anticipated at the start of the basic block. 154 VXRMInfo AnticipatedIn; 155 156 // Keeps track of what VXRM is anticipated at the end of the basic block. 157 VXRMInfo AnticipatedOut; 158 159 // Keeps track of whether the block is already in the queue. 160 bool InQueue; 161 162 BlockData() = default; 163 }; 164 165 class RISCVInsertWriteVXRM : public MachineFunctionPass { 166 const TargetInstrInfo *TII; 167 168 std::vector<BlockData> BlockInfo; 169 std::queue<const MachineBasicBlock *> WorkList; 170 171 public: 172 static char ID; 173 174 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {} 175 176 bool runOnMachineFunction(MachineFunction &MF) override; 177 178 void getAnalysisUsage(AnalysisUsage &AU) const override { 179 AU.setPreservesCFG(); 180 MachineFunctionPass::getAnalysisUsage(AU); 181 } 182 183 StringRef getPassName() const override { 184 return RISCV_INSERT_WRITE_VXRM_NAME; 185 } 186 187 private: 188 bool computeVXRMChanges(const MachineBasicBlock &MBB); 189 void computeAvailable(const MachineBasicBlock &MBB); 190 void computeAnticipated(const MachineBasicBlock &MBB); 191 void emitWriteVXRM(MachineBasicBlock &MBB); 192 }; 193 194 } // end anonymous namespace 195 196 char RISCVInsertWriteVXRM::ID = 0; 197 198 INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, 199 false, false) 200 201 static bool ignoresVXRM(const MachineInstr &MI) { 202 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 203 default: 204 return false; 205 case RISCV::VNCLIP_WI: 206 case RISCV::VNCLIPU_WI: 207 return MI.getOperand(3).getImm() == 0; 208 } 209 } 210 211 bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { 212 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 213 214 bool NeedVXRMWrite = false; 215 for (const MachineInstr &MI : MBB) { 216 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 217 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 218 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 219 220 if (!BBInfo.VXRMUse.isValid()) 221 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm); 222 223 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm); 224 NeedVXRMWrite = true; 225 continue; 226 } 227 228 if (MI.isCall() || MI.isInlineAsm() || 229 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) { 230 if (!BBInfo.VXRMUse.isValid()) 231 BBInfo.VXRMUse.setUnknown(); 232 233 BBInfo.VXRMOut.setUnknown(); 234 } 235 } 236 237 return NeedVXRMWrite; 238 } 239 240 void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) { 241 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 242 243 BBInfo.InQueue = false; 244 245 VXRMInfo Available; 246 if (MBB.pred_empty()) { 247 Available.setUnknown(); 248 } else { 249 for (const MachineBasicBlock *P : MBB.predecessors()) 250 Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut); 251 } 252 253 // If we don't have any valid available info, wait until we do. 254 if (!Available.isValid()) 255 return; 256 257 if (Available != BBInfo.AvailableIn) { 258 BBInfo.AvailableIn = Available; 259 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB) 260 << " changed to " << BBInfo.AvailableIn << "\n"); 261 } 262 263 if (BBInfo.VXRMOut.isValid()) 264 Available = BBInfo.VXRMOut; 265 266 if (Available == BBInfo.AvailableOut) 267 return; 268 269 BBInfo.AvailableOut = Available; 270 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB) 271 << " changed to " << BBInfo.AvailableOut << "\n"); 272 273 // Add the successors to the work list so that we can propagate. 274 for (MachineBasicBlock *S : MBB.successors()) { 275 if (!BlockInfo[S->getNumber()].InQueue) { 276 BlockInfo[S->getNumber()].InQueue = true; 277 WorkList.push(S); 278 } 279 } 280 } 281 282 void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) { 283 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 284 285 BBInfo.InQueue = false; 286 287 VXRMInfo Anticipated; 288 if (MBB.succ_empty()) { 289 Anticipated.setUnknown(); 290 } else { 291 for (const MachineBasicBlock *S : MBB.successors()) 292 Anticipated = 293 Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn); 294 } 295 296 // If we don't have any valid anticipated info, wait until we do. 297 if (!Anticipated.isValid()) 298 return; 299 300 if (Anticipated != BBInfo.AnticipatedOut) { 301 BBInfo.AnticipatedOut = Anticipated; 302 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB) 303 << " changed to " << BBInfo.AnticipatedOut << "\n"); 304 } 305 306 // If this block reads VXRM, copy it. 307 if (BBInfo.VXRMUse.isValid()) 308 Anticipated = BBInfo.VXRMUse; 309 310 if (Anticipated == BBInfo.AnticipatedIn) 311 return; 312 313 BBInfo.AnticipatedIn = Anticipated; 314 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB) 315 << " changed to " << BBInfo.AnticipatedIn << "\n"); 316 317 // Add the predecessors to the work list so that we can propagate. 318 for (MachineBasicBlock *P : MBB.predecessors()) { 319 if (!BlockInfo[P->getNumber()].InQueue) { 320 BlockInfo[P->getNumber()].InQueue = true; 321 WorkList.push(P); 322 } 323 } 324 } 325 326 void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { 327 const BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 328 329 VXRMInfo Info = BBInfo.AvailableIn; 330 331 // Flag to indicates we need to insert a VXRM write. We want to delay it as 332 // late as possible in this block. 333 bool PendingInsert = false; 334 335 // Insert VXRM write if anticipated and not available. 336 if (BBInfo.AnticipatedIn.isStatic()) { 337 // If this is the entry block and the value is anticipated, insert. 338 if (MBB.isEntryBlock()) { 339 PendingInsert = true; 340 } else { 341 // Search for any predecessors that wouldn't satisfy our requirement and 342 // insert a write VXRM if needed. 343 // NOTE: If one predecessor is able to provide the requirement, but 344 // another isn't, it means we have a critical edge. The better placement 345 // would be to split the critical edge. 346 for (MachineBasicBlock *P : MBB.predecessors()) { 347 const BlockData &PInfo = BlockInfo[P->getNumber()]; 348 // If it's available out of the predecessor, then we're ok. 349 if (PInfo.AvailableOut.isStatic() && 350 PInfo.AvailableOut.getVXRMImm() == 351 BBInfo.AnticipatedIn.getVXRMImm()) 352 continue; 353 // If the predecessor anticipates this value for all its succesors, 354 // then a write to VXRM would have already occured before this block is 355 // executed. 356 if (PInfo.AnticipatedOut.isStatic() && 357 PInfo.AnticipatedOut.getVXRMImm() == 358 BBInfo.AnticipatedIn.getVXRMImm()) 359 continue; 360 PendingInsert = true; 361 break; 362 } 363 } 364 365 Info = BBInfo.AnticipatedIn; 366 } 367 368 for (MachineInstr &MI : MBB) { 369 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 370 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 371 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 372 373 if (PendingInsert || !Info.isStatic() || 374 Info.getVXRMImm() != NewVXRMImm) { 375 assert((!PendingInsert || 376 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) && 377 "Pending VXRM insertion mismatch"); 378 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs())); 379 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) 380 .addImm(NewVXRMImm); 381 PendingInsert = false; 382 } 383 384 MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, 385 /*IsImp*/ true)); 386 Info.setVXRMImm(NewVXRMImm); 387 continue; 388 } 389 390 if (MI.isCall() || MI.isInlineAsm() || 391 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) 392 Info.setUnknown(); 393 } 394 395 // If all our successors anticipate a value, do the insert. 396 // NOTE: It's possible that not all predecessors of our successor provide the 397 // correct value. This can occur on critical edges. If we don't split the 398 // critical edge we'll also have a write vxrm in the succesor that is 399 // redundant with this one. 400 if (PendingInsert || 401 (BBInfo.AnticipatedOut.isStatic() && 402 (!Info.isStatic() || 403 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) { 404 assert((!PendingInsert || 405 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() && 406 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) && 407 "Pending VXRM insertion mismatch"); 408 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB) 409 << " changing to " << BBInfo.AnticipatedOut << "\n"); 410 BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(), 411 TII->get(RISCV::WriteVXRMImm)) 412 .addImm(BBInfo.AnticipatedOut.getVXRMImm()); 413 } 414 } 415 416 bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) { 417 // Skip if the vector extension is not enabled. 418 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 419 if (!ST.hasVInstructions()) 420 return false; 421 422 TII = ST.getInstrInfo(); 423 424 assert(BlockInfo.empty() && "Expect empty block infos"); 425 BlockInfo.resize(MF.getNumBlockIDs()); 426 427 // Phase 1 - collect block information. 428 bool NeedVXRMChange = false; 429 for (const MachineBasicBlock &MBB : MF) 430 NeedVXRMChange |= computeVXRMChanges(MBB); 431 432 if (!NeedVXRMChange) { 433 BlockInfo.clear(); 434 return false; 435 } 436 437 // Phase 2 - Compute available VXRM using a forward walk. 438 for (const MachineBasicBlock &MBB : MF) { 439 WorkList.push(&MBB); 440 BlockInfo[MBB.getNumber()].InQueue = true; 441 } 442 while (!WorkList.empty()) { 443 const MachineBasicBlock &MBB = *WorkList.front(); 444 WorkList.pop(); 445 computeAvailable(MBB); 446 } 447 448 // Phase 3 - Compute anticipated VXRM using a backwards walk. 449 for (const MachineBasicBlock &MBB : llvm::reverse(MF)) { 450 WorkList.push(&MBB); 451 BlockInfo[MBB.getNumber()].InQueue = true; 452 } 453 while (!WorkList.empty()) { 454 const MachineBasicBlock &MBB = *WorkList.front(); 455 WorkList.pop(); 456 computeAnticipated(MBB); 457 } 458 459 // Phase 4 - Emit VXRM writes at the earliest place possible. 460 for (MachineBasicBlock &MBB : MF) 461 emitWriteVXRM(MBB); 462 463 BlockInfo.clear(); 464 465 return true; 466 } 467 468 FunctionPass *llvm::createRISCVInsertWriteVXRMPass() { 469 return new RISCVInsertWriteVXRM(); 470 } 471