1 //===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass inserts writes to the VXRM CSR as needed by vector instructions.
10 // Each instruction that uses VXRM carries an operand that contains its required
11 // VXRM value. This pass tries to optimize placement to avoid redundant writes
12 // to VXRM.
13 //
14 // This is done using 2 dataflow algorithms. The first is a forward data flow
15 // to calculate where a VXRM value is available. The second is a backwards
16 // dataflow to determine where a VXRM value is anticipated.
17 //
18 // Finally, we use the results of these two dataflows to insert VXRM writes
19 // where a value is anticipated, but not available.
20 //
21 // FIXME: This pass does not split critical edges, so there can still be some
22 // redundancy.
23 //
24 // FIXME: If we are willing to have writes that aren't always needed, we could
25 // reduce the number of VXRM writes in some cases.
26 //===----------------------------------------------------------------------===//
27
28 #include "MCTargetDesc/RISCVBaseInfo.h"
29 #include "RISCV.h"
30 #include "RISCVSubtarget.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include <queue>
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "riscv-insert-write-vxrm"
37 #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
38
39 namespace {
40
41 class VXRMInfo {
42 uint8_t VXRMImm = 0;
43
44 enum : uint8_t {
45 Uninitialized,
46 Static,
47 Unknown,
48 } State = Uninitialized;
49
50 public:
VXRMInfo()51 VXRMInfo() {}
52
getUnknown()53 static VXRMInfo getUnknown() {
54 VXRMInfo Info;
55 Info.setUnknown();
56 return Info;
57 }
58
isValid() const59 bool isValid() const { return State != Uninitialized; }
setUnknown()60 void setUnknown() { State = Unknown; }
isUnknown() const61 bool isUnknown() const { return State == Unknown; }
62
isStatic() const63 bool isStatic() const { return State == Static; }
64
setVXRMImm(unsigned Imm)65 void setVXRMImm(unsigned Imm) {
66 assert(Imm <= 3 && "Unexpected VXRM value");
67 VXRMImm = Imm;
68 State = Static;
69 }
getVXRMImm() const70 unsigned getVXRMImm() const {
71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
72 return VXRMImm;
73 }
74
operator ==(const VXRMInfo & Other) const75 bool operator==(const VXRMInfo &Other) const {
76 // Uninitialized is only equal to another Uninitialized.
77 if (State != Other.State)
78 return false;
79
80 if (isStatic())
81 return VXRMImm == Other.VXRMImm;
82
83 assert((isValid() || isUnknown()) && "Unexpected state");
84 return true;
85 }
86
operator !=(const VXRMInfo & Other) const87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
88
89 // Calculate the VXRMInfo visible to a block assuming this and Other are
90 // both predecessors.
intersect(const VXRMInfo & Other) const91 VXRMInfo intersect(const VXRMInfo &Other) const {
92 // If the new value isn't valid, ignore it.
93 if (!Other.isValid())
94 return *this;
95
96 // If this value isn't valid, this must be the first predecessor, use it.
97 if (!isValid())
98 return Other;
99
100 // If either is unknown, the result is unknown.
101 if (isUnknown() || Other.isUnknown())
102 return VXRMInfo::getUnknown();
103
104 // If we have an exact match, return this.
105 if (*this == Other)
106 return *this;
107
108 // Otherwise the result is unknown.
109 return VXRMInfo::getUnknown();
110 }
111
112 // Calculate the VXRMInfo visible to a block assuming this and Other
113 // are both predecessors. To allow speculatively running WriteVXRM
114 // we will ignore Unknowns if one of this and Other have valid
115 // WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
116 // uarchs and moving it outside loops is very important for some
117 // workloads.
intersectAnticipated(const VXRMInfo & Other) const118 VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
119 // If the new value isn't valid, ignore it.
120 if (!Other.isValid())
121 return *this;
122
123 // If this value isn't valid, this must be the first predecessor, use it.
124 if (!isValid())
125 return Other;
126
127 // If either is unknown, the result is the other one.
128 if (isUnknown())
129 return Other;
130 if (Other.isUnknown())
131 return *this;
132
133 // If we have an exact match, return this.
134 if (*this == Other)
135 return *this;
136
137 // Otherwise the result is unknown.
138 return VXRMInfo::getUnknown();
139 }
140
141 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
142 /// Support for debugging, callable in GDB: V->dump()
dump() const143 LLVM_DUMP_METHOD void dump() const {
144 print(dbgs());
145 dbgs() << "\n";
146 }
147
print(raw_ostream & OS) const148 void print(raw_ostream &OS) const {
149 OS << '{';
150 if (!isValid())
151 OS << "Uninitialized";
152 else if (isUnknown())
153 OS << "Unknown";
154 else
155 OS << getVXRMImm();
156 OS << '}';
157 }
158 #endif
159 };
160
161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
162 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const VXRMInfo & V)163 inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
164 V.print(OS);
165 return OS;
166 }
167 #endif
168
169 struct BlockData {
170 // Indicates if the block uses VXRM. Uninitialized means no use.
171 VXRMInfo VXRMUse;
172
173 // Indicates the VXRM output from the block. Uninitialized means transparent.
174 VXRMInfo VXRMOut;
175
176 // Keeps track of the available VXRM value at the start of the basic block.
177 VXRMInfo AvailableIn;
178
179 // Keeps track of the available VXRM value at the end of the basic block.
180 VXRMInfo AvailableOut;
181
182 // Keeps track of what VXRM is anticipated at the start of the basic block.
183 VXRMInfo AnticipatedIn;
184
185 // Keeps track of what VXRM is anticipated at the end of the basic block.
186 VXRMInfo AnticipatedOut;
187
188 // Keeps track of whether the block is already in the queue.
189 bool InQueue;
190
191 BlockData() = default;
192 };
193
194 class RISCVInsertWriteVXRM : public MachineFunctionPass {
195 const TargetInstrInfo *TII;
196
197 std::vector<BlockData> BlockInfo;
198 std::queue<const MachineBasicBlock *> WorkList;
199
200 public:
201 static char ID;
202
RISCVInsertWriteVXRM()203 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
204
205 bool runOnMachineFunction(MachineFunction &MF) override;
206
getAnalysisUsage(AnalysisUsage & AU) const207 void getAnalysisUsage(AnalysisUsage &AU) const override {
208 AU.setPreservesCFG();
209 MachineFunctionPass::getAnalysisUsage(AU);
210 }
211
getPassName() const212 StringRef getPassName() const override {
213 return RISCV_INSERT_WRITE_VXRM_NAME;
214 }
215
216 private:
217 bool computeVXRMChanges(const MachineBasicBlock &MBB);
218 void computeAvailable(const MachineBasicBlock &MBB);
219 void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
220 void emitWriteVXRM(MachineBasicBlock &MBB);
221 };
222
223 } // end anonymous namespace
224
225 char RISCVInsertWriteVXRM::ID = 0;
226
INITIALIZE_PASS(RISCVInsertWriteVXRM,DEBUG_TYPE,RISCV_INSERT_WRITE_VXRM_NAME,false,false)227 INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
228 false, false)
229
230 bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
231 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
232
233 bool NeedVXRMWrite = false;
234 for (const MachineInstr &MI : MBB) {
235 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
236 if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
237 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
238
239 if (!BBInfo.VXRMUse.isValid())
240 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
241
242 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
243 NeedVXRMWrite = true;
244 continue;
245 }
246
247 if (MI.isCall() || MI.isInlineAsm() ||
248 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
249 if (!BBInfo.VXRMUse.isValid())
250 BBInfo.VXRMUse.setUnknown();
251
252 BBInfo.VXRMOut.setUnknown();
253 }
254 }
255
256 return NeedVXRMWrite;
257 }
258
computeAvailable(const MachineBasicBlock & MBB)259 void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
260 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
261
262 BBInfo.InQueue = false;
263
264 VXRMInfo Available;
265 if (MBB.pred_empty()) {
266 Available.setUnknown();
267 } else {
268 for (const MachineBasicBlock *P : MBB.predecessors())
269 Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
270 }
271
272 // If we don't have any valid available info, wait until we do.
273 if (!Available.isValid())
274 return;
275
276 if (Available != BBInfo.AvailableIn) {
277 BBInfo.AvailableIn = Available;
278 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
279 << " changed to " << BBInfo.AvailableIn << "\n");
280 }
281
282 if (BBInfo.VXRMOut.isValid())
283 Available = BBInfo.VXRMOut;
284
285 if (Available == BBInfo.AvailableOut)
286 return;
287
288 BBInfo.AvailableOut = Available;
289 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
290 << " changed to " << BBInfo.AvailableOut << "\n");
291
292 // Add the successors to the work list so that we can propagate.
293 for (MachineBasicBlock *S : MBB.successors()) {
294 if (!BlockInfo[S->getNumber()].InQueue) {
295 BlockInfo[S->getNumber()].InQueue = true;
296 WorkList.push(S);
297 }
298 }
299 }
300
computeAnticipated(const MachineFunction & MF,const MachineBasicBlock & MBB)301 void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
302 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
303 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
304
305 BBInfo.InQueue = false;
306
307 VXRMInfo Anticipated;
308 if (MBB.succ_empty()) {
309 Anticipated.setUnknown();
310 } else {
311 for (const MachineBasicBlock *S : MBB.successors())
312 if (ST.hasVXRMPipelineFlush())
313 Anticipated =
314 Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
315 else
316 Anticipated =
317 Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
318 }
319
320 // If we don't have any valid anticipated info, wait until we do.
321 if (!Anticipated.isValid())
322 return;
323
324 if (Anticipated != BBInfo.AnticipatedOut) {
325 BBInfo.AnticipatedOut = Anticipated;
326 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
327 << " changed to " << BBInfo.AnticipatedOut << "\n");
328 }
329
330 // If this block reads VXRM, copy it.
331 if (BBInfo.VXRMUse.isValid())
332 Anticipated = BBInfo.VXRMUse;
333
334 if (Anticipated == BBInfo.AnticipatedIn)
335 return;
336
337 BBInfo.AnticipatedIn = Anticipated;
338 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
339 << " changed to " << BBInfo.AnticipatedIn << "\n");
340
341 // Add the predecessors to the work list so that we can propagate.
342 for (MachineBasicBlock *P : MBB.predecessors()) {
343 if (!BlockInfo[P->getNumber()].InQueue) {
344 BlockInfo[P->getNumber()].InQueue = true;
345 WorkList.push(P);
346 }
347 }
348 }
349
emitWriteVXRM(MachineBasicBlock & MBB)350 void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
351 const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
352
353 VXRMInfo Info = BBInfo.AvailableIn;
354
355 // Flag to indicates we need to insert a VXRM write. We want to delay it as
356 // late as possible in this block.
357 bool PendingInsert = false;
358
359 // Insert VXRM write if anticipated and not available.
360 if (BBInfo.AnticipatedIn.isStatic()) {
361 // If this is the entry block and the value is anticipated, insert.
362 if (MBB.isEntryBlock()) {
363 PendingInsert = true;
364 } else {
365 // Search for any predecessors that wouldn't satisfy our requirement and
366 // insert a write VXRM if needed.
367 // NOTE: If one predecessor is able to provide the requirement, but
368 // another isn't, it means we have a critical edge. The better placement
369 // would be to split the critical edge.
370 for (MachineBasicBlock *P : MBB.predecessors()) {
371 const BlockData &PInfo = BlockInfo[P->getNumber()];
372 // If it's available out of the predecessor, then we're ok.
373 if (PInfo.AvailableOut.isStatic() &&
374 PInfo.AvailableOut.getVXRMImm() ==
375 BBInfo.AnticipatedIn.getVXRMImm())
376 continue;
377 // If the predecessor anticipates this value for all its successors,
378 // then a write to VXRM would have already occurred before this block is
379 // executed.
380 if (PInfo.AnticipatedOut.isStatic() &&
381 PInfo.AnticipatedOut.getVXRMImm() ==
382 BBInfo.AnticipatedIn.getVXRMImm())
383 continue;
384 PendingInsert = true;
385 break;
386 }
387 }
388
389 Info = BBInfo.AnticipatedIn;
390 }
391
392 for (MachineInstr &MI : MBB) {
393 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
394 if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
395 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
396
397 if (PendingInsert || !Info.isStatic() ||
398 Info.getVXRMImm() != NewVXRMImm) {
399 assert((!PendingInsert ||
400 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
401 "Pending VXRM insertion mismatch");
402 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
403 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
404 .addImm(NewVXRMImm);
405 PendingInsert = false;
406 }
407
408 MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
409 /*IsImp*/ true));
410 Info.setVXRMImm(NewVXRMImm);
411 continue;
412 }
413
414 if (MI.isCall() || MI.isInlineAsm() ||
415 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
416 Info.setUnknown();
417 }
418
419 // If all our successors anticipate a value, do the insert.
420 // NOTE: It's possible that not all predecessors of our successor provide the
421 // correct value. This can occur on critical edges. If we don't split the
422 // critical edge we'll also have a write vxrm in the successor that is
423 // redundant with this one.
424 if (PendingInsert ||
425 (BBInfo.AnticipatedOut.isStatic() &&
426 (!Info.isStatic() ||
427 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
428 assert((!PendingInsert ||
429 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
430 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
431 "Pending VXRM insertion mismatch");
432 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
433 << " changing to " << BBInfo.AnticipatedOut << "\n");
434 BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
435 TII->get(RISCV::WriteVXRMImm))
436 .addImm(BBInfo.AnticipatedOut.getVXRMImm());
437 }
438 }
439
runOnMachineFunction(MachineFunction & MF)440 bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
441 // Skip if the vector extension is not enabled.
442 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
443 if (!ST.hasVInstructions())
444 return false;
445
446 TII = ST.getInstrInfo();
447
448 assert(BlockInfo.empty() && "Expect empty block infos");
449 BlockInfo.resize(MF.getNumBlockIDs());
450
451 // Phase 1 - collect block information.
452 bool NeedVXRMChange = false;
453 for (const MachineBasicBlock &MBB : MF)
454 NeedVXRMChange |= computeVXRMChanges(MBB);
455
456 if (!NeedVXRMChange) {
457 BlockInfo.clear();
458 return false;
459 }
460
461 // Phase 2 - Compute available VXRM using a forward walk.
462 for (const MachineBasicBlock &MBB : MF) {
463 WorkList.push(&MBB);
464 BlockInfo[MBB.getNumber()].InQueue = true;
465 }
466 while (!WorkList.empty()) {
467 const MachineBasicBlock &MBB = *WorkList.front();
468 WorkList.pop();
469 computeAvailable(MBB);
470 }
471
472 // Phase 3 - Compute anticipated VXRM using a backwards walk.
473 for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
474 WorkList.push(&MBB);
475 BlockInfo[MBB.getNumber()].InQueue = true;
476 }
477 while (!WorkList.empty()) {
478 const MachineBasicBlock &MBB = *WorkList.front();
479 WorkList.pop();
480 computeAnticipated(MF, MBB);
481 }
482
483 // Phase 4 - Emit VXRM writes at the earliest place possible.
484 for (MachineBasicBlock &MBB : MF)
485 emitWriteVXRM(MBB);
486
487 BlockInfo.clear();
488
489 return true;
490 }
491
createRISCVInsertWriteVXRMPass()492 FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
493 return new RISCVInsertWriteVXRM();
494 }
495