xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass inserts writes to the VXRM CSR as needed by vector instructions.
10 // Each instruction that uses VXRM carries an operand that contains its required
11 // VXRM value. This pass tries to optimize placement to avoid redundant writes
12 // to VXRM.
13 //
14 // This is done using 2 dataflow algorithms. The first is a forward data flow
15 // to calculate where a VXRM value is available. The second is a backwards
16 // dataflow to determine where a VXRM value is anticipated.
17 //
18 // Finally, we use the results of these two dataflows to insert VXRM writes
19 // where a value is anticipated, but not available.
20 //
21 // FIXME: This pass does not split critical edges, so there can still be some
22 // redundancy.
23 //
24 // FIXME: If we are willing to have writes that aren't always needed, we could
25 // reduce the number of VXRM writes in some cases.
26 //===----------------------------------------------------------------------===//
27 
28 #include "MCTargetDesc/RISCVBaseInfo.h"
29 #include "RISCV.h"
30 #include "RISCVSubtarget.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include <queue>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "riscv-insert-write-vxrm"
37 #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
38 
39 namespace {
40 
41 class VXRMInfo {
42   uint8_t VXRMImm = 0;
43 
44   enum : uint8_t {
45     Uninitialized,
46     Static,
47     Unknown,
48   } State = Uninitialized;
49 
50 public:
VXRMInfo()51   VXRMInfo() {}
52 
getUnknown()53   static VXRMInfo getUnknown() {
54     VXRMInfo Info;
55     Info.setUnknown();
56     return Info;
57   }
58 
isValid() const59   bool isValid() const { return State != Uninitialized; }
setUnknown()60   void setUnknown() { State = Unknown; }
isUnknown() const61   bool isUnknown() const { return State == Unknown; }
62 
isStatic() const63   bool isStatic() const { return State == Static; }
64 
setVXRMImm(unsigned Imm)65   void setVXRMImm(unsigned Imm) {
66     assert(Imm <= 3 && "Unexpected VXRM value");
67     VXRMImm = Imm;
68     State = Static;
69   }
getVXRMImm() const70   unsigned getVXRMImm() const {
71     assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
72     return VXRMImm;
73   }
74 
operator ==(const VXRMInfo & Other) const75   bool operator==(const VXRMInfo &Other) const {
76     // Uninitialized is only equal to another Uninitialized.
77     if (State != Other.State)
78       return false;
79 
80     if (isStatic())
81       return VXRMImm == Other.VXRMImm;
82 
83     assert((isValid() || isUnknown()) && "Unexpected state");
84     return true;
85   }
86 
operator !=(const VXRMInfo & Other) const87   bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
88 
89   // Calculate the VXRMInfo visible to a block assuming this and Other are
90   // both predecessors.
intersect(const VXRMInfo & Other) const91   VXRMInfo intersect(const VXRMInfo &Other) const {
92     // If the new value isn't valid, ignore it.
93     if (!Other.isValid())
94       return *this;
95 
96     // If this value isn't valid, this must be the first predecessor, use it.
97     if (!isValid())
98       return Other;
99 
100     // If either is unknown, the result is unknown.
101     if (isUnknown() || Other.isUnknown())
102       return VXRMInfo::getUnknown();
103 
104     // If we have an exact match, return this.
105     if (*this == Other)
106       return *this;
107 
108     // Otherwise the result is unknown.
109     return VXRMInfo::getUnknown();
110   }
111 
112   // Calculate the VXRMInfo visible to a block assuming this and Other
113   // are both predecessors. To allow speculatively running WriteVXRM
114   // we will ignore Unknowns if one of this and Other have valid
115   // WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
116   // uarchs and moving it outside loops is very important for some
117   // workloads.
intersectAnticipated(const VXRMInfo & Other) const118   VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
119     // If the new value isn't valid, ignore it.
120     if (!Other.isValid())
121       return *this;
122 
123     // If this value isn't valid, this must be the first predecessor, use it.
124     if (!isValid())
125       return Other;
126 
127     // If either is unknown, the result is the other one.
128     if (isUnknown())
129       return Other;
130     if (Other.isUnknown())
131       return *this;
132 
133     // If we have an exact match, return this.
134     if (*this == Other)
135       return *this;
136 
137     // Otherwise the result is unknown.
138     return VXRMInfo::getUnknown();
139   }
140 
141 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
142   /// Support for debugging, callable in GDB: V->dump()
dump() const143   LLVM_DUMP_METHOD void dump() const {
144     print(dbgs());
145     dbgs() << "\n";
146   }
147 
print(raw_ostream & OS) const148   void print(raw_ostream &OS) const {
149     OS << '{';
150     if (!isValid())
151       OS << "Uninitialized";
152     else if (isUnknown())
153       OS << "Unknown";
154     else
155       OS << getVXRMImm();
156     OS << '}';
157   }
158 #endif
159 };
160 
161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
162 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const VXRMInfo & V)163 inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
164   V.print(OS);
165   return OS;
166 }
167 #endif
168 
169 struct BlockData {
170   // Indicates if the block uses VXRM. Uninitialized means no use.
171   VXRMInfo VXRMUse;
172 
173   // Indicates the VXRM output from the block. Uninitialized means transparent.
174   VXRMInfo VXRMOut;
175 
176   // Keeps track of the available VXRM value at the start of the basic block.
177   VXRMInfo AvailableIn;
178 
179   // Keeps track of the available VXRM value at the end of the basic block.
180   VXRMInfo AvailableOut;
181 
182   // Keeps track of what VXRM is anticipated at the start of the basic block.
183   VXRMInfo AnticipatedIn;
184 
185   // Keeps track of what VXRM is anticipated at the end of the basic block.
186   VXRMInfo AnticipatedOut;
187 
188   // Keeps track of whether the block is already in the queue.
189   bool InQueue;
190 
191   BlockData() = default;
192 };
193 
194 class RISCVInsertWriteVXRM : public MachineFunctionPass {
195   const TargetInstrInfo *TII;
196 
197   std::vector<BlockData> BlockInfo;
198   std::queue<const MachineBasicBlock *> WorkList;
199 
200 public:
201   static char ID;
202 
RISCVInsertWriteVXRM()203   RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
204 
205   bool runOnMachineFunction(MachineFunction &MF) override;
206 
getAnalysisUsage(AnalysisUsage & AU) const207   void getAnalysisUsage(AnalysisUsage &AU) const override {
208     AU.setPreservesCFG();
209     MachineFunctionPass::getAnalysisUsage(AU);
210   }
211 
getPassName() const212   StringRef getPassName() const override {
213     return RISCV_INSERT_WRITE_VXRM_NAME;
214   }
215 
216 private:
217   bool computeVXRMChanges(const MachineBasicBlock &MBB);
218   void computeAvailable(const MachineBasicBlock &MBB);
219   void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
220   void emitWriteVXRM(MachineBasicBlock &MBB);
221 };
222 
223 } // end anonymous namespace
224 
225 char RISCVInsertWriteVXRM::ID = 0;
226 
INITIALIZE_PASS(RISCVInsertWriteVXRM,DEBUG_TYPE,RISCV_INSERT_WRITE_VXRM_NAME,false,false)227 INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
228                 false, false)
229 
230 bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
231   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
232 
233   bool NeedVXRMWrite = false;
234   for (const MachineInstr &MI : MBB) {
235     int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
236     if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
237       unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
238 
239       if (!BBInfo.VXRMUse.isValid())
240         BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
241 
242       BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
243       NeedVXRMWrite = true;
244       continue;
245     }
246 
247     if (MI.isCall() || MI.isInlineAsm() ||
248         MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
249       if (!BBInfo.VXRMUse.isValid())
250         BBInfo.VXRMUse.setUnknown();
251 
252       BBInfo.VXRMOut.setUnknown();
253     }
254   }
255 
256   return NeedVXRMWrite;
257 }
258 
computeAvailable(const MachineBasicBlock & MBB)259 void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
260   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
261 
262   BBInfo.InQueue = false;
263 
264   VXRMInfo Available;
265   if (MBB.pred_empty()) {
266     Available.setUnknown();
267   } else {
268     for (const MachineBasicBlock *P : MBB.predecessors())
269       Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
270   }
271 
272   // If we don't have any valid available info, wait until we do.
273   if (!Available.isValid())
274     return;
275 
276   if (Available != BBInfo.AvailableIn) {
277     BBInfo.AvailableIn = Available;
278     LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
279                       << " changed to " << BBInfo.AvailableIn << "\n");
280   }
281 
282   if (BBInfo.VXRMOut.isValid())
283     Available = BBInfo.VXRMOut;
284 
285   if (Available == BBInfo.AvailableOut)
286     return;
287 
288   BBInfo.AvailableOut = Available;
289   LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
290                     << " changed to " << BBInfo.AvailableOut << "\n");
291 
292   // Add the successors to the work list so that we can propagate.
293   for (MachineBasicBlock *S : MBB.successors()) {
294     if (!BlockInfo[S->getNumber()].InQueue) {
295       BlockInfo[S->getNumber()].InQueue = true;
296       WorkList.push(S);
297     }
298   }
299 }
300 
computeAnticipated(const MachineFunction & MF,const MachineBasicBlock & MBB)301 void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
302   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
303   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
304 
305   BBInfo.InQueue = false;
306 
307   VXRMInfo Anticipated;
308   if (MBB.succ_empty()) {
309     Anticipated.setUnknown();
310   } else {
311     for (const MachineBasicBlock *S : MBB.successors())
312       if (ST.hasVXRMPipelineFlush())
313         Anticipated =
314           Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
315       else
316         Anticipated =
317           Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
318   }
319 
320   // If we don't have any valid anticipated info, wait until we do.
321   if (!Anticipated.isValid())
322     return;
323 
324   if (Anticipated != BBInfo.AnticipatedOut) {
325     BBInfo.AnticipatedOut = Anticipated;
326     LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
327                       << " changed to " << BBInfo.AnticipatedOut << "\n");
328   }
329 
330   // If this block reads VXRM, copy it.
331   if (BBInfo.VXRMUse.isValid())
332     Anticipated = BBInfo.VXRMUse;
333 
334   if (Anticipated == BBInfo.AnticipatedIn)
335     return;
336 
337   BBInfo.AnticipatedIn = Anticipated;
338   LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
339                     << " changed to " << BBInfo.AnticipatedIn << "\n");
340 
341   // Add the predecessors to the work list so that we can propagate.
342   for (MachineBasicBlock *P : MBB.predecessors()) {
343     if (!BlockInfo[P->getNumber()].InQueue) {
344       BlockInfo[P->getNumber()].InQueue = true;
345       WorkList.push(P);
346     }
347   }
348 }
349 
emitWriteVXRM(MachineBasicBlock & MBB)350 void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
351   const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
352 
353   VXRMInfo Info = BBInfo.AvailableIn;
354 
355   // Flag to indicates we need to insert a VXRM write. We want to delay it as
356   // late as possible in this block.
357   bool PendingInsert = false;
358 
359   // Insert VXRM write if anticipated and not available.
360   if (BBInfo.AnticipatedIn.isStatic()) {
361     // If this is the entry block and the value is anticipated, insert.
362     if (MBB.isEntryBlock()) {
363       PendingInsert = true;
364     } else {
365       // Search for any predecessors that wouldn't satisfy our requirement and
366       // insert a write VXRM if needed.
367       // NOTE: If one predecessor is able to provide the requirement, but
368       // another isn't, it means we have a critical edge. The better placement
369       // would be to split the critical edge.
370       for (MachineBasicBlock *P : MBB.predecessors()) {
371         const BlockData &PInfo = BlockInfo[P->getNumber()];
372         // If it's available out of the predecessor, then we're ok.
373         if (PInfo.AvailableOut.isStatic() &&
374             PInfo.AvailableOut.getVXRMImm() ==
375                 BBInfo.AnticipatedIn.getVXRMImm())
376           continue;
377         // If the predecessor anticipates this value for all its successors,
378         // then a write to VXRM would have already occurred before this block is
379         // executed.
380         if (PInfo.AnticipatedOut.isStatic() &&
381             PInfo.AnticipatedOut.getVXRMImm() ==
382                 BBInfo.AnticipatedIn.getVXRMImm())
383           continue;
384         PendingInsert = true;
385         break;
386       }
387     }
388 
389     Info = BBInfo.AnticipatedIn;
390   }
391 
392   for (MachineInstr &MI : MBB) {
393     int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
394     if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
395       unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
396 
397       if (PendingInsert || !Info.isStatic() ||
398           Info.getVXRMImm() != NewVXRMImm) {
399         assert((!PendingInsert ||
400                 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
401                "Pending VXRM insertion mismatch");
402         LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
403         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
404             .addImm(NewVXRMImm);
405         PendingInsert = false;
406       }
407 
408       MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
409                                               /*IsImp*/ true));
410       Info.setVXRMImm(NewVXRMImm);
411       continue;
412     }
413 
414     if (MI.isCall() || MI.isInlineAsm() ||
415         MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
416       Info.setUnknown();
417   }
418 
419   // If all our successors anticipate a value, do the insert.
420   // NOTE: It's possible that not all predecessors of our successor provide the
421   // correct value. This can occur on critical edges. If we don't split the
422   // critical edge we'll also have a write vxrm in the successor that is
423   // redundant with this one.
424   if (PendingInsert ||
425       (BBInfo.AnticipatedOut.isStatic() &&
426        (!Info.isStatic() ||
427         Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
428     assert((!PendingInsert ||
429             (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
430              Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
431            "Pending VXRM insertion mismatch");
432     LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
433                       << " changing to " << BBInfo.AnticipatedOut << "\n");
434     BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
435             TII->get(RISCV::WriteVXRMImm))
436         .addImm(BBInfo.AnticipatedOut.getVXRMImm());
437   }
438 }
439 
runOnMachineFunction(MachineFunction & MF)440 bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
441   // Skip if the vector extension is not enabled.
442   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
443   if (!ST.hasVInstructions())
444     return false;
445 
446   TII = ST.getInstrInfo();
447 
448   assert(BlockInfo.empty() && "Expect empty block infos");
449   BlockInfo.resize(MF.getNumBlockIDs());
450 
451   // Phase 1 - collect block information.
452   bool NeedVXRMChange = false;
453   for (const MachineBasicBlock &MBB : MF)
454     NeedVXRMChange |= computeVXRMChanges(MBB);
455 
456   if (!NeedVXRMChange) {
457     BlockInfo.clear();
458     return false;
459   }
460 
461   // Phase 2 - Compute available VXRM using a forward walk.
462   for (const MachineBasicBlock &MBB : MF) {
463     WorkList.push(&MBB);
464     BlockInfo[MBB.getNumber()].InQueue = true;
465   }
466   while (!WorkList.empty()) {
467     const MachineBasicBlock &MBB = *WorkList.front();
468     WorkList.pop();
469     computeAvailable(MBB);
470   }
471 
472   // Phase 3 - Compute anticipated VXRM using a backwards walk.
473   for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
474     WorkList.push(&MBB);
475     BlockInfo[MBB.getNumber()].InQueue = true;
476   }
477   while (!WorkList.empty()) {
478     const MachineBasicBlock &MBB = *WorkList.front();
479     WorkList.pop();
480     computeAnticipated(MF, MBB);
481   }
482 
483   // Phase 4 - Emit VXRM writes at the earliest place possible.
484   for (MachineBasicBlock &MBB : MF)
485     emitWriteVXRM(MBB);
486 
487   BlockInfo.clear();
488 
489   return true;
490 }
491 
createRISCVInsertWriteVXRMPass()492 FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
493   return new RISCVInsertWriteVXRM();
494 }
495