xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements a simple VLIW packetizer using DFA. The packetizer works on
10 // machine basic blocks. For each instruction I in BB, the packetizer consults
11 // the DFA to see if machine resources are available to execute I. If so, the
12 // packetizer checks if I depends on any instruction J in the current packet.
13 // If no dependency is found, I is added to current packet and machine resource
14 // is marked as taken. If any dependency is found, a target API call is made to
15 // prune the dependence.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "HexagonVLIWPacketizer.h"
20 #include "Hexagon.h"
21 #include "HexagonInstrInfo.h"
22 #include "HexagonRegisterInfo.h"
23 #include "HexagonSubtarget.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/Analysis/AliasAnalysis.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
31 #include "llvm/CodeGen/MachineDominators.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineFunction.h"
34 #include "llvm/CodeGen/MachineFunctionPass.h"
35 #include "llvm/CodeGen/MachineInstr.h"
36 #include "llvm/CodeGen/MachineInstrBundle.h"
37 #include "llvm/CodeGen/MachineLoopInfo.h"
38 #include "llvm/CodeGen/MachineOperand.h"
39 #include "llvm/CodeGen/ScheduleDAG.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/MC/MCInstrDesc.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/raw_ostream.h"
50 #include <cassert>
51 #include <cstdint>
52 #include <iterator>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "packets"
57 
58 static cl::opt<bool>
59     DisablePacketizer("disable-packetizer", cl::Hidden,
60                       cl::desc("Disable Hexagon packetizer pass"));
61 
62 static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
63                                 cl::init(true),
64                                 cl::desc("Allow slot1 store and slot0 load"));
65 
66 static cl::opt<bool> PacketizeVolatiles(
67     "hexagon-packetize-volatiles", cl::Hidden, cl::init(true),
68     cl::desc("Allow non-solo packetization of volatile memory references"));
69 
70 static cl::opt<bool>
71     EnableGenAllInsnClass("enable-gen-insn", cl::Hidden,
72                           cl::desc("Generate all instruction with TC"));
73 
74 static cl::opt<bool>
75     DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden,
76                           cl::desc("Disable vector double new-value-stores"));
77 
78 extern cl::opt<bool> ScheduleInlineAsm;
79 
80 namespace {
81 
82   class HexagonPacketizer : public MachineFunctionPass {
83   public:
84     static char ID;
85 
HexagonPacketizer(bool Min=false)86     HexagonPacketizer(bool Min = false)
87       : MachineFunctionPass(ID), Minimal(Min) {}
88 
getAnalysisUsage(AnalysisUsage & AU) const89     void getAnalysisUsage(AnalysisUsage &AU) const override {
90       AU.setPreservesCFG();
91       AU.addRequired<AAResultsWrapperPass>();
92       AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
93       AU.addRequired<MachineDominatorTreeWrapperPass>();
94       AU.addRequired<MachineLoopInfoWrapperPass>();
95       AU.addPreserved<MachineDominatorTreeWrapperPass>();
96       AU.addPreserved<MachineLoopInfoWrapperPass>();
97       MachineFunctionPass::getAnalysisUsage(AU);
98     }
99 
getPassName() const100     StringRef getPassName() const override { return "Hexagon Packetizer"; }
101     bool runOnMachineFunction(MachineFunction &Fn) override;
102 
getRequiredProperties() const103     MachineFunctionProperties getRequiredProperties() const override {
104       return MachineFunctionProperties().setNoVRegs();
105     }
106 
107   private:
108     const HexagonInstrInfo *HII = nullptr;
109     const HexagonRegisterInfo *HRI = nullptr;
110     const bool Minimal = false;
111   };
112 
113 } // end anonymous namespace
114 
115 char HexagonPacketizer::ID = 0;
116 
117 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
118                       "Hexagon Packetizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)119 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
120 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
121 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
122 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
123 INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
124                     "Hexagon Packetizer", false, false)
125 
126 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
127       MachineLoopInfo &MLI, AAResults *AA,
128       const MachineBranchProbabilityInfo *MBPI, bool Minimal)
129     : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
130       Minimal(Minimal) {
131   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
132   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
133 
134   addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
135   addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
136   addMutation(std::make_unique<HexagonSubtarget::BankConflictMutation>());
137 }
138 
139 // Check if FirstI modifies a register that SecondI reads.
hasWriteToReadDep(const MachineInstr & FirstI,const MachineInstr & SecondI,const TargetRegisterInfo * TRI)140 static bool hasWriteToReadDep(const MachineInstr &FirstI,
141                               const MachineInstr &SecondI,
142                               const TargetRegisterInfo *TRI) {
143   for (auto &MO : FirstI.operands()) {
144     if (!MO.isReg() || !MO.isDef())
145       continue;
146     Register R = MO.getReg();
147     if (SecondI.readsRegister(R, TRI))
148       return true;
149   }
150   return false;
151 }
152 
153 
moveInstrOut(MachineInstr & MI,MachineBasicBlock::iterator BundleIt,bool Before)154 static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
155       MachineBasicBlock::iterator BundleIt, bool Before) {
156   MachineBasicBlock::instr_iterator InsertPt;
157   if (Before)
158     InsertPt = BundleIt.getInstrIterator();
159   else
160     InsertPt = std::next(BundleIt).getInstrIterator();
161 
162   MachineBasicBlock &B = *MI.getParent();
163   // The instruction should at least be bundled with the preceding instruction
164   // (there will always be one, i.e. BUNDLE, if nothing else).
165   assert(MI.isBundledWithPred());
166   if (MI.isBundledWithSucc()) {
167     MI.clearFlag(MachineInstr::BundledSucc);
168     MI.clearFlag(MachineInstr::BundledPred);
169   } else {
170     // If it's not bundled with the successor (i.e. it is the last one
171     // in the bundle), then we can simply unbundle it from the predecessor,
172     // which will take care of updating the predecessor's flag.
173     MI.unbundleFromPred();
174   }
175   B.splice(InsertPt, &B, MI.getIterator());
176 
177   // Get the size of the bundle without asserting.
178   MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
179   MachineBasicBlock::const_instr_iterator E = B.instr_end();
180   unsigned Size = 0;
181   for (++I; I != E && I->isBundledWithPred(); ++I)
182     ++Size;
183 
184   // If there are still two or more instructions, then there is nothing
185   // else to be done.
186   if (Size > 1)
187     return BundleIt;
188 
189   // Otherwise, extract the single instruction out and delete the bundle.
190   MachineBasicBlock::iterator NextIt = std::next(BundleIt);
191   MachineInstr &SingleI = *BundleIt->getNextNode();
192   SingleI.unbundleFromPred();
193   assert(!SingleI.isBundledWithSucc());
194   BundleIt->eraseFromParent();
195   return NextIt;
196 }
197 
runOnMachineFunction(MachineFunction & MF)198 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
199   // FIXME: This pass causes verification failures.
200   MF.getProperties().setFailsVerification();
201 
202   auto &HST = MF.getSubtarget<HexagonSubtarget>();
203   HII = HST.getInstrInfo();
204   HRI = HST.getRegisterInfo();
205   auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
206   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
207   auto *MBPI =
208       &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
209 
210   if (EnableGenAllInsnClass)
211     HII->genAllInsnTimingClasses(MF);
212 
213   // Instantiate the packetizer.
214   bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
215                  skipFunction(MF.getFunction());
216   HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
217 
218   // DFA state table should not be empty.
219   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
220 
221   // Loop over all basic blocks and remove KILL pseudo-instructions
222   // These instructions confuse the dependence analysis. Consider:
223   // D0 = ...   (Insn 0)
224   // R0 = KILL R0, D0 (Insn 1)
225   // R0 = ... (Insn 2)
226   // Here, Insn 1 will result in the dependence graph not emitting an output
227   // dependence between Insn 0 and Insn 2. This can lead to incorrect
228   // packetization
229   for (MachineBasicBlock &MB : MF) {
230     for (MachineInstr &MI : llvm::make_early_inc_range(MB))
231       if (MI.isKill())
232         MB.erase(&MI);
233   }
234 
235   // TinyCore with Duplexes: Translate to big-instructions.
236   if (HST.isTinyCoreWithDuplex())
237     HII->translateInstrsForDup(MF, true);
238 
239   // Loop over all of the basic blocks.
240   for (auto &MB : MF) {
241     auto Begin = MB.begin(), End = MB.end();
242     while (Begin != End) {
243       // Find the first non-boundary starting from the end of the last
244       // scheduling region.
245       MachineBasicBlock::iterator RB = Begin;
246       while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
247         ++RB;
248       // Find the first boundary starting from the beginning of the new
249       // region.
250       MachineBasicBlock::iterator RE = RB;
251       while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
252         ++RE;
253       // Add the scheduling boundary if it's not block end.
254       if (RE != End)
255         ++RE;
256       // If RB == End, then RE == End.
257       if (RB != End)
258         Packetizer.PacketizeMIs(&MB, RB, RE);
259 
260       Begin = RE;
261     }
262   }
263 
264   // TinyCore with Duplexes: Translate to tiny-instructions.
265   if (HST.isTinyCoreWithDuplex())
266     HII->translateInstrsForDup(MF, false);
267 
268   Packetizer.unpacketizeSoloInstrs(MF);
269   return true;
270 }
271 
272 // Reserve resources for a constant extender. Trigger an assertion if the
273 // reservation fails.
reserveResourcesForConstExt()274 void HexagonPacketizerList::reserveResourcesForConstExt() {
275   if (!tryAllocateResourcesForConstExt(true))
276     llvm_unreachable("Resources not available");
277 }
278 
canReserveResourcesForConstExt()279 bool HexagonPacketizerList::canReserveResourcesForConstExt() {
280   return tryAllocateResourcesForConstExt(false);
281 }
282 
283 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
284 // return true, otherwise, return false.
tryAllocateResourcesForConstExt(bool Reserve)285 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
286   auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
287   bool Avail = ResourceTracker->canReserveResources(*ExtMI);
288   if (Reserve && Avail)
289     ResourceTracker->reserveResources(*ExtMI);
290   MF.deleteMachineInstr(ExtMI);
291   return Avail;
292 }
293 
isCallDependent(const MachineInstr & MI,SDep::Kind DepType,unsigned DepReg)294 bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
295       SDep::Kind DepType, unsigned DepReg) {
296   // Check for LR dependence.
297   if (DepReg == HRI->getRARegister())
298     return true;
299 
300   if (HII->isDeallocRet(MI))
301     if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
302       return true;
303 
304   // Call-like instructions can be packetized with preceding instructions
305   // that define registers implicitly used or modified by the call. Explicit
306   // uses are still prohibited, as in the case of indirect calls:
307   //   r0 = ...
308   //   J2_jumpr r0
309   if (DepType == SDep::Data) {
310     for (const MachineOperand &MO : MI.operands())
311       if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
312         return true;
313   }
314 
315   return false;
316 }
317 
isRegDependence(const SDep::Kind DepType)318 static bool isRegDependence(const SDep::Kind DepType) {
319   return DepType == SDep::Data || DepType == SDep::Anti ||
320          DepType == SDep::Output;
321 }
322 
isDirectJump(const MachineInstr & MI)323 static bool isDirectJump(const MachineInstr &MI) {
324   return MI.getOpcode() == Hexagon::J2_jump;
325 }
326 
isSchedBarrier(const MachineInstr & MI)327 static bool isSchedBarrier(const MachineInstr &MI) {
328   switch (MI.getOpcode()) {
329   case Hexagon::Y2_barrier:
330     return true;
331   }
332   return false;
333 }
334 
isControlFlow(const MachineInstr & MI)335 static bool isControlFlow(const MachineInstr &MI) {
336   return MI.getDesc().isTerminator() || MI.getDesc().isCall();
337 }
338 
339 /// Returns true if the instruction modifies a callee-saved register.
doesModifyCalleeSavedReg(const MachineInstr & MI,const TargetRegisterInfo * TRI)340 static bool doesModifyCalleeSavedReg(const MachineInstr &MI,
341                                      const TargetRegisterInfo *TRI) {
342   const MachineFunction &MF = *MI.getParent()->getParent();
343   for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
344     if (MI.modifiesRegister(*CSR, TRI))
345       return true;
346   return false;
347 }
348 
349 // Returns true if an instruction can be promoted to .new predicate or
350 // new-value store.
isNewifiable(const MachineInstr & MI,const TargetRegisterClass * NewRC)351 bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI,
352       const TargetRegisterClass *NewRC) {
353   // Vector stores can be predicated, and can be new-value stores, but
354   // they cannot be predicated on a .new predicate value.
355   if (NewRC == &Hexagon::PredRegsRegClass) {
356     if (HII->isHVXVec(MI) && MI.mayStore())
357       return false;
358     return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0;
359   }
360   // If the class is not PredRegs, it could only apply to new-value stores.
361   return HII->mayBeNewStore(MI);
362 }
363 
364 // Promote an instructiont to its .cur form.
365 // At this time, we have already made a call to canPromoteToDotCur and made
366 // sure that it can *indeed* be promoted.
promoteToDotCur(MachineInstr & MI,SDep::Kind DepType,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)367 bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI,
368       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
369       const TargetRegisterClass* RC) {
370   assert(DepType == SDep::Data);
371   int CurOpcode = HII->getDotCurOp(MI);
372   MI.setDesc(HII->get(CurOpcode));
373   return true;
374 }
375 
cleanUpDotCur()376 void HexagonPacketizerList::cleanUpDotCur() {
377   MachineInstr *MI = nullptr;
378   for (auto *BI : CurrentPacketMIs) {
379     LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
380     if (HII->isDotCurInst(*BI)) {
381       MI = BI;
382       continue;
383     }
384     if (MI) {
385       for (auto &MO : BI->operands())
386         if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
387           return;
388     }
389   }
390   if (!MI)
391     return;
392   // We did not find a use of the CUR, so de-cur it.
393   MI->setDesc(HII->get(HII->getNonDotCurOp(*MI)));
394   LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
395 }
396 
397 // Check to see if an instruction can be dot cur.
canPromoteToDotCur(const MachineInstr & MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)398 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
399       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
400       const TargetRegisterClass *RC) {
401   if (!HII->isHVXVec(MI))
402     return false;
403   if (!HII->isHVXVec(*MII))
404     return false;
405 
406   // Already a dot new instruction.
407   if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
408     return false;
409 
410   if (!HII->mayBeCurLoad(MI))
411     return false;
412 
413   // The "cur value" cannot come from inline asm.
414   if (PacketSU->getInstr()->isInlineAsm())
415     return false;
416 
417   // Make sure candidate instruction uses cur.
418   LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump();
419              dbgs() << "in packet\n";);
420   MachineInstr &MJ = *MII;
421   LLVM_DEBUG({
422     dbgs() << "Checking CUR against ";
423     MJ.dump();
424   });
425   Register DestReg = MI.getOperand(0).getReg();
426   bool FoundMatch = false;
427   for (auto &MO : MJ.operands())
428     if (MO.isReg() && MO.getReg() == DestReg)
429       FoundMatch = true;
430   if (!FoundMatch)
431     return false;
432 
433   // Check for existing uses of a vector register within the packet which
434   // would be affected by converting a vector load into .cur format.
435   for (auto *BI : CurrentPacketMIs) {
436     LLVM_DEBUG(dbgs() << "packet has "; BI->dump(););
437     if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
438       return false;
439   }
440 
441   LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
442   // We can convert the opcode into a .cur.
443   return true;
444 }
445 
446 // Promote an instruction to its .new form. At this time, we have already
447 // made a call to canPromoteToDotNew and made sure that it can *indeed* be
448 // promoted.
promoteToDotNew(MachineInstr & MI,SDep::Kind DepType,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)449 bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
450       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
451       const TargetRegisterClass* RC) {
452   assert(DepType == SDep::Data);
453   int NewOpcode;
454   if (RC == &Hexagon::PredRegsRegClass)
455     NewOpcode = HII->getDotNewPredOp(MI, MBPI);
456   else
457     NewOpcode = HII->getDotNewOp(MI);
458   MI.setDesc(HII->get(NewOpcode));
459   return true;
460 }
461 
demoteToDotOld(MachineInstr & MI)462 bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
463   int NewOpcode = HII->getDotOldOp(MI);
464   MI.setDesc(HII->get(NewOpcode));
465   return true;
466 }
467 
useCallersSP(MachineInstr & MI)468 bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) {
469   unsigned Opc = MI.getOpcode();
470   switch (Opc) {
471     case Hexagon::S2_storerd_io:
472     case Hexagon::S2_storeri_io:
473     case Hexagon::S2_storerh_io:
474     case Hexagon::S2_storerb_io:
475       break;
476     default:
477       llvm_unreachable("Unexpected instruction");
478   }
479   unsigned FrameSize = MF.getFrameInfo().getStackSize();
480   MachineOperand &Off = MI.getOperand(1);
481   int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE);
482   if (HII->isValidOffset(Opc, NewOff, HRI)) {
483     Off.setImm(NewOff);
484     return true;
485   }
486   return false;
487 }
488 
useCalleesSP(MachineInstr & MI)489 void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) {
490   unsigned Opc = MI.getOpcode();
491   switch (Opc) {
492     case Hexagon::S2_storerd_io:
493     case Hexagon::S2_storeri_io:
494     case Hexagon::S2_storerh_io:
495     case Hexagon::S2_storerb_io:
496       break;
497     default:
498       llvm_unreachable("Unexpected instruction");
499   }
500   unsigned FrameSize = MF.getFrameInfo().getStackSize();
501   MachineOperand &Off = MI.getOperand(1);
502   Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
503 }
504 
505 /// Return true if we can update the offset in MI so that MI and MJ
506 /// can be packetized together.
updateOffset(SUnit * SUI,SUnit * SUJ)507 bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
508   assert(SUI->getInstr() && SUJ->getInstr());
509   MachineInstr &MI = *SUI->getInstr();
510   MachineInstr &MJ = *SUJ->getInstr();
511 
512   unsigned BPI, OPI;
513   if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI))
514     return false;
515   unsigned BPJ, OPJ;
516   if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ))
517     return false;
518   Register Reg = MI.getOperand(BPI).getReg();
519   if (Reg != MJ.getOperand(BPJ).getReg())
520     return false;
521   // Make sure that the dependences do not restrict adding MI to the packet.
522   // That is, ignore anti dependences, and make sure the only data dependence
523   // involves the specific register.
524   for (const auto &PI : SUI->Preds)
525     if (PI.getKind() != SDep::Anti &&
526         (PI.getKind() != SDep::Data || PI.getReg() != Reg))
527       return false;
528   int Incr;
529   if (!HII->getIncrementValue(MJ, Incr))
530     return false;
531 
532   int64_t Offset = MI.getOperand(OPI).getImm();
533   if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI))
534     return false;
535 
536   MI.getOperand(OPI).setImm(Offset + Incr);
537   ChangedOffset = Offset;
538   return true;
539 }
540 
541 /// Undo the changed offset. This is needed if the instruction cannot be
542 /// added to the current packet due to a different instruction.
undoChangedOffset(MachineInstr & MI)543 void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) {
544   unsigned BP, OP;
545   if (!HII->getBaseAndOffsetPosition(MI, BP, OP))
546     llvm_unreachable("Unable to find base and offset operands.");
547   MI.getOperand(OP).setImm(ChangedOffset);
548 }
549 
550 enum PredicateKind {
551   PK_False,
552   PK_True,
553   PK_Unknown
554 };
555 
556 /// Returns true if an instruction is predicated on p0 and false if it's
557 /// predicated on !p0.
getPredicateSense(const MachineInstr & MI,const HexagonInstrInfo * HII)558 static PredicateKind getPredicateSense(const MachineInstr &MI,
559                                        const HexagonInstrInfo *HII) {
560   if (!HII->isPredicated(MI))
561     return PK_Unknown;
562   if (HII->isPredicatedTrue(MI))
563     return PK_True;
564   return PK_False;
565 }
566 
getPostIncrementOperand(const MachineInstr & MI,const HexagonInstrInfo * HII)567 static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI,
568       const HexagonInstrInfo *HII) {
569   assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
570 #ifndef NDEBUG
571   // Post Increment means duplicates. Use dense map to find duplicates in the
572   // list. Caution: Densemap initializes with the minimum of 64 buckets,
573   // whereas there are at most 5 operands in the post increment.
574   DenseSet<unsigned> DefRegsSet;
575   for (auto &MO : MI.operands())
576     if (MO.isReg() && MO.isDef())
577       DefRegsSet.insert(MO.getReg());
578 
579   for (auto &MO : MI.operands())
580     if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
581       return MO;
582 #else
583   if (MI.mayLoad()) {
584     const MachineOperand &Op1 = MI.getOperand(1);
585     // The 2nd operand is always the post increment operand in load.
586     assert(Op1.isReg() && "Post increment operand has be to a register.");
587     return Op1;
588   }
589   if (MI.getDesc().mayStore()) {
590     const MachineOperand &Op0 = MI.getOperand(0);
591     // The 1st operand is always the post increment operand in store.
592     assert(Op0.isReg() && "Post increment operand has be to a register.");
593     return Op0;
594   }
595 #endif
596   // we should never come here.
597   llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
598 }
599 
600 // Get the value being stored.
getStoreValueOperand(const MachineInstr & MI)601 static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) {
602   // value being stored is always the last operand.
603   return MI.getOperand(MI.getNumOperands()-1);
604 }
605 
isLoadAbsSet(const MachineInstr & MI)606 static bool isLoadAbsSet(const MachineInstr &MI) {
607   unsigned Opc = MI.getOpcode();
608   switch (Opc) {
609     case Hexagon::L4_loadrd_ap:
610     case Hexagon::L4_loadrb_ap:
611     case Hexagon::L4_loadrh_ap:
612     case Hexagon::L4_loadrub_ap:
613     case Hexagon::L4_loadruh_ap:
614     case Hexagon::L4_loadri_ap:
615       return true;
616   }
617   return false;
618 }
619 
getAbsSetOperand(const MachineInstr & MI)620 static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) {
621   assert(isLoadAbsSet(MI));
622   return MI.getOperand(1);
623 }
624 
625 // Can be new value store?
626 // Following restrictions are to be respected in convert a store into
627 // a new value store.
628 // 1. If an instruction uses auto-increment, its address register cannot
629 //    be a new-value register. Arch Spec 5.4.2.1
630 // 2. If an instruction uses absolute-set addressing mode, its address
631 //    register cannot be a new-value register. Arch Spec 5.4.2.1.
632 // 3. If an instruction produces a 64-bit result, its registers cannot be used
633 //    as new-value registers. Arch Spec 5.4.2.2.
634 // 4. If the instruction that sets the new-value register is conditional, then
635 //    the instruction that uses the new-value register must also be conditional,
636 //    and both must always have their predicates evaluate identically.
637 //    Arch Spec 5.4.2.3.
638 // 5. There is an implied restriction that a packet cannot have another store,
639 //    if there is a new value store in the packet. Corollary: if there is
640 //    already a store in a packet, there can not be a new value store.
641 //    Arch Spec: 3.4.4.2
canPromoteToNewValueStore(const MachineInstr & MI,const MachineInstr & PacketMI,unsigned DepReg)642 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
643       const MachineInstr &PacketMI, unsigned DepReg) {
644   // Make sure we are looking at the store, that can be promoted.
645   if (!HII->mayBeNewStore(MI))
646     return false;
647 
648   // Make sure there is dependency and can be new value'd.
649   const MachineOperand &Val = getStoreValueOperand(MI);
650   if (Val.isReg() && Val.getReg() != DepReg)
651     return false;
652 
653   const MCInstrDesc& MCID = PacketMI.getDesc();
654 
655   // First operand is always the result.
656   const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
657   // Double regs can not feed into new value store: PRM section: 5.4.2.2.
658   if (PacketRC == &Hexagon::DoubleRegsRegClass)
659     return false;
660 
661   // New-value stores are of class NV (slot 0), dual stores require class ST
662   // in slot 0 (PRM 5.5).
663   for (auto *I : CurrentPacketMIs) {
664     SUnit *PacketSU = MIToSUnit.find(I)->second;
665     if (PacketSU->getInstr()->mayStore())
666       return false;
667   }
668 
669   // Make sure it's NOT the post increment register that we are going to
670   // new value.
671   if (HII->isPostIncrement(MI) &&
672       getPostIncrementOperand(MI, HII).getReg() == DepReg) {
673     return false;
674   }
675 
676   if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() &&
677       getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
678     // If source is post_inc, or absolute-set addressing, it can not feed
679     // into new value store
680     //   r3 = memw(r2++#4)
681     //   memw(r30 + #-1404) = r2.new -> can not be new value store
682     // arch spec section: 5.4.2.1.
683     return false;
684   }
685 
686   if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
687     return false;
688 
689   // If the source that feeds the store is predicated, new value store must
690   // also be predicated.
691   if (HII->isPredicated(PacketMI)) {
692     if (!HII->isPredicated(MI))
693       return false;
694 
695     // Check to make sure that they both will have their predicates
696     // evaluate identically.
697     unsigned predRegNumSrc = 0;
698     unsigned predRegNumDst = 0;
699     const TargetRegisterClass* predRegClass = nullptr;
700 
701     // Get predicate register used in the source instruction.
702     for (auto &MO : PacketMI.operands()) {
703       if (!MO.isReg())
704         continue;
705       predRegNumSrc = MO.getReg();
706       predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
707       if (predRegClass == &Hexagon::PredRegsRegClass)
708         break;
709     }
710     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
711         "predicate register not found in a predicated PacketMI instruction");
712 
713     // Get predicate register used in new-value store instruction.
714     for (auto &MO : MI.operands()) {
715       if (!MO.isReg())
716         continue;
717       predRegNumDst = MO.getReg();
718       predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
719       if (predRegClass == &Hexagon::PredRegsRegClass)
720         break;
721     }
722     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
723            "predicate register not found in a predicated MI instruction");
724 
725     // New-value register producer and user (store) need to satisfy these
726     // constraints:
727     // 1) Both instructions should be predicated on the same register.
728     // 2) If producer of the new-value register is .new predicated then store
729     // should also be .new predicated and if producer is not .new predicated
730     // then store should not be .new predicated.
731     // 3) Both new-value register producer and user should have same predicate
732     // sense, i.e, either both should be negated or both should be non-negated.
733     if (predRegNumDst != predRegNumSrc ||
734         HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
735         getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
736       return false;
737   }
738 
739   // Make sure that other than the new-value register no other store instruction
740   // register has been modified in the same packet. Predicate registers can be
741   // modified by they should not be modified between the producer and the store
742   // instruction as it will make them both conditional on different values.
743   // We already know this to be true for all the instructions before and
744   // including PacketMI. However, we need to perform the check for the
745   // remaining instructions in the packet.
746 
747   unsigned StartCheck = 0;
748 
749   for (auto *I : CurrentPacketMIs) {
750     SUnit *TempSU = MIToSUnit.find(I)->second;
751     MachineInstr &TempMI = *TempSU->getInstr();
752 
753     // Following condition is true for all the instructions until PacketMI is
754     // reached (StartCheck is set to 0 before the for loop).
755     // StartCheck flag is 1 for all the instructions after PacketMI.
756     if (&TempMI != &PacketMI && !StartCheck) // Start processing only after
757       continue;                              // encountering PacketMI.
758 
759     StartCheck = 1;
760     if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence.
761       continue;
762 
763     for (auto &MO : MI.operands())
764       if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
765         return false;
766   }
767 
768   // Make sure that for non-POST_INC stores:
769   // 1. The only use of reg is DepReg and no other registers.
770   //    This handles base+index registers.
771   //    The following store can not be dot new.
772   //    Eg.   r0 = add(r0, #3)
773   //          memw(r1+r0<<#2) = r0
774   if (!HII->isPostIncrement(MI)) {
775     for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) {
776       const MachineOperand &MO = MI.getOperand(opNum);
777       if (MO.isReg() && MO.getReg() == DepReg)
778         return false;
779     }
780   }
781 
782   // If data definition is because of implicit definition of the register,
783   // do not newify the store. Eg.
784   // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12
785   // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343]
786   for (auto &MO : PacketMI.operands()) {
787     if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
788       return false;
789     if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
790       continue;
791     Register R = MO.getReg();
792     if (R == DepReg || HRI->isSuperRegister(DepReg, R))
793       return false;
794   }
795 
796   // Handle imp-use of super reg case. There is a target independent side
797   // change that should prevent this situation but I am handling it for
798   // just-in-case. For example, we cannot newify R2 in the following case:
799   // %r3 = A2_tfrsi 0;
800   // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1;
801   for (auto &MO : MI.operands()) {
802     if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
803       return false;
804   }
805 
806   // Can be dot new store.
807   return true;
808 }
809 
810 // Can this MI to promoted to either new value store or new value jump.
canPromoteToNewValue(const MachineInstr & MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII)811 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
812       const SUnit *PacketSU, unsigned DepReg,
813       MachineBasicBlock::iterator &MII) {
814   if (!HII->mayBeNewStore(MI))
815     return false;
816 
817   // Check to see the store can be new value'ed.
818   MachineInstr &PacketMI = *PacketSU->getInstr();
819   if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
820     return true;
821 
822   // Check to see the compare/jump can be new value'ed.
823   // This is done as a pass on its own. Don't need to check it here.
824   return false;
825 }
826 
isImplicitDependency(const MachineInstr & I,bool CheckDef,unsigned DepReg)827 static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
828       unsigned DepReg) {
829   for (auto &MO : I.operands()) {
830     if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg))
831       return true;
832     if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit())
833       continue;
834     if (CheckDef == MO.isDef())
835       return true;
836   }
837   return false;
838 }
839 
840 // Check to see if an instruction can be dot new.
canPromoteToDotNew(const MachineInstr & MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)841 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
842       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
843       const TargetRegisterClass* RC) {
844   // Already a dot new instruction.
845   if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
846     return false;
847 
848   if (!isNewifiable(MI, RC))
849     return false;
850 
851   const MachineInstr &PI = *PacketSU->getInstr();
852 
853   // The "new value" cannot come from inline asm.
854   if (PI.isInlineAsm())
855     return false;
856 
857   // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
858   // sense.
859   if (PI.isImplicitDef())
860     return false;
861 
862   // If dependency is through an implicitly defined register, we should not
863   // newify the use.
864   if (isImplicitDependency(PI, true, DepReg) ||
865       isImplicitDependency(MI, false, DepReg))
866     return false;
867 
868   const MCInstrDesc& MCID = PI.getDesc();
869   const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
870   if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
871     return false;
872 
873   // predicate .new
874   if (RC == &Hexagon::PredRegsRegClass)
875     return HII->predCanBeUsedAsDotNew(PI, DepReg);
876 
877   if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
878     return false;
879 
880   // Create a dot new machine instruction to see if resources can be
881   // allocated. If not, bail out now.
882   int NewOpcode = (RC != &Hexagon::PredRegsRegClass) ? HII->getDotNewOp(MI) :
883     HII->getDotNewPredOp(MI, MBPI);
884   const MCInstrDesc &D = HII->get(NewOpcode);
885   MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
886   bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
887   MF.deleteMachineInstr(NewMI);
888   if (!ResourcesAvailable)
889     return false;
890 
891   // New Value Store only. New Value Jump generated as a separate pass.
892   if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
893     return false;
894 
895   return true;
896 }
897 
898 // Go through the packet instructions and search for an anti dependency between
899 // them and DepReg from MI. Consider this case:
900 // Trying to add
901 // a) %r1 = TFRI_cdNotPt %p3, 2
902 // to this packet:
903 // {
904 //   b) %p0 = C2_or killed %p3, killed %p0
905 //   c) %p3 = C2_tfrrp %r23
906 //   d) %r1 = C2_cmovenewit %p3, 4
907 //  }
908 // The P3 from a) and d) will be complements after
909 // a)'s P3 is converted to .new form
910 // Anti-dep between c) and b) is irrelevant for this case
restrictingDepExistInPacket(MachineInstr & MI,unsigned DepReg)911 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI,
912                                                         unsigned DepReg) {
913   SUnit *PacketSUDep = MIToSUnit.find(&MI)->second;
914 
915   for (auto *I : CurrentPacketMIs) {
916     // We only care for dependencies to predicated instructions
917     if (!HII->isPredicated(*I))
918       continue;
919 
920     // Scheduling Unit for current insn in the packet
921     SUnit *PacketSU = MIToSUnit.find(I)->second;
922 
923     // Look at dependencies between current members of the packet and
924     // predicate defining instruction MI. Make sure that dependency is
925     // on the exact register we care about.
926     if (PacketSU->isSucc(PacketSUDep)) {
927       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
928         auto &Dep = PacketSU->Succs[i];
929         if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
930             Dep.getReg() == DepReg)
931           return true;
932       }
933     }
934   }
935 
936   return false;
937 }
938 
939 /// Gets the predicate register of a predicated instruction.
getPredicatedRegister(MachineInstr & MI,const HexagonInstrInfo * QII)940 static unsigned getPredicatedRegister(MachineInstr &MI,
941                                       const HexagonInstrInfo *QII) {
942   /// We use the following rule: The first predicate register that is a use is
943   /// the predicate register of a predicated instruction.
944   assert(QII->isPredicated(MI) && "Must be predicated instruction");
945 
946   for (auto &Op : MI.operands()) {
947     if (Op.isReg() && Op.getReg() && Op.isUse() &&
948         Hexagon::PredRegsRegClass.contains(Op.getReg()))
949       return Op.getReg();
950   }
951 
952   llvm_unreachable("Unknown instruction operand layout");
953   return 0;
954 }
955 
956 // Given two predicated instructions, this function detects whether
957 // the predicates are complements.
arePredicatesComplements(MachineInstr & MI1,MachineInstr & MI2)958 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
959                                                      MachineInstr &MI2) {
960   // If we don't know the predicate sense of the instructions bail out early, we
961   // need it later.
962   if (getPredicateSense(MI1, HII) == PK_Unknown ||
963       getPredicateSense(MI2, HII) == PK_Unknown)
964     return false;
965 
966   // Scheduling unit for candidate.
967   SUnit *SU = MIToSUnit[&MI1];
968 
969   // One corner case deals with the following scenario:
970   // Trying to add
971   // a) %r24 = A2_tfrt %p0, %r25
972   // to this packet:
973   // {
974   //   b) %r25 = A2_tfrf %p0, %r24
975   //   c) %p0 = C2_cmpeqi %r26, 1
976   // }
977   //
978   // On general check a) and b) are complements, but presence of c) will
979   // convert a) to .new form, and then it is not a complement.
980   // We attempt to detect it by analyzing existing dependencies in the packet.
981 
982   // Analyze relationships between all existing members of the packet.
983   // Look for Anti dependency on the same predicate reg as used in the
984   // candidate.
985   for (auto *I : CurrentPacketMIs) {
986     // Scheduling Unit for current insn in the packet.
987     SUnit *PacketSU = MIToSUnit.find(I)->second;
988 
989     // If this instruction in the packet is succeeded by the candidate...
990     if (PacketSU->isSucc(SU)) {
991       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
992         auto Dep = PacketSU->Succs[i];
993         // The corner case exist when there is true data dependency between
994         // candidate and one of current packet members, this dep is on
995         // predicate reg, and there already exist anti dep on the same pred in
996         // the packet.
997         if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
998             Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
999           // Here I know that I is predicate setting instruction with true
1000           // data dep to candidate on the register we care about - c) in the
1001           // above example. Now I need to see if there is an anti dependency
1002           // from c) to any other instruction in the same packet on the pred
1003           // reg of interest.
1004           if (restrictingDepExistInPacket(*I, Dep.getReg()))
1005             return false;
1006         }
1007       }
1008     }
1009   }
1010 
1011   // If the above case does not apply, check regular complement condition.
1012   // Check that the predicate register is the same and that the predicate
1013   // sense is different We also need to differentiate .old vs. .new: !p0
1014   // is not complementary to p0.new.
1015   unsigned PReg1 = getPredicatedRegister(MI1, HII);
1016   unsigned PReg2 = getPredicatedRegister(MI2, HII);
1017   return PReg1 == PReg2 &&
1018          Hexagon::PredRegsRegClass.contains(PReg1) &&
1019          Hexagon::PredRegsRegClass.contains(PReg2) &&
1020          getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
1021          HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
1022 }
1023 
1024 // Initialize packetizer flags.
initPacketizerState()1025 void HexagonPacketizerList::initPacketizerState() {
1026   Dependence = false;
1027   PromotedToDotNew = false;
1028   GlueToNewValueJump = false;
1029   GlueAllocframeStore = false;
1030   FoundSequentialDependence = false;
1031   ChangedOffset = INT64_MAX;
1032 }
1033 
1034 // Ignore bundling of pseudo instructions.
ignorePseudoInstruction(const MachineInstr & MI,const MachineBasicBlock *)1035 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
1036                                                     const MachineBasicBlock *) {
1037   if (MI.isDebugInstr())
1038     return true;
1039 
1040   if (MI.isCFIInstruction())
1041     return false;
1042 
1043   // We must print out inline assembly.
1044   if (MI.isInlineAsm())
1045     return false;
1046 
1047   if (MI.isImplicitDef())
1048     return false;
1049 
1050   // We check if MI has any functional units mapped to it. If it doesn't,
1051   // we ignore the instruction.
1052   const MCInstrDesc& TID = MI.getDesc();
1053   auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
1054   return !IS->getUnits();
1055 }
1056 
isSoloInstruction(const MachineInstr & MI)1057 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
1058   // Ensure any bundles created by gather packetize remain separate.
1059   if (MI.isBundle())
1060     return true;
1061 
1062   if (MI.isEHLabel() || MI.isCFIInstruction())
1063     return true;
1064 
1065   // Consider inline asm to not be a solo instruction by default.
1066   // Inline asm will be put in a packet temporarily, but then it will be
1067   // removed, and placed outside of the packet (before or after, depending
1068   // on dependencies).  This is to reduce the impact of inline asm as a
1069   // "packet splitting" instruction.
1070   if (MI.isInlineAsm() && !ScheduleInlineAsm)
1071     return true;
1072 
1073   if (isSchedBarrier(MI))
1074     return true;
1075 
1076   if (HII->isSolo(MI))
1077     return true;
1078 
1079   if (MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_ENTER ||
1080       MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_EXIT ||
1081       MI.getOpcode() == Hexagon::PATCHABLE_TAIL_CALL)
1082     return true;
1083 
1084   if (MI.getOpcode() == Hexagon::A2_nop)
1085     return true;
1086 
1087   return false;
1088 }
1089 
1090 // Quick check if instructions MI and MJ cannot coexist in the same packet.
1091 // Limit the tests to be "one-way", e.g.  "if MI->isBranch and MJ->isInlineAsm",
1092 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
1093 // For full test call this function twice:
1094 //   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
1095 // Doing the test only one way saves the amount of code in this function,
1096 // since every test would need to be repeated with the MI and MJ reversed.
cannotCoexistAsymm(const MachineInstr & MI,const MachineInstr & MJ,const HexagonInstrInfo & HII)1097 static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
1098       const HexagonInstrInfo &HII) {
1099   const MachineFunction *MF = MI.getParent()->getParent();
1100   if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() &&
1101       HII.isHVXMemWithAIndirect(MI, MJ))
1102     return true;
1103 
1104   // Don't allow a store and an instruction that must be in slot0 and
1105   // doesn't allow a slot1 instruction.
1106   if (MI.mayStore() && HII.isRestrictNoSlot1Store(MJ) && HII.isPureSlot0(MJ))
1107     return true;
1108 
1109   // An inline asm cannot be together with a branch, because we may not be
1110   // able to remove the asm out after packetizing (i.e. if the asm must be
1111   // moved past the bundle).  Similarly, two asms cannot be together to avoid
1112   // complications when determining their relative order outside of a bundle.
1113   if (MI.isInlineAsm())
1114     return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
1115            MJ.isCall() || MJ.isTerminator();
1116 
1117   // New-value stores cannot coexist with any other stores.
1118   if (HII.isNewValueStore(MI) && MJ.mayStore())
1119     return true;
1120 
1121   switch (MI.getOpcode()) {
1122   case Hexagon::S2_storew_locked:
1123   case Hexagon::S4_stored_locked:
1124   case Hexagon::L2_loadw_locked:
1125   case Hexagon::L4_loadd_locked:
1126   case Hexagon::Y2_dccleana:
1127   case Hexagon::Y2_dccleaninva:
1128   case Hexagon::Y2_dcinva:
1129   case Hexagon::Y2_dczeroa:
1130   case Hexagon::Y4_l2fetch:
1131   case Hexagon::Y5_l2fetch: {
1132     // These instructions can only be grouped with ALU32 or non-floating-point
1133     // XTYPE instructions.  Since there is no convenient way of identifying fp
1134     // XTYPE instructions, only allow grouping with ALU32 for now.
1135     unsigned TJ = HII.getType(MJ);
1136     if (TJ != HexagonII::TypeALU32_2op &&
1137         TJ != HexagonII::TypeALU32_3op &&
1138         TJ != HexagonII::TypeALU32_ADDI)
1139       return true;
1140     break;
1141   }
1142   default:
1143     break;
1144   }
1145 
1146   // "False" really means that the quick check failed to determine if
1147   // I and J cannot coexist.
1148   return false;
1149 }
1150 
1151 // Full, symmetric check.
cannotCoexist(const MachineInstr & MI,const MachineInstr & MJ)1152 bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
1153       const MachineInstr &MJ) {
1154   return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
1155 }
1156 
unpacketizeSoloInstrs(MachineFunction & MF)1157 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
1158   for (auto &B : MF) {
1159     MachineBasicBlock::iterator BundleIt;
1160     for (MachineInstr &MI : llvm::make_early_inc_range(B.instrs())) {
1161       if (MI.isBundle())
1162         BundleIt = MI.getIterator();
1163       if (!MI.isInsideBundle())
1164         continue;
1165 
1166       // Decide on where to insert the instruction that we are pulling out.
1167       // Debug instructions always go before the bundle, but the placement of
1168       // INLINE_ASM depends on potential dependencies.  By default, try to
1169       // put it before the bundle, but if the asm writes to a register that
1170       // other instructions in the bundle read, then we need to place it
1171       // after the bundle (to preserve the bundle semantics).
1172       bool InsertBeforeBundle;
1173       if (MI.isInlineAsm())
1174         InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI);
1175       else if (MI.isDebugInstr())
1176         InsertBeforeBundle = true;
1177       else
1178         continue;
1179 
1180       BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
1181     }
1182   }
1183 }
1184 
1185 // Check if a given instruction is of class "system".
isSystemInstr(const MachineInstr & MI)1186 static bool isSystemInstr(const MachineInstr &MI) {
1187   unsigned Opc = MI.getOpcode();
1188   switch (Opc) {
1189     case Hexagon::Y2_barrier:
1190     case Hexagon::Y2_dcfetchbo:
1191     case Hexagon::Y4_l2fetch:
1192     case Hexagon::Y5_l2fetch:
1193       return true;
1194   }
1195   return false;
1196 }
1197 
hasDeadDependence(const MachineInstr & I,const MachineInstr & J)1198 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I,
1199                                               const MachineInstr &J) {
1200   // The dependence graph may not include edges between dead definitions,
1201   // so without extra checks, we could end up packetizing two instruction
1202   // defining the same (dead) register.
1203   if (I.isCall() || J.isCall())
1204     return false;
1205   if (HII->isPredicated(I) || HII->isPredicated(J))
1206     return false;
1207 
1208   BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
1209   for (auto &MO : I.operands()) {
1210     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1211       continue;
1212     DeadDefs[MO.getReg()] = true;
1213   }
1214 
1215   for (auto &MO : J.operands()) {
1216     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1217       continue;
1218     Register R = MO.getReg();
1219     if (R != Hexagon::USR_OVF && DeadDefs[R])
1220       return true;
1221   }
1222   return false;
1223 }
1224 
hasControlDependence(const MachineInstr & I,const MachineInstr & J)1225 bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
1226                                                  const MachineInstr &J) {
1227   // A save callee-save register function call can only be in a packet
1228   // with instructions that don't write to the callee-save registers.
1229   if ((HII->isSaveCalleeSavedRegsCall(I) &&
1230        doesModifyCalleeSavedReg(J, HRI)) ||
1231       (HII->isSaveCalleeSavedRegsCall(J) &&
1232        doesModifyCalleeSavedReg(I, HRI)))
1233     return true;
1234 
1235   // Two control flow instructions cannot go in the same packet.
1236   if (isControlFlow(I) && isControlFlow(J))
1237     return true;
1238 
1239   // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
1240   // contain a speculative indirect jump,
1241   // a new-value compare jump or a dealloc_return.
1242   auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool {
1243     if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
1244       return true;
1245     if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
1246       return true;
1247     return false;
1248   };
1249 
1250   if (HII->isLoopN(I) && isBadForLoopN(J))
1251     return true;
1252   if (HII->isLoopN(J) && isBadForLoopN(I))
1253     return true;
1254 
1255   // dealloc_return cannot appear in the same packet as a conditional or
1256   // unconditional jump.
1257   return HII->isDeallocRet(I) &&
1258          (J.isBranch() || J.isCall() || J.isBarrier());
1259 }
1260 
hasRegMaskDependence(const MachineInstr & I,const MachineInstr & J)1261 bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
1262                                                  const MachineInstr &J) {
1263   // Adding I to a packet that has J.
1264 
1265   // Regmasks are not reflected in the scheduling dependency graph, so
1266   // we need to check them manually. This code assumes that regmasks only
1267   // occur on calls, and the problematic case is when we add an instruction
1268   // defining a register R to a packet that has a call that clobbers R via
1269   // a regmask. Those cannot be packetized together, because the call will
1270   // be executed last. That's also a reason why it is ok to add a call
1271   // clobbering R to a packet that defines R.
1272 
1273   // Look for regmasks in J.
1274   for (const MachineOperand &OpJ : J.operands()) {
1275     if (!OpJ.isRegMask())
1276       continue;
1277     assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call");
1278     for (const MachineOperand &OpI : I.operands()) {
1279       if (OpI.isReg()) {
1280         if (OpJ.clobbersPhysReg(OpI.getReg()))
1281           return true;
1282       } else if (OpI.isRegMask()) {
1283         // Both are regmasks. Assume that they intersect.
1284         return true;
1285       }
1286     }
1287   }
1288   return false;
1289 }
1290 
hasDualStoreDependence(const MachineInstr & I,const MachineInstr & J)1291 bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
1292                                                    const MachineInstr &J) {
1293   bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
1294   bool StoreI = I.mayStore(), StoreJ = J.mayStore();
1295   if ((SysI && StoreJ) || (SysJ && StoreI))
1296     return true;
1297 
1298   if (StoreI && StoreJ) {
1299     if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
1300       return true;
1301   } else {
1302     // A memop cannot be in the same packet with another memop or a store.
1303     // Two stores can be together, but here I and J cannot both be stores.
1304     bool MopStI = HII->isMemOp(I) || StoreI;
1305     bool MopStJ = HII->isMemOp(J) || StoreJ;
1306     if (MopStI && MopStJ)
1307       return true;
1308   }
1309 
1310   return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
1311 }
1312 
1313 // SUI is the current instruction that is outside of the current packet.
1314 // SUJ is the current instruction inside the current packet against which that
1315 // SUI will be packetized.
isLegalToPacketizeTogether(SUnit * SUI,SUnit * SUJ)1316 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
1317   assert(SUI->getInstr() && SUJ->getInstr());
1318   MachineInstr &I = *SUI->getInstr();
1319   MachineInstr &J = *SUJ->getInstr();
1320 
1321   // Clear IgnoreDepMIs when Packet starts.
1322   if (CurrentPacketMIs.size() == 1)
1323     IgnoreDepMIs.clear();
1324 
1325   MachineBasicBlock::iterator II = I.getIterator();
1326 
1327   // Solo instructions cannot go in the packet.
1328   assert(!isSoloInstruction(I) && "Unexpected solo instr!");
1329 
1330   if (cannotCoexist(I, J))
1331     return false;
1332 
1333   Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
1334   if (Dependence)
1335     return false;
1336 
1337   // Regmasks are not accounted for in the scheduling graph, so we need
1338   // to explicitly check for dependencies caused by them. They should only
1339   // appear on calls, so it's not too pessimistic to reject all regmask
1340   // dependencies.
1341   Dependence = hasRegMaskDependence(I, J);
1342   if (Dependence)
1343     return false;
1344 
1345   // Dual-store does not allow second store, if the first store is not
1346   // in SLOT0. New value store, new value jump, dealloc_return and memop
1347   // always take SLOT0. Arch spec 3.4.4.2.
1348   Dependence = hasDualStoreDependence(I, J);
1349   if (Dependence)
1350     return false;
1351 
1352   // If an instruction feeds new value jump, glue it.
1353   MachineBasicBlock::iterator NextMII = I.getIterator();
1354   ++NextMII;
1355   if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) {
1356     MachineInstr &NextMI = *NextMII;
1357 
1358     bool secondRegMatch = false;
1359     const MachineOperand &NOp0 = NextMI.getOperand(0);
1360     const MachineOperand &NOp1 = NextMI.getOperand(1);
1361 
1362     if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg())
1363       secondRegMatch = true;
1364 
1365     for (MachineInstr *PI : CurrentPacketMIs) {
1366       // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
1367       if (PI->isCall()) {
1368         Dependence = true;
1369         break;
1370       }
1371       // Validate:
1372       // 1. Packet does not have a store in it.
1373       // 2. If the first operand of the nvj is newified, and the second
1374       //    operand is also a reg, it (second reg) is not defined in
1375       //    the same packet.
1376       // 3. If the second operand of the nvj is newified, (which means
1377       //    first operand is also a reg), first reg is not defined in
1378       //    the same packet.
1379       if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
1380           HII->isLoopN(*PI)) {
1381         Dependence = true;
1382         break;
1383       }
1384       // Check #2/#3.
1385       const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
1386       if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
1387         Dependence = true;
1388         break;
1389       }
1390     }
1391 
1392     GlueToNewValueJump = true;
1393     if (Dependence)
1394       return false;
1395   }
1396 
1397   // There no dependency between a prolog instruction and its successor.
1398   if (!SUJ->isSucc(SUI))
1399     return true;
1400 
1401   for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
1402     if (FoundSequentialDependence)
1403       break;
1404 
1405     if (SUJ->Succs[i].getSUnit() != SUI)
1406       continue;
1407 
1408     SDep::Kind DepType = SUJ->Succs[i].getKind();
1409     // For direct calls:
1410     // Ignore register dependences for call instructions for packetization
1411     // purposes except for those due to r31 and predicate registers.
1412     //
1413     // For indirect calls:
1414     // Same as direct calls + check for true dependences to the register
1415     // used in the indirect call.
1416     //
1417     // We completely ignore Order dependences for call instructions.
1418     //
1419     // For returns:
1420     // Ignore register dependences for return instructions like jumpr,
1421     // dealloc return unless we have dependencies on the explicit uses
1422     // of the registers used by jumpr (like r31) or dealloc return
1423     // (like r29 or r30).
1424     unsigned DepReg = 0;
1425     const TargetRegisterClass *RC = nullptr;
1426     if (DepType == SDep::Data) {
1427       DepReg = SUJ->Succs[i].getReg();
1428       RC = HRI->getMinimalPhysRegClass(DepReg);
1429     }
1430 
1431     if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) {
1432       if (!isRegDependence(DepType))
1433         continue;
1434       if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
1435         continue;
1436     }
1437 
1438     if (DepType == SDep::Data) {
1439       if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
1440         if (promoteToDotCur(J, DepType, II, RC))
1441           continue;
1442     }
1443 
1444     // Data dependence ok if we have load.cur.
1445     if (DepType == SDep::Data && HII->isDotCurInst(J)) {
1446       if (HII->isHVXVec(I))
1447         continue;
1448     }
1449 
1450     // For instructions that can be promoted to dot-new, try to promote.
1451     if (DepType == SDep::Data) {
1452       if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
1453         if (promoteToDotNew(I, DepType, II, RC)) {
1454           PromotedToDotNew = true;
1455           if (cannotCoexist(I, J))
1456             FoundSequentialDependence = true;
1457           continue;
1458         }
1459       }
1460       if (HII->isNewValueJump(I))
1461         continue;
1462     }
1463 
1464     // For predicated instructions, if the predicates are complements then
1465     // there can be no dependence.
1466     if (HII->isPredicated(I) && HII->isPredicated(J) &&
1467         arePredicatesComplements(I, J)) {
1468       // Not always safe to do this translation.
1469       // DAG Builder attempts to reduce dependence edges using transitive
1470       // nature of dependencies. Here is an example:
1471       //
1472       // r0 = tfr_pt ... (1)
1473       // r0 = tfr_pf ... (2)
1474       // r0 = tfr_pt ... (3)
1475       //
1476       // There will be an output dependence between (1)->(2) and (2)->(3).
1477       // However, there is no dependence edge between (1)->(3). This results
1478       // in all 3 instructions going in the same packet. We ignore dependce
1479       // only once to avoid this situation.
1480       auto Itr = find(IgnoreDepMIs, &J);
1481       if (Itr != IgnoreDepMIs.end()) {
1482         Dependence = true;
1483         return false;
1484       }
1485       IgnoreDepMIs.push_back(&I);
1486       continue;
1487     }
1488 
1489     // Ignore Order dependences between unconditional direct branches
1490     // and non-control-flow instructions.
1491     if (isDirectJump(I) && !J.isBranch() && !J.isCall() &&
1492         DepType == SDep::Order)
1493       continue;
1494 
1495     // Ignore all dependences for jumps except for true and output
1496     // dependences.
1497     if (I.isConditionalBranch() && DepType != SDep::Data &&
1498         DepType != SDep::Output)
1499       continue;
1500 
1501     if (DepType == SDep::Output) {
1502       FoundSequentialDependence = true;
1503       break;
1504     }
1505 
1506     // For Order dependences:
1507     // 1. Volatile loads/stores can be packetized together, unless other
1508     //    rules prevent is.
1509     // 2. Store followed by a load is not allowed.
1510     // 3. Store followed by a store is valid.
1511     // 4. Load followed by any memory operation is allowed.
1512     if (DepType == SDep::Order) {
1513       if (!PacketizeVolatiles) {
1514         bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef();
1515         if (OrdRefs) {
1516           FoundSequentialDependence = true;
1517           break;
1518         }
1519       }
1520       // J is first, I is second.
1521       bool LoadJ = J.mayLoad(), StoreJ = J.mayStore();
1522       bool LoadI = I.mayLoad(), StoreI = I.mayStore();
1523       bool NVStoreJ = HII->isNewValueStore(J);
1524       bool NVStoreI = HII->isNewValueStore(I);
1525       bool IsVecJ = HII->isHVXVec(J);
1526       bool IsVecI = HII->isHVXVec(I);
1527 
1528       // Don't reorder the loads if there is an order dependence. This would
1529       // occur if the first instruction must go in slot0.
1530       if (LoadJ && LoadI && HII->isPureSlot0(J)) {
1531         FoundSequentialDependence = true;
1532         break;
1533       }
1534 
1535       if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() &&
1536           ((LoadJ && StoreI && !NVStoreI) ||
1537            (StoreJ && LoadI && !NVStoreJ)) &&
1538           (J.getOpcode() != Hexagon::S2_allocframe &&
1539            I.getOpcode() != Hexagon::S2_allocframe) &&
1540           (J.getOpcode() != Hexagon::L2_deallocframe &&
1541            I.getOpcode() != Hexagon::L2_deallocframe) &&
1542           (!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI))
1543         setmemShufDisabled(true);
1544       else
1545         if (StoreJ && LoadI && alias(J, I)) {
1546           FoundSequentialDependence = true;
1547           break;
1548         }
1549 
1550       if (!StoreJ)
1551         if (!LoadJ || (!LoadI && !StoreI)) {
1552           // If J is neither load nor store, assume a dependency.
1553           // If J is a load, but I is neither, also assume a dependency.
1554           FoundSequentialDependence = true;
1555           break;
1556         }
1557       // Store followed by store: not OK on V2.
1558       // Store followed by load: not OK on all.
1559       // Load followed by store: OK on all.
1560       // Load followed by load: OK on all.
1561       continue;
1562     }
1563 
1564     // Special case for ALLOCFRAME: even though there is dependency
1565     // between ALLOCFRAME and subsequent store, allow it to be packetized
1566     // in a same packet. This implies that the store is using the caller's
1567     // SP. Hence, offset needs to be updated accordingly.
1568     if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) {
1569       unsigned Opc = I.getOpcode();
1570       switch (Opc) {
1571         case Hexagon::S2_storerd_io:
1572         case Hexagon::S2_storeri_io:
1573         case Hexagon::S2_storerh_io:
1574         case Hexagon::S2_storerb_io:
1575           if (I.getOperand(0).getReg() == HRI->getStackRegister()) {
1576             // Since this store is to be glued with allocframe in the same
1577             // packet, it will use SP of the previous stack frame, i.e.
1578             // caller's SP. Therefore, we need to recalculate offset
1579             // according to this change.
1580             GlueAllocframeStore = useCallersSP(I);
1581             if (GlueAllocframeStore)
1582               continue;
1583           }
1584           break;
1585         default:
1586           break;
1587       }
1588     }
1589 
1590     // There are certain anti-dependencies that cannot be ignored.
1591     // Specifically:
1592     //   J2_call ... implicit-def %r0   ; SUJ
1593     //   R0 = ...                   ; SUI
1594     // Those cannot be packetized together, since the call will observe
1595     // the effect of the assignment to R0.
1596     if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) {
1597       // Check if I defines any volatile register. We should also check
1598       // registers that the call may read, but these happen to be a
1599       // subset of the volatile register set.
1600       for (const MachineOperand &Op : I.operands()) {
1601         if (Op.isReg() && Op.isDef()) {
1602           Register R = Op.getReg();
1603           if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
1604             continue;
1605         } else if (!Op.isRegMask()) {
1606           // If I has a regmask assume dependency.
1607           continue;
1608         }
1609         FoundSequentialDependence = true;
1610         break;
1611       }
1612     }
1613 
1614     // Skip over remaining anti-dependences. Two instructions that are
1615     // anti-dependent can share a packet, since in most such cases all
1616     // operands are read before any modifications take place.
1617     // The exceptions are branch and call instructions, since they are
1618     // executed after all other instructions have completed (at least
1619     // conceptually).
1620     if (DepType != SDep::Anti) {
1621       FoundSequentialDependence = true;
1622       break;
1623     }
1624   }
1625 
1626   if (FoundSequentialDependence) {
1627     Dependence = true;
1628     return false;
1629   }
1630 
1631   return true;
1632 }
1633 
isLegalToPruneDependencies(SUnit * SUI,SUnit * SUJ)1634 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
1635   assert(SUI->getInstr() && SUJ->getInstr());
1636   MachineInstr &I = *SUI->getInstr();
1637   MachineInstr &J = *SUJ->getInstr();
1638 
1639   bool Coexist = !cannotCoexist(I, J);
1640 
1641   if (Coexist && !Dependence)
1642     return true;
1643 
1644   // Check if the instruction was promoted to a dot-new. If so, demote it
1645   // back into a dot-old.
1646   if (PromotedToDotNew)
1647     demoteToDotOld(I);
1648 
1649   cleanUpDotCur();
1650   // Check if the instruction (must be a store) was glued with an allocframe
1651   // instruction. If so, restore its offset to its original value, i.e. use
1652   // current SP instead of caller's SP.
1653   if (GlueAllocframeStore) {
1654     useCalleesSP(I);
1655     GlueAllocframeStore = false;
1656   }
1657 
1658   if (ChangedOffset != INT64_MAX)
1659     undoChangedOffset(I);
1660 
1661   if (GlueToNewValueJump) {
1662     // Putting I and J together would prevent the new-value jump from being
1663     // packetized with the producer. In that case I and J must be separated.
1664     GlueToNewValueJump = false;
1665     return false;
1666   }
1667 
1668   if (!Coexist)
1669     return false;
1670 
1671   if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
1672     FoundSequentialDependence = false;
1673     Dependence = false;
1674     return true;
1675   }
1676 
1677   return false;
1678 }
1679 
1680 
foundLSInPacket()1681 bool HexagonPacketizerList::foundLSInPacket() {
1682   bool FoundLoad = false;
1683   bool FoundStore = false;
1684 
1685   for (auto *MJ : CurrentPacketMIs) {
1686     unsigned Opc = MJ->getOpcode();
1687     if (Opc == Hexagon::S2_allocframe || Opc == Hexagon::L2_deallocframe)
1688       continue;
1689     if (HII->isMemOp(*MJ))
1690       continue;
1691     if (MJ->mayLoad())
1692       FoundLoad = true;
1693     if (MJ->mayStore() && !HII->isNewValueStore(*MJ))
1694       FoundStore = true;
1695   }
1696   return FoundLoad && FoundStore;
1697 }
1698 
1699 
1700 MachineBasicBlock::iterator
addToPacket(MachineInstr & MI)1701 HexagonPacketizerList::addToPacket(MachineInstr &MI) {
1702   MachineBasicBlock::iterator MII = MI.getIterator();
1703   MachineBasicBlock *MBB = MI.getParent();
1704 
1705   if (CurrentPacketMIs.empty()) {
1706     PacketStalls = false;
1707     PacketStallCycles = 0;
1708   }
1709   PacketStalls |= producesStall(MI);
1710   PacketStallCycles = std::max(PacketStallCycles, calcStall(MI));
1711 
1712   if (MI.isImplicitDef()) {
1713     // Add to the packet to allow subsequent instructions to be checked
1714     // properly.
1715     CurrentPacketMIs.push_back(&MI);
1716     return MII;
1717   }
1718   assert(ResourceTracker->canReserveResources(MI));
1719 
1720   bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
1721   bool Good = true;
1722 
1723   if (GlueToNewValueJump) {
1724     MachineInstr &NvjMI = *++MII;
1725     // We need to put both instructions in the same packet: MI and NvjMI.
1726     // Either of them can require a constant extender. Try to add both to
1727     // the current packet, and if that fails, end the packet and start a
1728     // new one.
1729     ResourceTracker->reserveResources(MI);
1730     if (ExtMI)
1731       Good = tryAllocateResourcesForConstExt(true);
1732 
1733     bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
1734     if (Good) {
1735       if (ResourceTracker->canReserveResources(NvjMI))
1736         ResourceTracker->reserveResources(NvjMI);
1737       else
1738         Good = false;
1739     }
1740     if (Good && ExtNvjMI)
1741       Good = tryAllocateResourcesForConstExt(true);
1742 
1743     if (!Good) {
1744       endPacket(MBB, MI);
1745       assert(ResourceTracker->canReserveResources(MI));
1746       ResourceTracker->reserveResources(MI);
1747       if (ExtMI) {
1748         assert(canReserveResourcesForConstExt());
1749         tryAllocateResourcesForConstExt(true);
1750       }
1751       assert(ResourceTracker->canReserveResources(NvjMI));
1752       ResourceTracker->reserveResources(NvjMI);
1753       if (ExtNvjMI) {
1754         assert(canReserveResourcesForConstExt());
1755         reserveResourcesForConstExt();
1756       }
1757     }
1758     CurrentPacketMIs.push_back(&MI);
1759     CurrentPacketMIs.push_back(&NvjMI);
1760     return MII;
1761   }
1762 
1763   ResourceTracker->reserveResources(MI);
1764   if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
1765     endPacket(MBB, MI);
1766     if (PromotedToDotNew)
1767       demoteToDotOld(MI);
1768     if (GlueAllocframeStore) {
1769       useCalleesSP(MI);
1770       GlueAllocframeStore = false;
1771     }
1772     ResourceTracker->reserveResources(MI);
1773     reserveResourcesForConstExt();
1774   }
1775 
1776   CurrentPacketMIs.push_back(&MI);
1777   return MII;
1778 }
1779 
endPacket(MachineBasicBlock * MBB,MachineBasicBlock::iterator EndMI)1780 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
1781                                       MachineBasicBlock::iterator EndMI) {
1782   // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
1783   LLVM_DEBUG({
1784     if (!CurrentPacketMIs.empty()) {
1785       dbgs() << "Finalizing packet:\n";
1786       unsigned Idx = 0;
1787       for (MachineInstr *MI : CurrentPacketMIs) {
1788         unsigned R = ResourceTracker->getUsedResources(Idx++);
1789         dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
1790       }
1791     }
1792   });
1793 
1794   bool memShufDisabled = getmemShufDisabled();
1795   if (memShufDisabled && !foundLSInPacket()) {
1796     setmemShufDisabled(false);
1797     LLVM_DEBUG(dbgs() << "  Not added to NoShufPacket\n");
1798   }
1799   memShufDisabled = getmemShufDisabled();
1800 
1801   OldPacketMIs.clear();
1802   for (MachineInstr *MI : CurrentPacketMIs) {
1803     MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
1804     for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
1805       OldPacketMIs.push_back(&I);
1806   }
1807   CurrentPacketMIs.clear();
1808 
1809   if (OldPacketMIs.size() > 1) {
1810     MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
1811     MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
1812     finalizeBundle(*MBB, FirstMI, LastMI);
1813     auto BundleMII = std::prev(FirstMI);
1814     if (memShufDisabled)
1815       HII->setBundleNoShuf(BundleMII);
1816 
1817     setmemShufDisabled(false);
1818   }
1819 
1820   PacketHasDuplex = false;
1821   PacketHasSLOT0OnlyInsn = false;
1822   ResourceTracker->clearResources();
1823   LLVM_DEBUG(dbgs() << "End packet\n");
1824 }
1825 
shouldAddToPacket(const MachineInstr & MI)1826 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
1827   if (Minimal)
1828     return false;
1829 
1830   if (producesStall(MI))
1831     return false;
1832 
1833   // If TinyCore with Duplexes is enabled, check if this MI can form a Duplex
1834   // with any other instruction in the existing packet.
1835   auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
1836   // Constraint 1: Only one duplex allowed per packet.
1837   // Constraint 2: Consider duplex checks only if there is at least one
1838   // instruction in a packet.
1839   // Constraint 3: If one of the existing instructions in the packet has a
1840   // SLOT0 only instruction that can not be duplexed, do not attempt to form
1841   // duplexes. (TODO: This will invalidate the L4_return* instructions to form a
1842   // duplex)
1843   if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > 0 &&
1844       !PacketHasDuplex) {
1845     // Check for SLOT0 only non-duplexable instruction in packet.
1846     for (auto &MJ : CurrentPacketMIs)
1847       PacketHasSLOT0OnlyInsn |= HII->isPureSlot0(*MJ);
1848     // Get the Big Core Opcode (dup_*).
1849     int Opcode = HII->getDuplexOpcode(MI, false);
1850     if (Opcode >= 0) {
1851       // We now have an instruction that can be duplexed.
1852       for (auto &MJ : CurrentPacketMIs) {
1853         if (HII->isDuplexPair(MI, *MJ) && !PacketHasSLOT0OnlyInsn) {
1854           PacketHasDuplex = true;
1855           return true;
1856         }
1857       }
1858       // If it can not be duplexed, check if there is a valid transition in DFA
1859       // with the original opcode.
1860       MachineInstr &MIRef = const_cast<MachineInstr &>(MI);
1861       MIRef.setDesc(HII->get(Opcode));
1862       return ResourceTracker->canReserveResources(MIRef);
1863     }
1864   }
1865 
1866   return true;
1867 }
1868 
1869 // V60 forward scheduling.
calcStall(const MachineInstr & I)1870 unsigned int HexagonPacketizerList::calcStall(const MachineInstr &I) {
1871   // Check whether the previous packet is in a different loop. If this is the
1872   // case, there is little point in trying to avoid a stall because that would
1873   // favor the rare case (loop entry) over the common case (loop iteration).
1874   //
1875   // TODO: We should really be able to check all the incoming edges if this is
1876   // the first packet in a basic block, so we can avoid stalls from the loop
1877   // backedge.
1878   if (!OldPacketMIs.empty()) {
1879     auto *OldBB = OldPacketMIs.front()->getParent();
1880     auto *ThisBB = I.getParent();
1881     if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
1882       return 0;
1883   }
1884 
1885   SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
1886   if (!SUI)
1887     return 0;
1888 
1889   // If the latency is 0 and there is a data dependence between this
1890   // instruction and any instruction in the current packet, we disregard any
1891   // potential stalls due to the instructions in the previous packet. Most of
1892   // the instruction pairs that can go together in the same packet have 0
1893   // latency between them. The exceptions are
1894   // 1. NewValueJumps as they're generated much later and the latencies can't
1895   // be changed at that point.
1896   // 2. .cur instructions, if its consumer has a 0 latency successor (such as
1897   // .new). In this case, the latency between .cur and the consumer stays
1898   // non-zero even though we can have  both .cur and .new in the same packet.
1899   // Changing the latency to 0 is not an option as it causes software pipeliner
1900   // to not pipeline in some cases.
1901 
1902   // For Example:
1903   // {
1904   //   I1:  v6.cur = vmem(r0++#1)
1905   //   I2:  v7 = valign(v6,v4,r2)
1906   //   I3:  vmem(r5++#1) = v7.new
1907   // }
1908   // Here I2 and I3 has 0 cycle latency, but I1 and I2 has 2.
1909 
1910   for (auto *J : CurrentPacketMIs) {
1911     SUnit *SUJ = MIToSUnit[J];
1912     for (auto &Pred : SUI->Preds)
1913       if (Pred.getSUnit() == SUJ)
1914         if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) ||
1915             HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I))
1916           return 0;
1917   }
1918 
1919   // Check if the latency is greater than one between this instruction and any
1920   // instruction in the previous packet.
1921   for (auto *J : OldPacketMIs) {
1922     SUnit *SUJ = MIToSUnit[J];
1923     for (auto &Pred : SUI->Preds)
1924       if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
1925         return Pred.getLatency();
1926   }
1927 
1928   return 0;
1929 }
1930 
producesStall(const MachineInstr & I)1931 bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
1932   unsigned int Latency = calcStall(I);
1933   if (Latency == 0)
1934     return false;
1935   // Ignore stall unless it stalls more than previous instruction in packet
1936   if (PacketStalls)
1937     return Latency > PacketStallCycles;
1938   return true;
1939 }
1940 
1941 //===----------------------------------------------------------------------===//
1942 //                         Public Constructor Functions
1943 //===----------------------------------------------------------------------===//
1944 
createHexagonPacketizer(bool Minimal)1945 FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
1946   return new HexagonPacketizer(Minimal);
1947 }
1948