xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===---HexagonLoadStoreWidening.cpp---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // HexagonStoreWidening:
9 // Replace sequences of "narrow" stores to adjacent memory locations with
10 // a fewer "wide" stores that have the same effect.
11 // For example, replace:
12 //   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
13 //   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
14 // with
15 //   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
16 // The above is the general idea.  The actual cases handled by the code
17 // may be a bit more complex.
18 // The purpose of this pass is to reduce the number of outstanding stores,
19 // or as one could say, "reduce store queue pressure".  Also, wide stores
20 // mean fewer stores, and since there are only two memory instructions allowed
21 // per packet, it also means fewer packets, and ultimately fewer cycles.
22 //
23 // HexagonLoadWidening does the same thing as HexagonStoreWidening but
24 // for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
25 // For example:
26 // Replace
27 //   %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
28 //   %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
29 // with
30 //   %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
31 //   %2:intregs = COPY %4.isub_lo:doubleregs
32 //   %3:intregs = COPY %4.isub_hi:doubleregs
33 //
34 // LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
35 // insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
36 
37 //===---------------------------------------------------------------------===//
38 
39 #include "Hexagon.h"
40 #include "HexagonInstrInfo.h"
41 #include "HexagonRegisterInfo.h"
42 #include "HexagonSubtarget.h"
43 #include "llvm/ADT/SmallPtrSet.h"
44 #include "llvm/Analysis/AliasAnalysis.h"
45 #include "llvm/Analysis/MemoryLocation.h"
46 #include "llvm/CodeGen/MachineBasicBlock.h"
47 #include "llvm/CodeGen/MachineFunction.h"
48 #include "llvm/CodeGen/MachineFunctionPass.h"
49 #include "llvm/CodeGen/MachineInstr.h"
50 #include "llvm/CodeGen/MachineInstrBuilder.h"
51 #include "llvm/CodeGen/MachineMemOperand.h"
52 #include "llvm/CodeGen/MachineOperand.h"
53 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/MC/MCInstrDesc.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/MathExtras.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "hexagon-load-store-widening"
70 
71 static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
72     "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
73     cl::desc("Limit block size to analyze in load/store widening pass"));
74 
75 namespace {
76 
77 struct HexagonLoadStoreWidening {
78   enum WideningMode { Store, Load };
79   const HexagonInstrInfo *TII;
80   const HexagonRegisterInfo *TRI;
81   MachineRegisterInfo *MRI;
82   AliasAnalysis *AA;
83   MachineFunction *MF;
84 
85 public:
HexagonLoadStoreWidening__anon01fcdbeb0111::HexagonLoadStoreWidening86   HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87                            const HexagonRegisterInfo *TRI,
88                            MachineRegisterInfo *MRI, AliasAnalysis *AA,
89                            MachineFunction *MF, bool StoreMode)
90       : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91         Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92         HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93 
94   bool run();
95 
96 private:
97   const bool Mode;
98   const unsigned MaxWideSize = 8;
99   const HexagonInstrInfo *HII = nullptr;
100 
101   using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102   using InstrGroup = SmallVector<MachineInstr *, 8>;
103   using InstrGroupList = SmallVector<InstrGroup, 8>;
104 
105   InstrSet ProcessedInsts;
106 
107   unsigned getBaseAddressRegister(const MachineInstr *MI);
108   int64_t getOffset(const MachineInstr *MI);
109   int64_t getPostIncrementValue(const MachineInstr *MI);
110   bool handledInstType(const MachineInstr *MI);
111 
112   void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113   void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114   bool processBasicBlock(MachineBasicBlock &MBB);
115   bool processGroup(InstrGroup &Group);
116   bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117                    InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118   bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119   bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120   bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121   bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122   bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123   bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124 };
125 
126 struct HexagonStoreWidening : public MachineFunctionPass {
127   static char ID;
128 
HexagonStoreWidening__anon01fcdbeb0111::HexagonStoreWidening129   HexagonStoreWidening() : MachineFunctionPass(ID) {}
130 
getPassName__anon01fcdbeb0111::HexagonStoreWidening131   StringRef getPassName() const override { return "Hexagon Store Widening"; }
132 
getAnalysisUsage__anon01fcdbeb0111::HexagonStoreWidening133   void getAnalysisUsage(AnalysisUsage &AU) const override {
134     AU.addRequired<AAResultsWrapperPass>();
135     AU.addPreserved<AAResultsWrapperPass>();
136     MachineFunctionPass::getAnalysisUsage(AU);
137   }
138 
runOnMachineFunction__anon01fcdbeb0111::HexagonStoreWidening139   bool runOnMachineFunction(MachineFunction &MFn) override {
140     if (skipFunction(MFn.getFunction()))
141       return false;
142 
143     auto &ST = MFn.getSubtarget<HexagonSubtarget>();
144     const HexagonInstrInfo *TII = ST.getInstrInfo();
145     const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
146     MachineRegisterInfo *MRI = &MFn.getRegInfo();
147     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
148 
149     return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
150   }
151 };
152 
153 struct HexagonLoadWidening : public MachineFunctionPass {
154   static char ID;
155 
HexagonLoadWidening__anon01fcdbeb0111::HexagonLoadWidening156   HexagonLoadWidening() : MachineFunctionPass(ID) {}
157 
getPassName__anon01fcdbeb0111::HexagonLoadWidening158   StringRef getPassName() const override { return "Hexagon Load Widening"; }
159 
getAnalysisUsage__anon01fcdbeb0111::HexagonLoadWidening160   void getAnalysisUsage(AnalysisUsage &AU) const override {
161     AU.addRequired<AAResultsWrapperPass>();
162     AU.addPreserved<AAResultsWrapperPass>();
163     MachineFunctionPass::getAnalysisUsage(AU);
164   }
165 
runOnMachineFunction__anon01fcdbeb0111::HexagonLoadWidening166   bool runOnMachineFunction(MachineFunction &MFn) override {
167     if (skipFunction(MFn.getFunction()))
168       return false;
169 
170     auto &ST = MFn.getSubtarget<HexagonSubtarget>();
171     const HexagonInstrInfo *TII = ST.getInstrInfo();
172     const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
173     MachineRegisterInfo *MRI = &MFn.getRegInfo();
174     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
175     return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
176   }
177 };
178 
179 char HexagonStoreWidening::ID = 0;
180 char HexagonLoadWidening::ID = 0;
181 
182 } // end anonymous namespace
183 
184 INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
185                       "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)186 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
187 INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
188                     "Hexagon Store Widening", false, false)
189 
190 INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
191                       "Hexagon Load Widening", false, false)
192 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
193 INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
194                     "Hexagon Load Widening", false, false)
195 
196 static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
197   assert(!MI->memoperands_empty() && "Expecting memory operands");
198   return **MI->memoperands_begin();
199 }
200 
201 unsigned
getBaseAddressRegister(const MachineInstr * MI)202 HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
203   assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
204   unsigned Base, Offset;
205   HII->getBaseAndOffsetPosition(*MI, Base, Offset);
206   const MachineOperand &MO = MI->getOperand(Base);
207   assert(MO.isReg() && "Expecting register operand");
208   return MO.getReg();
209 }
210 
getOffset(const MachineInstr * MI)211 int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
212   assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
213 
214   // On Hexagon, post-incs always have an offset of 0
215   // There is no Offset operand to post-incs
216   if (HII->isPostIncrement(*MI))
217     return 0;
218 
219   unsigned Base, Offset;
220 
221   HII->getBaseAndOffsetPosition(*MI, Base, Offset);
222   const MachineOperand &MO = MI->getOperand(Offset);
223   switch (MO.getType()) {
224   case MachineOperand::MO_Immediate:
225     return MO.getImm();
226   case MachineOperand::MO_GlobalAddress:
227     return MO.getOffset();
228   default:
229     break;
230   }
231   llvm_unreachable("Expecting an immediate or global operand");
232 }
233 
234 inline int64_t
getPostIncrementValue(const MachineInstr * MI)235 HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
236   unsigned Base, PostIncIdx;
237   HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx);
238   const MachineOperand &MO = MI->getOperand(PostIncIdx);
239   return MO.getImm();
240 }
241 
242 // Filtering function: any loads/stores whose opcodes are not "approved" of by
243 // this function will not be subjected to widening.
handledInstType(const MachineInstr * MI)244 inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
245   unsigned Opc = MI->getOpcode();
246   if (Mode == WideningMode::Store) {
247     switch (Opc) {
248     case Hexagon::S4_storeirb_io:
249     case Hexagon::S4_storeirh_io:
250     case Hexagon::S4_storeiri_io:
251     case Hexagon::S2_storeri_io:
252       // Base address must be a register. (Implement FI later.)
253       return MI->getOperand(0).isReg();
254     case Hexagon::S2_storeri_pi:
255       return MI->getOperand(1).isReg();
256     }
257   } else {
258     // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
259     // loads. So we only widen 32 bit loads as we don't need to select the
260     // right bits with AND & SHIFT ops.
261     switch (Opc) {
262     case Hexagon::L2_loadri_io:
263       // Base address must be a register and offset must be immediate.
264       return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
265              MI->getOperand(2).isImm();
266     case Hexagon::L2_loadri_pi:
267       return !MI->memoperands_empty() && MI->getOperand(2).isReg();
268     }
269   }
270   return false;
271 }
272 
addDefsUsesToList(const MachineInstr * MI,DenseSet<Register> & RegDefs,DenseSet<Register> & RegUses)273 static void addDefsUsesToList(const MachineInstr *MI,
274                               DenseSet<Register> &RegDefs,
275                               DenseSet<Register> &RegUses) {
276   for (const auto &Op : MI->operands()) {
277     if (!Op.isReg())
278       continue;
279     if (Op.isDef())
280       RegDefs.insert(Op.getReg());
281     if (Op.readsReg())
282       RegUses.insert(Op.getReg());
283   }
284 }
285 
canSwapInstructions(const MachineInstr * A,const MachineInstr * B)286 bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
287                                                    const MachineInstr *B) {
288   DenseSet<Register> ARegDefs;
289   DenseSet<Register> ARegUses;
290   addDefsUsesToList(A, ARegDefs, ARegUses);
291   if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
292       (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
293     return false;
294   for (const auto &BOp : B->operands()) {
295     if (!BOp.isReg())
296       continue;
297     if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
298       return false;
299     if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
300       return false;
301   }
302   return true;
303 }
304 
305 // Inspect a machine basic block, and generate groups out of loads/stores
306 // encountered in the block.
307 //
308 // A load/store group is a group of loads or stores that use the same base
309 // register, and which can be reordered within that group without altering the
310 // semantics of the program.  A single group could be widened as
311 // a whole, if there existed a single load/store instruction with the same
312 // semantics as the entire group.  In many cases, a single group may need more
313 // than one wide load or store.
createGroups(MachineBasicBlock & MBB,InstrGroupList & StoreGroups)314 void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
315                                             InstrGroupList &StoreGroups) {
316   // Traverse all instructions and if we encounter
317   // a load/store, then try to create a group starting at that instruction
318   // i.e. a sequence of independent loads/stores that can be widened.
319   for (auto I = MBB.begin(); I != MBB.end(); ++I) {
320     MachineInstr *MI = &(*I);
321     if (!handledInstType(MI))
322       continue;
323     if (ProcessedInsts.count(MI))
324       continue;
325 
326     // Found a store.  Try to create a store group.
327     InstrGroup G;
328     createGroup(MI, G);
329     if (G.size() > 1)
330       StoreGroups.push_back(G);
331   }
332 }
333 
334 // Create a single load/store group.  The insts need to be independent between
335 // themselves, and also there cannot be other instructions between them
336 // that could read or modify storage being read from or stored into.
createGroup(MachineInstr * BaseInst,InstrGroup & Group)337 void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
338                                            InstrGroup &Group) {
339   assert(handledInstType(BaseInst) && "Unexpected instruction");
340   unsigned BaseReg = getBaseAddressRegister(BaseInst);
341   InstrGroup Other;
342 
343   Group.push_back(BaseInst);
344   LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
345   auto End = BaseInst->getParent()->end();
346   auto I = BaseInst->getIterator();
347 
348   while (true) {
349     I = std::next(I);
350     if (I == End)
351       break;
352     MachineInstr *MI = &(*I);
353 
354     // Assume calls are aliased to everything.
355     if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
356         MI->hasOrderedMemoryRef())
357       return;
358 
359     if (!handledInstType(MI)) {
360       if (MI->mayLoadOrStore())
361         Other.push_back(MI);
362       continue;
363     }
364 
365     // We have a handledInstType instruction
366     // If this load/store instruction is aliased with anything already in the
367     // group, terminate the group now.
368     for (auto GI : Group)
369       if (GI->mayAlias(AA, *MI, true))
370         return;
371     if (Mode == WideningMode::Load) {
372       // Check if current load MI can be moved to the first load instruction
373       // in Group. If any load instruction aliases with memory instructions in
374       // Other, terminate the group.
375       for (auto MemI : Other)
376         if (!canSwapInstructions(MI, MemI))
377           return;
378     } else {
379       // Check if store instructions in the group can be moved to current
380       // store MI. If any store instruction aliases with memory instructions
381       // in Other, terminate the group.
382       for (auto MemI : Other) {
383         if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
384             0)
385           continue;
386         for (auto GI : Group)
387           if (!canSwapInstructions(MemI, GI))
388             return;
389       }
390     }
391 
392     unsigned BR = getBaseAddressRegister(MI);
393     if (BR == BaseReg) {
394       LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
395       Group.push_back(MI);
396       ProcessedInsts.insert(MI);
397     }
398   } // while
399 }
400 
401 // Check if load/store instructions S1 and S2 are adjacent.  More precisely,
402 // S2 has to access memory immediately following that accessed by S1.
areAdjacent(const MachineInstr * S1,const MachineInstr * S2)403 bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
404                                            const MachineInstr *S2) {
405   if (!handledInstType(S1) || !handledInstType(S2))
406     return false;
407 
408   const MachineMemOperand &S1MO = getMemTarget(S1);
409 
410   // Currently only handling immediate stores.
411   int Off1 = getOffset(S1);
412   int Off2 = getOffset(S2);
413 
414   return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
415                      : int(Off1 + S1MO.getSize().getValue()) == Off2;
416 }
417 
418 /// Given a sequence of adjacent loads/stores, and a maximum size of a single
419 /// wide inst, pick a group of insts that can be replaced by a single load/store
420 /// of size not exceeding MaxSize.  The selected sequence will be recorded
421 /// in OG ("old group" of instructions).
422 /// OG should be empty on entry, and should be left empty if the function
423 /// fails.
selectInsts(InstrGroup::iterator Begin,InstrGroup::iterator End,InstrGroup & OG,unsigned & TotalSize,unsigned MaxSize)424 bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
425                                            InstrGroup::iterator End,
426                                            InstrGroup &OG, unsigned &TotalSize,
427                                            unsigned MaxSize) {
428   assert(Begin != End && "No instructions to analyze");
429   assert(OG.empty() && "Old group not empty on entry");
430 
431   if (std::distance(Begin, End) <= 1)
432     return false;
433 
434   MachineInstr *FirstMI = *Begin;
435   assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
436   const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
437   if (!FirstMMO.getType().isValid())
438     return false;
439 
440   unsigned Alignment = FirstMMO.getAlign().value();
441   unsigned SizeAccum = FirstMMO.getSize().getValue();
442   unsigned FirstOffset = getOffset(FirstMI);
443 
444   // The initial value of SizeAccum should always be a power of 2.
445   assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
446 
447   // If the size of the first store equals to or exceeds the limit, do nothing.
448   if (SizeAccum >= MaxSize)
449     return false;
450 
451   // If the size of the first load/store is greater than or equal to the address
452   // stored to, then the inst cannot be made any wider.
453   if (SizeAccum >= Alignment) {
454     LLVM_DEBUG(
455         dbgs() << "Size of load/store greater than equal to its alignment\n");
456     return false;
457   }
458 
459   // The offset of a load/store will put restrictions on how wide the inst can
460   // be.  Offsets in loads/stores of size 2^n bytes need to have the n lowest
461   // bits be 0.  If the first inst already exhausts the offset limits, quit.
462   // Test this by checking if the next wider size would exceed the limit.
463   // For post-increment instructions, the increment amount needs to follow the
464   // same rule.
465   unsigned OffsetOrIncVal = 0;
466   if (HII->isPostIncrement(*FirstMI))
467     OffsetOrIncVal = getPostIncrementValue(FirstMI);
468   else
469     OffsetOrIncVal = FirstOffset;
470   if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
471     LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
472                       << " value: " << getPostIncrementValue(FirstMI)
473                       << " is invalid in the widened version\n");
474     return false;
475   }
476 
477   OG.push_back(FirstMI);
478   MachineInstr *S1 = FirstMI;
479 
480   // Pow2Num will be the largest number of elements in OG such that the sum
481   // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
482   unsigned Pow2Num = 1;
483   unsigned Pow2Size = SizeAccum;
484   bool HavePostInc = HII->isPostIncrement(*S1);
485 
486   // Be greedy: keep accumulating insts as long as they are to adjacent
487   // memory locations, and as long as the total number of bytes stored
488   // does not exceed the limit (MaxSize).
489   // Keep track of when the total size covered is a power of 2, since
490   // this is a size a single load/store can cover.
491   for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
492     MachineInstr *S2 = *I;
493     // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
494     // any other store to fill the "hole".
495     if (!areAdjacent(S1, S2))
496       break;
497 
498     // Cannot widen two post increments, need to return two registers
499     // with incremented values
500     if (HavePostInc && HII->isPostIncrement(*S2))
501       break;
502 
503     unsigned S2Size = getMemTarget(S2).getSize().getValue();
504     if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
505       break;
506 
507     OG.push_back(S2);
508     SizeAccum += S2Size;
509     if (isPowerOf2_32(SizeAccum)) {
510       Pow2Num = OG.size();
511       Pow2Size = SizeAccum;
512     }
513     if ((2 * Pow2Size - 1) & FirstOffset)
514       break;
515 
516     S1 = S2;
517   }
518 
519   // The insts don't add up to anything that can be widened.  Clean up.
520   if (Pow2Num <= 1) {
521     OG.clear();
522     return false;
523   }
524 
525   // Only leave the loads/stores being widened.
526   OG.resize(Pow2Num);
527   TotalSize = Pow2Size;
528   return true;
529 }
530 
531 /// Given an "old group" OG of insts, create a "new group" NG of instructions
532 /// to replace them.
createWideInsts(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)533 bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
534                                                unsigned TotalSize) {
535   if (Mode == WideningMode::Store) {
536     return createWideStores(OG, NG, TotalSize);
537   }
538   return createWideLoads(OG, NG, TotalSize);
539 }
540 
541 /// Given an "old group" OG of stores, create a "new group" NG of instructions
542 /// to replace them.  Ideally, NG would only have a single instruction in it,
543 /// but that may only be possible for store-immediate.
createWideStores(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)544 bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
545                                                 unsigned TotalSize) {
546   // XXX Current limitations:
547   // - only handle a TotalSize of up to 8
548 
549   LLVM_DEBUG(dbgs() << "Creating wide stores\n");
550   if (TotalSize > MaxWideSize)
551     return false;
552 
553   uint64_t Acc = 0; // Value accumulator.
554   unsigned Shift = 0;
555   bool HaveImm = false;
556   bool HaveReg = false;
557 
558   for (MachineInstr *MI : OG) {
559     const MachineMemOperand &MMO = getMemTarget(MI);
560     MachineOperand &SO = HII->isPostIncrement(*MI)
561                              ? MI->getOperand(3)
562                              : MI->getOperand(2); // Source.
563     unsigned NBits;
564     uint64_t Mask;
565     uint64_t Val;
566 
567     switch (SO.getType()) {
568     case MachineOperand::MO_Immediate:
569       LLVM_DEBUG(dbgs() << "Have store immediate\n");
570       HaveImm = true;
571 
572       NBits = MMO.getSizeInBits().toRaw();
573       Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
574       Val = (SO.getImm() & Mask) << Shift;
575       Acc |= Val;
576       Shift += NBits;
577       break;
578     case MachineOperand::MO_Register:
579       HaveReg = true;
580       break;
581     default:
582       LLVM_DEBUG(dbgs() << "Unhandled store\n");
583       return false;
584     }
585   }
586 
587   if (HaveImm && HaveReg) {
588     LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
589     return false;
590   }
591 
592   MachineInstr *FirstSt = OG.front();
593   DebugLoc DL = OG.back()->getDebugLoc();
594   const MachineMemOperand &OldM = getMemTarget(FirstSt);
595   MachineMemOperand *NewM =
596       MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
597                                TotalSize, OldM.getAlign(), OldM.getAAInfo());
598   MachineInstr *StI;
599   MachineOperand &MR =
600       (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
601                                       : FirstSt->getOperand(0));
602   auto SecondSt = OG.back();
603   if (HaveReg) {
604     MachineOperand FReg =
605         (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
606                                         : FirstSt->getOperand(2));
607     // Post increments appear first in the sorted group.
608     // Cannot have a post increment for the second instruction
609     assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
610     MachineOperand SReg = SecondSt->getOperand(2);
611     assert(FReg.isReg() && SReg.isReg() &&
612            "Cannot merge store register and store imm");
613     const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
614     Register VReg =
615         MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
616     MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
617     NG.push_back(CombI);
618 
619     if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
620       const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
621       auto IncDestMO = FirstSt->getOperand(0);
622       auto IncMO = FirstSt->getOperand(2);
623       StI =
624           BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
625     } else {
626       const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
627       auto OffMO = FirstSt->getOperand(1);
628       StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
629     }
630     StI->addMemOperand(*MF, NewM);
631     NG.push_back(StI);
632     return true;
633   }
634 
635   // Handle store immediates
636   // There are no post increment store immediates on Hexagon
637   assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
638   auto Off = FirstSt->getOperand(1).getImm();
639   if (TotalSize == 8) {
640     // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
641     uint64_t Mask = 0xFFFFFFFFU;
642     int LowerAcc = int(Mask & Acc);
643     int UpperAcc = Acc >> 32;
644     Register DReg =
645         MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
646     MachineInstr *CombI;
647     if (Acc != 0) {
648       const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
649       const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
650       Register VReg = MF->getRegInfo().createVirtualRegister(RC);
651       MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
652       NG.push_back(TfrI);
653       const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
654       CombI = BuildMI(*MF, DL, CombD, DReg)
655                   .addImm(UpperAcc)
656                   .addReg(VReg, RegState::Kill);
657     }
658     // If immediates are 0, we do not need A2_tfrsi
659     else {
660       const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
661       CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
662     }
663     NG.push_back(CombI);
664     const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
665     StI =
666         BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
667   } else if (Acc < 0x10000) {
668     // Create mem[hw] = #Acc
669     unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
670                     : (TotalSize == 4) ? Hexagon::S4_storeiri_io
671                                        : 0;
672     assert(WOpc && "Unexpected size");
673 
674     int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
675     const MCInstrDesc &StD = TII->get(WOpc);
676     StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
677   } else {
678     // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
679     const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
680     const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
681     Register VReg = MF->getRegInfo().createVirtualRegister(RC);
682     MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
683     NG.push_back(TfrI);
684 
685     unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
686                     : (TotalSize == 4) ? Hexagon::S2_storeri_io
687                                        : 0;
688     assert(WOpc && "Unexpected size");
689 
690     const MCInstrDesc &StD = TII->get(WOpc);
691     StI =
692         BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
693   }
694   StI->addMemOperand(*MF, NewM);
695   NG.push_back(StI);
696 
697   return true;
698 }
699 
700 /// Given an "old group" OG of loads, create a "new group" NG of instructions
701 /// to replace them.  Ideally, NG would only have a single instruction in it,
702 /// but that may only be possible for double register loads.
createWideLoads(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)703 bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
704                                                unsigned TotalSize) {
705   LLVM_DEBUG(dbgs() << "Creating wide loads\n");
706   // XXX Current limitations:
707   // - only expect stores of immediate values in OG,
708   // - only handle a TotalSize of up to 8
709   if (TotalSize > MaxWideSize)
710     return false;
711   assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");
712 
713   MachineInstr *FirstLd = OG.front();
714   const MachineMemOperand &OldM = getMemTarget(FirstLd);
715   MachineMemOperand *NewM =
716       MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
717                                TotalSize, OldM.getAlign(), OldM.getAAInfo());
718 
719   MachineOperand &MR = FirstLd->getOperand(0);
720   MachineOperand &MRBase =
721       (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
722                                       : FirstLd->getOperand(1));
723   DebugLoc DL = OG.back()->getDebugLoc();
724 
725   // Create the double register Load Instruction.
726   Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
727   MachineInstr *LdI;
728 
729   // Post increments appear first in the sorted group
730   if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
731     auto IncDestMO = FirstLd->getOperand(1);
732     auto IncMO = FirstLd->getOperand(3);
733     LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
734               .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
735               .add(IncDestMO)
736               .add(MRBase)
737               .add(IncMO);
738     LdI->addMemOperand(*MF, NewM);
739   } else {
740     auto OffMO = FirstLd->getOperand(2);
741     LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
742               .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
743               .add(MRBase)
744               .add(OffMO);
745     LdI->addMemOperand(*MF, NewM);
746   }
747   NG.push_back(LdI);
748 
749   auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
750                         MachineInstr *DstReg) {
751     Register DestReg = DstReg->getOperand(0).getReg();
752     return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
753         .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
754   };
755 
756   MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
757   MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
758   NG.push_back(LdI_lo);
759   NG.push_back(LdI_hi);
760 
761   return true;
762 }
763 
764 // Replace instructions from the old group OG with instructions from the
765 // new group NG.  Conceptually, remove all instructions in OG, and then
766 // insert all instructions in NG, starting at where the first instruction
767 // from OG was (in the order in which they appeared in the basic block).
768 // (The ordering in OG does not have to match the order in the basic block.)
replaceInsts(InstrGroup & OG,InstrGroup & NG)769 bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
770   LLVM_DEBUG({
771     dbgs() << "Replacing:\n";
772     for (auto I : OG)
773       dbgs() << "  " << *I;
774     dbgs() << "with\n";
775     for (auto I : NG)
776       dbgs() << "  " << *I;
777   });
778 
779   MachineBasicBlock *MBB = OG.back()->getParent();
780   MachineBasicBlock::iterator InsertAt = MBB->end();
781 
782   // Need to establish the insertion point.
783   // For loads the best one is right before the first load in the OG,
784   // but in the order in which the insts occur in the program list.
785   // For stores the best point is right after the last store in the OG.
786   // Since the ordering in OG does not correspond
787   // to the order in the program list, we need to do some work to find
788   // the insertion point.
789 
790   // Create a set of all instructions in OG (for quick lookup).
791   InstrSet OldMemInsts(llvm::from_range, OG);
792 
793   if (Mode == WideningMode::Load) {
794     // Find the first load instruction in the block that is present in OG.
795     for (auto &I : *MBB) {
796       if (OldMemInsts.count(&I)) {
797         InsertAt = I;
798         break;
799       }
800     }
801 
802     assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");
803 
804     for (auto *I : NG)
805       MBB->insert(InsertAt, I);
806   } else {
807     // Find the last store instruction in the block that is present in OG.
808     auto I = MBB->rbegin();
809     for (; I != MBB->rend(); ++I) {
810       if (OldMemInsts.count(&(*I))) {
811         InsertAt = (*I).getIterator();
812         break;
813       }
814     }
815 
816     assert((I != MBB->rend()) && "Cannot locate any store from the group");
817 
818     for (auto I = NG.rbegin(); I != NG.rend(); ++I)
819       MBB->insertAfter(InsertAt, *I);
820   }
821 
822   for (auto *I : OG)
823     I->eraseFromParent();
824 
825   return true;
826 }
827 
828 // Break up the group into smaller groups, each of which can be replaced by
829 // a single wide load/store.  Widen each such smaller group and replace the old
830 // instructions with the widened ones.
processGroup(InstrGroup & Group)831 bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
832   bool Changed = false;
833   InstrGroup::iterator I = Group.begin(), E = Group.end();
834   InstrGroup OG, NG; // Old and new groups.
835   unsigned CollectedSize;
836 
837   while (I != E) {
838     OG.clear();
839     NG.clear();
840 
841     bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) &&
842                 createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG);
843     if (!Succ)
844       continue;
845 
846     assert(OG.size() > 1 && "Created invalid group");
847     assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
848     I += OG.size() - 1;
849 
850     Changed = true;
851   }
852 
853   return Changed;
854 }
855 
856 // Process a single basic block: create the load/store groups, and replace them
857 // with the widened insts, if possible.  Processing of each basic block
858 // is independent from processing of any other basic block.  This transfor-
859 // mation could be stopped after having processed any basic block without
860 // any ill effects (other than not having performed widening in the unpro-
861 // cessed blocks).  Also, the basic blocks can be processed in any order.
processBasicBlock(MachineBasicBlock & MBB)862 bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
863   InstrGroupList SGs;
864   bool Changed = false;
865 
866   // To prevent long compile time check for max BB size.
867   if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
868     return false;
869 
870   createGroups(MBB, SGs);
871 
872   auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
873     return getOffset(A) < getOffset(B);
874   };
875   for (auto &G : SGs) {
876     assert(G.size() > 1 && "Group with fewer than 2 elements");
877     llvm::sort(G, Less);
878 
879     Changed |= processGroup(G);
880   }
881 
882   return Changed;
883 }
884 
run()885 bool HexagonLoadStoreWidening::run() {
886   bool Changed = false;
887 
888   for (auto &B : *MF)
889     Changed |= processBasicBlock(B);
890 
891   return Changed;
892 }
893 
createHexagonStoreWidening()894 FunctionPass *llvm::createHexagonStoreWidening() {
895   return new HexagonStoreWidening();
896 }
897 
createHexagonLoadWidening()898 FunctionPass *llvm::createHexagonLoadWidening() {
899   return new HexagonLoadWidening();
900 }
901