xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 //===- HexagonStoreWidening.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Replace sequences of "narrow" stores to adjacent memory locations with
9 // a fewer "wide" stores that have the same effect.
10 // For example, replace:
11 //   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
12 //   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
13 // with
14 //   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
15 // The above is the general idea.  The actual cases handled by the code
16 // may be a bit more complex.
17 // The purpose of this pass is to reduce the number of outstanding stores,
18 // or as one could say, "reduce store queue pressure".  Also, wide stores
19 // mean fewer stores, and since there are only two memory instructions allowed
20 // per packet, it also means fewer packets, and ultimately fewer cycles.
21 //===---------------------------------------------------------------------===//
22 
23 #include "HexagonInstrInfo.h"
24 #include "HexagonRegisterInfo.h"
25 #include "HexagonSubtarget.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/MemoryLocation.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/CodeGen/MachineInstr.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/IR/DebugLoc.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/MC/MCInstrDesc.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <algorithm>
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <vector>
50 
51 #define DEBUG_TYPE "hexagon-widen-stores"
52 
53 using namespace llvm;
54 
55 namespace llvm {
56 
57 FunctionPass *createHexagonStoreWidening();
58 void initializeHexagonStoreWideningPass(PassRegistry&);
59 
60 } // end namespace llvm
61 
62 namespace {
63 
64   struct HexagonStoreWidening : public MachineFunctionPass {
65     const HexagonInstrInfo      *TII;
66     const HexagonRegisterInfo   *TRI;
67     const MachineRegisterInfo   *MRI;
68     AliasAnalysis               *AA;
69     MachineFunction             *MF;
70 
71   public:
72     static char ID;
73 
74     HexagonStoreWidening() : MachineFunctionPass(ID) {
75       initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
76     }
77 
78     bool runOnMachineFunction(MachineFunction &MF) override;
79 
80     StringRef getPassName() const override { return "Hexagon Store Widening"; }
81 
82     void getAnalysisUsage(AnalysisUsage &AU) const override {
83       AU.addRequired<AAResultsWrapperPass>();
84       AU.addPreserved<AAResultsWrapperPass>();
85       MachineFunctionPass::getAnalysisUsage(AU);
86     }
87 
88     static bool handledStoreType(const MachineInstr *MI);
89 
90   private:
91     static const int MaxWideSize = 4;
92 
93     using InstrGroup = std::vector<MachineInstr *>;
94     using InstrGroupList = std::vector<InstrGroup>;
95 
96     bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
97     bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
98     void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
99         InstrGroup::iterator End, InstrGroup &Group);
100     void createStoreGroups(MachineBasicBlock &MBB,
101         InstrGroupList &StoreGroups);
102     bool processBasicBlock(MachineBasicBlock &MBB);
103     bool processStoreGroup(InstrGroup &Group);
104     bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
105         InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
106     bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
107     bool replaceStores(InstrGroup &OG, InstrGroup &NG);
108     bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
109   };
110 
111 } // end anonymous namespace
112 
113 char HexagonStoreWidening::ID = 0;
114 
115 INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
116                 "Hexason Store Widening", false, false)
117 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
118 INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
119                 "Hexagon Store Widening", false, false)
120 
121 // Some local helper functions...
122 static unsigned getBaseAddressRegister(const MachineInstr *MI) {
123   const MachineOperand &MO = MI->getOperand(0);
124   assert(MO.isReg() && "Expecting register operand");
125   return MO.getReg();
126 }
127 
128 static int64_t getStoreOffset(const MachineInstr *MI) {
129   unsigned OpC = MI->getOpcode();
130   assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
131 
132   switch (OpC) {
133     case Hexagon::S4_storeirb_io:
134     case Hexagon::S4_storeirh_io:
135     case Hexagon::S4_storeiri_io: {
136       const MachineOperand &MO = MI->getOperand(1);
137       assert(MO.isImm() && "Expecting immediate offset");
138       return MO.getImm();
139     }
140   }
141   dbgs() << *MI;
142   llvm_unreachable("Store offset calculation missing for a handled opcode");
143   return 0;
144 }
145 
146 static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
147   assert(!MI->memoperands_empty() && "Expecting memory operands");
148   return **MI->memoperands_begin();
149 }
150 
151 // Filtering function: any stores whose opcodes are not "approved" of by
152 // this function will not be subjected to widening.
153 inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
154   // For now, only handle stores of immediate values.
155   // Also, reject stores to stack slots.
156   unsigned Opc = MI->getOpcode();
157   switch (Opc) {
158     case Hexagon::S4_storeirb_io:
159     case Hexagon::S4_storeirh_io:
160     case Hexagon::S4_storeiri_io:
161       // Base address must be a register. (Implement FI later.)
162       return MI->getOperand(0).isReg();
163     default:
164       return false;
165   }
166 }
167 
168 // Check if the machine memory operand MMO is aliased with any of the
169 // stores in the store group Stores.
170 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
171       const MachineMemOperand &MMO) {
172   if (!MMO.getValue())
173     return true;
174 
175   MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
176 
177   for (auto *SI : Stores) {
178     const MachineMemOperand &SMO = getStoreTarget(SI);
179     if (!SMO.getValue())
180       return true;
181 
182     MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
183     if (!AA->isNoAlias(L, SL))
184       return true;
185   }
186 
187   return false;
188 }
189 
190 // Check if the machine instruction MI accesses any storage aliased with
191 // any store in the group Stores.
192 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
193       const MachineInstr *MI) {
194   for (auto &I : MI->memoperands())
195     if (instrAliased(Stores, *I))
196       return true;
197   return false;
198 }
199 
200 // Inspect a machine basic block, and generate store groups out of stores
201 // encountered in the block.
202 //
203 // A store group is a group of stores that use the same base register,
204 // and which can be reordered within that group without altering the
205 // semantics of the program.  A single store group could be widened as
206 // a whole, if there existed a single store instruction with the same
207 // semantics as the entire group.  In many cases, a single store group
208 // may need more than one wide store.
209 void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
210       InstrGroupList &StoreGroups) {
211   InstrGroup AllInsns;
212 
213   // Copy all instruction pointers from the basic block to a temporary
214   // list.  This will allow operating on the list, and modifying its
215   // elements without affecting the basic block.
216   for (auto &I : MBB)
217     AllInsns.push_back(&I);
218 
219   // Traverse all instructions in the AllInsns list, and if we encounter
220   // a store, then try to create a store group starting at that instruction
221   // i.e. a sequence of independent stores that can be widened.
222   for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
223     MachineInstr *MI = *I;
224     // Skip null pointers (processed instructions).
225     if (!MI || !handledStoreType(MI))
226       continue;
227 
228     // Found a store.  Try to create a store group.
229     InstrGroup G;
230     createStoreGroup(MI, I+1, E, G);
231     if (G.size() > 1)
232       StoreGroups.push_back(G);
233   }
234 }
235 
236 // Create a single store group.  The stores need to be independent between
237 // themselves, and also there cannot be other instructions between them
238 // that could read or modify storage being stored into.
239 void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
240       InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
241   assert(handledStoreType(BaseStore) && "Unexpected instruction");
242   unsigned BaseReg = getBaseAddressRegister(BaseStore);
243   InstrGroup Other;
244 
245   Group.push_back(BaseStore);
246 
247   for (auto I = Begin; I != End; ++I) {
248     MachineInstr *MI = *I;
249     if (!MI)
250       continue;
251 
252     if (handledStoreType(MI)) {
253       // If this store instruction is aliased with anything already in the
254       // group, terminate the group now.
255       if (instrAliased(Group, getStoreTarget(MI)))
256         return;
257       // If this store is aliased to any of the memory instructions we have
258       // seen so far (that are not a part of this group), terminate the group.
259       if (instrAliased(Other, getStoreTarget(MI)))
260         return;
261 
262       unsigned BR = getBaseAddressRegister(MI);
263       if (BR == BaseReg) {
264         Group.push_back(MI);
265         *I = nullptr;
266         continue;
267       }
268     }
269 
270     // Assume calls are aliased to everything.
271     if (MI->isCall() || MI->hasUnmodeledSideEffects())
272       return;
273 
274     if (MI->mayLoadOrStore()) {
275       if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
276         return;
277       Other.push_back(MI);
278     }
279   } // for
280 }
281 
282 // Check if store instructions S1 and S2 are adjacent.  More precisely,
283 // S2 has to access memory immediately following that accessed by S1.
284 bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
285       const MachineInstr *S2) {
286   if (!handledStoreType(S1) || !handledStoreType(S2))
287     return false;
288 
289   const MachineMemOperand &S1MO = getStoreTarget(S1);
290 
291   // Currently only handling immediate stores.
292   int Off1 = S1->getOperand(1).getImm();
293   int Off2 = S2->getOperand(1).getImm();
294 
295   return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
296                      : int(Off1+S1MO.getSize()) == Off2;
297 }
298 
299 /// Given a sequence of adjacent stores, and a maximum size of a single wide
300 /// store, pick a group of stores that  can be replaced by a single store
301 /// of size not exceeding MaxSize.  The selected sequence will be recorded
302 /// in OG ("old group" of instructions).
303 /// OG should be empty on entry, and should be left empty if the function
304 /// fails.
305 bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
306       InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
307       unsigned MaxSize) {
308   assert(Begin != End && "No instructions to analyze");
309   assert(OG.empty() && "Old group not empty on entry");
310 
311   if (std::distance(Begin, End) <= 1)
312     return false;
313 
314   MachineInstr *FirstMI = *Begin;
315   assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
316   const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
317   unsigned Alignment = FirstMMO.getAlign().value();
318   unsigned SizeAccum = FirstMMO.getSize();
319   unsigned FirstOffset = getStoreOffset(FirstMI);
320 
321   // The initial value of SizeAccum should always be a power of 2.
322   assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
323 
324   // If the size of the first store equals to or exceeds the limit, do nothing.
325   if (SizeAccum >= MaxSize)
326     return false;
327 
328   // If the size of the first store is greater than or equal to the address
329   // stored to, then the store cannot be made any wider.
330   if (SizeAccum >= Alignment)
331     return false;
332 
333   // The offset of a store will put restrictions on how wide the store can be.
334   // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
335   // If the first store already exhausts the offset limits, quit.  Test this
336   // by checking if the next wider size would exceed the limit.
337   if ((2*SizeAccum-1) & FirstOffset)
338     return false;
339 
340   OG.push_back(FirstMI);
341   MachineInstr *S1 = FirstMI;
342 
343   // Pow2Num will be the largest number of elements in OG such that the sum
344   // of sizes of stores 0...Pow2Num-1 will be a power of 2.
345   unsigned Pow2Num = 1;
346   unsigned Pow2Size = SizeAccum;
347 
348   // Be greedy: keep accumulating stores as long as they are to adjacent
349   // memory locations, and as long as the total number of bytes stored
350   // does not exceed the limit (MaxSize).
351   // Keep track of when the total size covered is a power of 2, since
352   // this is a size a single store can cover.
353   for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
354     MachineInstr *S2 = *I;
355     // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
356     // any other store to fill the "hole".
357     if (!storesAreAdjacent(S1, S2))
358       break;
359 
360     unsigned S2Size = getStoreTarget(S2).getSize();
361     if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
362       break;
363 
364     OG.push_back(S2);
365     SizeAccum += S2Size;
366     if (isPowerOf2_32(SizeAccum)) {
367       Pow2Num = OG.size();
368       Pow2Size = SizeAccum;
369     }
370     if ((2*Pow2Size-1) & FirstOffset)
371       break;
372 
373     S1 = S2;
374   }
375 
376   // The stores don't add up to anything that can be widened.  Clean up.
377   if (Pow2Num <= 1) {
378     OG.clear();
379     return false;
380   }
381 
382   // Only leave the stored being widened.
383   OG.resize(Pow2Num);
384   TotalSize = Pow2Size;
385   return true;
386 }
387 
388 /// Given an "old group" OG of stores, create a "new group" NG of instructions
389 /// to replace them.  Ideally, NG would only have a single instruction in it,
390 /// but that may only be possible for store-immediate.
391 bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
392       unsigned TotalSize) {
393   // XXX Current limitations:
394   // - only expect stores of immediate values in OG,
395   // - only handle a TotalSize of up to 4.
396 
397   if (TotalSize > 4)
398     return false;
399 
400   unsigned Acc = 0;  // Value accumulator.
401   unsigned Shift = 0;
402 
403   for (MachineInstr *MI : OG) {
404     const MachineMemOperand &MMO = getStoreTarget(MI);
405     MachineOperand &SO = MI->getOperand(2);  // Source.
406     assert(SO.isImm() && "Expecting an immediate operand");
407 
408     unsigned NBits = MMO.getSize()*8;
409     unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
410     unsigned Val = (SO.getImm() & Mask) << Shift;
411     Acc |= Val;
412     Shift += NBits;
413   }
414 
415   MachineInstr *FirstSt = OG.front();
416   DebugLoc DL = OG.back()->getDebugLoc();
417   const MachineMemOperand &OldM = getStoreTarget(FirstSt);
418   MachineMemOperand *NewM =
419       MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
420                                TotalSize, OldM.getAlign(), OldM.getAAInfo());
421 
422   if (Acc < 0x10000) {
423     // Create mem[hw] = #Acc
424     unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
425                     (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
426     assert(WOpc && "Unexpected size");
427 
428     int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
429     const MCInstrDesc &StD = TII->get(WOpc);
430     MachineOperand &MR = FirstSt->getOperand(0);
431     int64_t Off = FirstSt->getOperand(1).getImm();
432     MachineInstr *StI =
433         BuildMI(*MF, DL, StD)
434             .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
435             .addImm(Off)
436             .addImm(Val);
437     StI->addMemOperand(*MF, NewM);
438     NG.push_back(StI);
439   } else {
440     // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
441     const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
442     const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
443     Register VReg = MF->getRegInfo().createVirtualRegister(RC);
444     MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
445                            .addImm(int(Acc));
446     NG.push_back(TfrI);
447 
448     unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
449                     (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
450     assert(WOpc && "Unexpected size");
451 
452     const MCInstrDesc &StD = TII->get(WOpc);
453     MachineOperand &MR = FirstSt->getOperand(0);
454     int64_t Off = FirstSt->getOperand(1).getImm();
455     MachineInstr *StI =
456         BuildMI(*MF, DL, StD)
457             .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
458             .addImm(Off)
459             .addReg(VReg, RegState::Kill);
460     StI->addMemOperand(*MF, NewM);
461     NG.push_back(StI);
462   }
463 
464   return true;
465 }
466 
467 // Replace instructions from the old group OG with instructions from the
468 // new group NG.  Conceptually, remove all instructions in OG, and then
469 // insert all instructions in NG, starting at where the first instruction
470 // from OG was (in the order in which they appeared in the basic block).
471 // (The ordering in OG does not have to match the order in the basic block.)
472 bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
473   LLVM_DEBUG({
474     dbgs() << "Replacing:\n";
475     for (auto I : OG)
476       dbgs() << "  " << *I;
477     dbgs() << "with\n";
478     for (auto I : NG)
479       dbgs() << "  " << *I;
480   });
481 
482   MachineBasicBlock *MBB = OG.back()->getParent();
483   MachineBasicBlock::iterator InsertAt = MBB->end();
484 
485   // Need to establish the insertion point.  The best one is right before
486   // the first store in the OG, but in the order in which the stores occur
487   // in the program list.  Since the ordering in OG does not correspond
488   // to the order in the program list, we need to do some work to find
489   // the insertion point.
490 
491   // Create a set of all instructions in OG (for quick lookup).
492   SmallPtrSet<MachineInstr*, 4> InstrSet;
493   for (auto *I : OG)
494     InstrSet.insert(I);
495 
496   // Traverse the block, until we hit an instruction from OG.
497   for (auto &I : *MBB) {
498     if (InstrSet.count(&I)) {
499       InsertAt = I;
500       break;
501     }
502   }
503 
504   assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
505 
506   bool AtBBStart = false;
507 
508   // InsertAt points at the first instruction that will be removed.  We need
509   // to move it out of the way, so it remains valid after removing all the
510   // old stores, and so we are able to recover it back to the proper insertion
511   // position.
512   if (InsertAt != MBB->begin())
513     --InsertAt;
514   else
515     AtBBStart = true;
516 
517   for (auto *I : OG)
518     I->eraseFromParent();
519 
520   if (!AtBBStart)
521     ++InsertAt;
522   else
523     InsertAt = MBB->begin();
524 
525   for (auto *I : NG)
526     MBB->insert(InsertAt, I);
527 
528   return true;
529 }
530 
531 // Break up the group into smaller groups, each of which can be replaced by
532 // a single wide store.  Widen each such smaller group and replace the old
533 // instructions with the widened ones.
534 bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
535   bool Changed = false;
536   InstrGroup::iterator I = Group.begin(), E = Group.end();
537   InstrGroup OG, NG;   // Old and new groups.
538   unsigned CollectedSize;
539 
540   while (I != E) {
541     OG.clear();
542     NG.clear();
543 
544     bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
545                 createWideStores(OG, NG, CollectedSize)              &&
546                 replaceStores(OG, NG);
547     if (!Succ)
548       continue;
549 
550     assert(OG.size() > 1 && "Created invalid group");
551     assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
552     I += OG.size()-1;
553 
554     Changed = true;
555   }
556 
557   return Changed;
558 }
559 
560 // Process a single basic block: create the store groups, and replace them
561 // with the widened stores, if possible.  Processing of each basic block
562 // is independent from processing of any other basic block.  This transfor-
563 // mation could be stopped after having processed any basic block without
564 // any ill effects (other than not having performed widening in the unpro-
565 // cessed blocks).  Also, the basic blocks can be processed in any order.
566 bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
567   InstrGroupList SGs;
568   bool Changed = false;
569 
570   createStoreGroups(MBB, SGs);
571 
572   auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
573     return getStoreOffset(A) < getStoreOffset(B);
574   };
575   for (auto &G : SGs) {
576     assert(G.size() > 1 && "Store group with fewer than 2 elements");
577     llvm::sort(G, Less);
578 
579     Changed |= processStoreGroup(G);
580   }
581 
582   return Changed;
583 }
584 
585 bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
586   if (skipFunction(MFn.getFunction()))
587     return false;
588 
589   MF = &MFn;
590   auto &ST = MFn.getSubtarget<HexagonSubtarget>();
591   TII = ST.getInstrInfo();
592   TRI = ST.getRegisterInfo();
593   MRI = &MFn.getRegInfo();
594   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
595 
596   bool Changed = false;
597 
598   for (auto &B : MFn)
599     Changed |= processBasicBlock(B);
600 
601   return Changed;
602 }
603 
604 FunctionPass *llvm::createHexagonStoreWidening() {
605   return new HexagonStoreWidening();
606 }
607