xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===---HexagonLoadStoreWidening.cpp---------------------------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric // HexagonStoreWidening:
9*700637cbSDimitry Andric // Replace sequences of "narrow" stores to adjacent memory locations with
10*700637cbSDimitry Andric // a fewer "wide" stores that have the same effect.
11*700637cbSDimitry Andric // For example, replace:
12*700637cbSDimitry Andric //   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
13*700637cbSDimitry Andric //   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
14*700637cbSDimitry Andric // with
15*700637cbSDimitry Andric //   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
16*700637cbSDimitry Andric // The above is the general idea.  The actual cases handled by the code
17*700637cbSDimitry Andric // may be a bit more complex.
18*700637cbSDimitry Andric // The purpose of this pass is to reduce the number of outstanding stores,
19*700637cbSDimitry Andric // or as one could say, "reduce store queue pressure".  Also, wide stores
20*700637cbSDimitry Andric // mean fewer stores, and since there are only two memory instructions allowed
21*700637cbSDimitry Andric // per packet, it also means fewer packets, and ultimately fewer cycles.
22*700637cbSDimitry Andric //
23*700637cbSDimitry Andric // HexagonLoadWidening does the same thing as HexagonStoreWidening but
24*700637cbSDimitry Andric // for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
25*700637cbSDimitry Andric // For example:
26*700637cbSDimitry Andric // Replace
27*700637cbSDimitry Andric //   %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
28*700637cbSDimitry Andric //   %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
29*700637cbSDimitry Andric // with
30*700637cbSDimitry Andric //   %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
31*700637cbSDimitry Andric //   %2:intregs = COPY %4.isub_lo:doubleregs
32*700637cbSDimitry Andric //   %3:intregs = COPY %4.isub_hi:doubleregs
33*700637cbSDimitry Andric //
34*700637cbSDimitry Andric // LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
35*700637cbSDimitry Andric // insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
36*700637cbSDimitry Andric 
37*700637cbSDimitry Andric //===---------------------------------------------------------------------===//
38*700637cbSDimitry Andric 
39*700637cbSDimitry Andric #include "Hexagon.h"
40*700637cbSDimitry Andric #include "HexagonInstrInfo.h"
41*700637cbSDimitry Andric #include "HexagonRegisterInfo.h"
42*700637cbSDimitry Andric #include "HexagonSubtarget.h"
43*700637cbSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
44*700637cbSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
45*700637cbSDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
46*700637cbSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
47*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
48*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
49*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
50*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
51*700637cbSDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
52*700637cbSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
53*700637cbSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
54*700637cbSDimitry Andric #include "llvm/IR/DebugLoc.h"
55*700637cbSDimitry Andric #include "llvm/InitializePasses.h"
56*700637cbSDimitry Andric #include "llvm/MC/MCInstrDesc.h"
57*700637cbSDimitry Andric #include "llvm/Pass.h"
58*700637cbSDimitry Andric #include "llvm/Support/Debug.h"
59*700637cbSDimitry Andric #include "llvm/Support/ErrorHandling.h"
60*700637cbSDimitry Andric #include "llvm/Support/MathExtras.h"
61*700637cbSDimitry Andric #include "llvm/Support/raw_ostream.h"
62*700637cbSDimitry Andric #include <algorithm>
63*700637cbSDimitry Andric #include <cassert>
64*700637cbSDimitry Andric #include <cstdint>
65*700637cbSDimitry Andric #include <iterator>
66*700637cbSDimitry Andric 
67*700637cbSDimitry Andric using namespace llvm;
68*700637cbSDimitry Andric 
69*700637cbSDimitry Andric #define DEBUG_TYPE "hexagon-load-store-widening"
70*700637cbSDimitry Andric 
71*700637cbSDimitry Andric static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
72*700637cbSDimitry Andric     "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
73*700637cbSDimitry Andric     cl::desc("Limit block size to analyze in load/store widening pass"));
74*700637cbSDimitry Andric 
75*700637cbSDimitry Andric namespace {
76*700637cbSDimitry Andric 
77*700637cbSDimitry Andric struct HexagonLoadStoreWidening {
78*700637cbSDimitry Andric   enum WideningMode { Store, Load };
79*700637cbSDimitry Andric   const HexagonInstrInfo *TII;
80*700637cbSDimitry Andric   const HexagonRegisterInfo *TRI;
81*700637cbSDimitry Andric   MachineRegisterInfo *MRI;
82*700637cbSDimitry Andric   AliasAnalysis *AA;
83*700637cbSDimitry Andric   MachineFunction *MF;
84*700637cbSDimitry Andric 
85*700637cbSDimitry Andric public:
HexagonLoadStoreWidening__anon01fcdbeb0111::HexagonLoadStoreWidening86*700637cbSDimitry Andric   HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87*700637cbSDimitry Andric                            const HexagonRegisterInfo *TRI,
88*700637cbSDimitry Andric                            MachineRegisterInfo *MRI, AliasAnalysis *AA,
89*700637cbSDimitry Andric                            MachineFunction *MF, bool StoreMode)
90*700637cbSDimitry Andric       : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91*700637cbSDimitry Andric         Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92*700637cbSDimitry Andric         HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93*700637cbSDimitry Andric 
94*700637cbSDimitry Andric   bool run();
95*700637cbSDimitry Andric 
96*700637cbSDimitry Andric private:
97*700637cbSDimitry Andric   const bool Mode;
98*700637cbSDimitry Andric   const unsigned MaxWideSize = 8;
99*700637cbSDimitry Andric   const HexagonInstrInfo *HII = nullptr;
100*700637cbSDimitry Andric 
101*700637cbSDimitry Andric   using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102*700637cbSDimitry Andric   using InstrGroup = SmallVector<MachineInstr *, 8>;
103*700637cbSDimitry Andric   using InstrGroupList = SmallVector<InstrGroup, 8>;
104*700637cbSDimitry Andric 
105*700637cbSDimitry Andric   InstrSet ProcessedInsts;
106*700637cbSDimitry Andric 
107*700637cbSDimitry Andric   unsigned getBaseAddressRegister(const MachineInstr *MI);
108*700637cbSDimitry Andric   int64_t getOffset(const MachineInstr *MI);
109*700637cbSDimitry Andric   int64_t getPostIncrementValue(const MachineInstr *MI);
110*700637cbSDimitry Andric   bool handledInstType(const MachineInstr *MI);
111*700637cbSDimitry Andric 
112*700637cbSDimitry Andric   void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113*700637cbSDimitry Andric   void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114*700637cbSDimitry Andric   bool processBasicBlock(MachineBasicBlock &MBB);
115*700637cbSDimitry Andric   bool processGroup(InstrGroup &Group);
116*700637cbSDimitry Andric   bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117*700637cbSDimitry Andric                    InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118*700637cbSDimitry Andric   bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119*700637cbSDimitry Andric   bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120*700637cbSDimitry Andric   bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121*700637cbSDimitry Andric   bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122*700637cbSDimitry Andric   bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123*700637cbSDimitry Andric   bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124*700637cbSDimitry Andric };
125*700637cbSDimitry Andric 
126*700637cbSDimitry Andric struct HexagonStoreWidening : public MachineFunctionPass {
127*700637cbSDimitry Andric   static char ID;
128*700637cbSDimitry Andric 
HexagonStoreWidening__anon01fcdbeb0111::HexagonStoreWidening129*700637cbSDimitry Andric   HexagonStoreWidening() : MachineFunctionPass(ID) {}
130*700637cbSDimitry Andric 
getPassName__anon01fcdbeb0111::HexagonStoreWidening131*700637cbSDimitry Andric   StringRef getPassName() const override { return "Hexagon Store Widening"; }
132*700637cbSDimitry Andric 
getAnalysisUsage__anon01fcdbeb0111::HexagonStoreWidening133*700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
134*700637cbSDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
135*700637cbSDimitry Andric     AU.addPreserved<AAResultsWrapperPass>();
136*700637cbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
137*700637cbSDimitry Andric   }
138*700637cbSDimitry Andric 
runOnMachineFunction__anon01fcdbeb0111::HexagonStoreWidening139*700637cbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MFn) override {
140*700637cbSDimitry Andric     if (skipFunction(MFn.getFunction()))
141*700637cbSDimitry Andric       return false;
142*700637cbSDimitry Andric 
143*700637cbSDimitry Andric     auto &ST = MFn.getSubtarget<HexagonSubtarget>();
144*700637cbSDimitry Andric     const HexagonInstrInfo *TII = ST.getInstrInfo();
145*700637cbSDimitry Andric     const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
146*700637cbSDimitry Andric     MachineRegisterInfo *MRI = &MFn.getRegInfo();
147*700637cbSDimitry Andric     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
148*700637cbSDimitry Andric 
149*700637cbSDimitry Andric     return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
150*700637cbSDimitry Andric   }
151*700637cbSDimitry Andric };
152*700637cbSDimitry Andric 
153*700637cbSDimitry Andric struct HexagonLoadWidening : public MachineFunctionPass {
154*700637cbSDimitry Andric   static char ID;
155*700637cbSDimitry Andric 
HexagonLoadWidening__anon01fcdbeb0111::HexagonLoadWidening156*700637cbSDimitry Andric   HexagonLoadWidening() : MachineFunctionPass(ID) {}
157*700637cbSDimitry Andric 
getPassName__anon01fcdbeb0111::HexagonLoadWidening158*700637cbSDimitry Andric   StringRef getPassName() const override { return "Hexagon Load Widening"; }
159*700637cbSDimitry Andric 
getAnalysisUsage__anon01fcdbeb0111::HexagonLoadWidening160*700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
161*700637cbSDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
162*700637cbSDimitry Andric     AU.addPreserved<AAResultsWrapperPass>();
163*700637cbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
164*700637cbSDimitry Andric   }
165*700637cbSDimitry Andric 
runOnMachineFunction__anon01fcdbeb0111::HexagonLoadWidening166*700637cbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MFn) override {
167*700637cbSDimitry Andric     if (skipFunction(MFn.getFunction()))
168*700637cbSDimitry Andric       return false;
169*700637cbSDimitry Andric 
170*700637cbSDimitry Andric     auto &ST = MFn.getSubtarget<HexagonSubtarget>();
171*700637cbSDimitry Andric     const HexagonInstrInfo *TII = ST.getInstrInfo();
172*700637cbSDimitry Andric     const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
173*700637cbSDimitry Andric     MachineRegisterInfo *MRI = &MFn.getRegInfo();
174*700637cbSDimitry Andric     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
175*700637cbSDimitry Andric     return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
176*700637cbSDimitry Andric   }
177*700637cbSDimitry Andric };
178*700637cbSDimitry Andric 
179*700637cbSDimitry Andric char HexagonStoreWidening::ID = 0;
180*700637cbSDimitry Andric char HexagonLoadWidening::ID = 0;
181*700637cbSDimitry Andric 
182*700637cbSDimitry Andric } // end anonymous namespace
183*700637cbSDimitry Andric 
184*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
185*700637cbSDimitry Andric                       "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)186*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
187*700637cbSDimitry Andric INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
188*700637cbSDimitry Andric                     "Hexagon Store Widening", false, false)
189*700637cbSDimitry Andric 
190*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
191*700637cbSDimitry Andric                       "Hexagon Load Widening", false, false)
192*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
193*700637cbSDimitry Andric INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
194*700637cbSDimitry Andric                     "Hexagon Load Widening", false, false)
195*700637cbSDimitry Andric 
196*700637cbSDimitry Andric static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
197*700637cbSDimitry Andric   assert(!MI->memoperands_empty() && "Expecting memory operands");
198*700637cbSDimitry Andric   return **MI->memoperands_begin();
199*700637cbSDimitry Andric }
200*700637cbSDimitry Andric 
201*700637cbSDimitry Andric unsigned
getBaseAddressRegister(const MachineInstr * MI)202*700637cbSDimitry Andric HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
203*700637cbSDimitry Andric   assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
204*700637cbSDimitry Andric   unsigned Base, Offset;
205*700637cbSDimitry Andric   HII->getBaseAndOffsetPosition(*MI, Base, Offset);
206*700637cbSDimitry Andric   const MachineOperand &MO = MI->getOperand(Base);
207*700637cbSDimitry Andric   assert(MO.isReg() && "Expecting register operand");
208*700637cbSDimitry Andric   return MO.getReg();
209*700637cbSDimitry Andric }
210*700637cbSDimitry Andric 
getOffset(const MachineInstr * MI)211*700637cbSDimitry Andric int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
212*700637cbSDimitry Andric   assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
213*700637cbSDimitry Andric 
214*700637cbSDimitry Andric   // On Hexagon, post-incs always have an offset of 0
215*700637cbSDimitry Andric   // There is no Offset operand to post-incs
216*700637cbSDimitry Andric   if (HII->isPostIncrement(*MI))
217*700637cbSDimitry Andric     return 0;
218*700637cbSDimitry Andric 
219*700637cbSDimitry Andric   unsigned Base, Offset;
220*700637cbSDimitry Andric 
221*700637cbSDimitry Andric   HII->getBaseAndOffsetPosition(*MI, Base, Offset);
222*700637cbSDimitry Andric   const MachineOperand &MO = MI->getOperand(Offset);
223*700637cbSDimitry Andric   switch (MO.getType()) {
224*700637cbSDimitry Andric   case MachineOperand::MO_Immediate:
225*700637cbSDimitry Andric     return MO.getImm();
226*700637cbSDimitry Andric   case MachineOperand::MO_GlobalAddress:
227*700637cbSDimitry Andric     return MO.getOffset();
228*700637cbSDimitry Andric   default:
229*700637cbSDimitry Andric     break;
230*700637cbSDimitry Andric   }
231*700637cbSDimitry Andric   llvm_unreachable("Expecting an immediate or global operand");
232*700637cbSDimitry Andric }
233*700637cbSDimitry Andric 
234*700637cbSDimitry Andric inline int64_t
getPostIncrementValue(const MachineInstr * MI)235*700637cbSDimitry Andric HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
236*700637cbSDimitry Andric   unsigned Base, PostIncIdx;
237*700637cbSDimitry Andric   HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx);
238*700637cbSDimitry Andric   const MachineOperand &MO = MI->getOperand(PostIncIdx);
239*700637cbSDimitry Andric   return MO.getImm();
240*700637cbSDimitry Andric }
241*700637cbSDimitry Andric 
242*700637cbSDimitry Andric // Filtering function: any loads/stores whose opcodes are not "approved" of by
243*700637cbSDimitry Andric // this function will not be subjected to widening.
handledInstType(const MachineInstr * MI)244*700637cbSDimitry Andric inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
245*700637cbSDimitry Andric   unsigned Opc = MI->getOpcode();
246*700637cbSDimitry Andric   if (Mode == WideningMode::Store) {
247*700637cbSDimitry Andric     switch (Opc) {
248*700637cbSDimitry Andric     case Hexagon::S4_storeirb_io:
249*700637cbSDimitry Andric     case Hexagon::S4_storeirh_io:
250*700637cbSDimitry Andric     case Hexagon::S4_storeiri_io:
251*700637cbSDimitry Andric     case Hexagon::S2_storeri_io:
252*700637cbSDimitry Andric       // Base address must be a register. (Implement FI later.)
253*700637cbSDimitry Andric       return MI->getOperand(0).isReg();
254*700637cbSDimitry Andric     case Hexagon::S2_storeri_pi:
255*700637cbSDimitry Andric       return MI->getOperand(1).isReg();
256*700637cbSDimitry Andric     }
257*700637cbSDimitry Andric   } else {
258*700637cbSDimitry Andric     // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
259*700637cbSDimitry Andric     // loads. So we only widen 32 bit loads as we don't need to select the
260*700637cbSDimitry Andric     // right bits with AND & SHIFT ops.
261*700637cbSDimitry Andric     switch (Opc) {
262*700637cbSDimitry Andric     case Hexagon::L2_loadri_io:
263*700637cbSDimitry Andric       // Base address must be a register and offset must be immediate.
264*700637cbSDimitry Andric       return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
265*700637cbSDimitry Andric              MI->getOperand(2).isImm();
266*700637cbSDimitry Andric     case Hexagon::L2_loadri_pi:
267*700637cbSDimitry Andric       return !MI->memoperands_empty() && MI->getOperand(2).isReg();
268*700637cbSDimitry Andric     }
269*700637cbSDimitry Andric   }
270*700637cbSDimitry Andric   return false;
271*700637cbSDimitry Andric }
272*700637cbSDimitry Andric 
addDefsUsesToList(const MachineInstr * MI,DenseSet<Register> & RegDefs,DenseSet<Register> & RegUses)273*700637cbSDimitry Andric static void addDefsUsesToList(const MachineInstr *MI,
274*700637cbSDimitry Andric                               DenseSet<Register> &RegDefs,
275*700637cbSDimitry Andric                               DenseSet<Register> &RegUses) {
276*700637cbSDimitry Andric   for (const auto &Op : MI->operands()) {
277*700637cbSDimitry Andric     if (!Op.isReg())
278*700637cbSDimitry Andric       continue;
279*700637cbSDimitry Andric     if (Op.isDef())
280*700637cbSDimitry Andric       RegDefs.insert(Op.getReg());
281*700637cbSDimitry Andric     if (Op.readsReg())
282*700637cbSDimitry Andric       RegUses.insert(Op.getReg());
283*700637cbSDimitry Andric   }
284*700637cbSDimitry Andric }
285*700637cbSDimitry Andric 
canSwapInstructions(const MachineInstr * A,const MachineInstr * B)286*700637cbSDimitry Andric bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
287*700637cbSDimitry Andric                                                    const MachineInstr *B) {
288*700637cbSDimitry Andric   DenseSet<Register> ARegDefs;
289*700637cbSDimitry Andric   DenseSet<Register> ARegUses;
290*700637cbSDimitry Andric   addDefsUsesToList(A, ARegDefs, ARegUses);
291*700637cbSDimitry Andric   if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
292*700637cbSDimitry Andric       (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
293*700637cbSDimitry Andric     return false;
294*700637cbSDimitry Andric   for (const auto &BOp : B->operands()) {
295*700637cbSDimitry Andric     if (!BOp.isReg())
296*700637cbSDimitry Andric       continue;
297*700637cbSDimitry Andric     if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
298*700637cbSDimitry Andric       return false;
299*700637cbSDimitry Andric     if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
300*700637cbSDimitry Andric       return false;
301*700637cbSDimitry Andric   }
302*700637cbSDimitry Andric   return true;
303*700637cbSDimitry Andric }
304*700637cbSDimitry Andric 
305*700637cbSDimitry Andric // Inspect a machine basic block, and generate groups out of loads/stores
306*700637cbSDimitry Andric // encountered in the block.
307*700637cbSDimitry Andric //
308*700637cbSDimitry Andric // A load/store group is a group of loads or stores that use the same base
309*700637cbSDimitry Andric // register, and which can be reordered within that group without altering the
310*700637cbSDimitry Andric // semantics of the program.  A single group could be widened as
311*700637cbSDimitry Andric // a whole, if there existed a single load/store instruction with the same
312*700637cbSDimitry Andric // semantics as the entire group.  In many cases, a single group may need more
313*700637cbSDimitry Andric // than one wide load or store.
createGroups(MachineBasicBlock & MBB,InstrGroupList & StoreGroups)314*700637cbSDimitry Andric void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
315*700637cbSDimitry Andric                                             InstrGroupList &StoreGroups) {
316*700637cbSDimitry Andric   // Traverse all instructions and if we encounter
317*700637cbSDimitry Andric   // a load/store, then try to create a group starting at that instruction
318*700637cbSDimitry Andric   // i.e. a sequence of independent loads/stores that can be widened.
319*700637cbSDimitry Andric   for (auto I = MBB.begin(); I != MBB.end(); ++I) {
320*700637cbSDimitry Andric     MachineInstr *MI = &(*I);
321*700637cbSDimitry Andric     if (!handledInstType(MI))
322*700637cbSDimitry Andric       continue;
323*700637cbSDimitry Andric     if (ProcessedInsts.count(MI))
324*700637cbSDimitry Andric       continue;
325*700637cbSDimitry Andric 
326*700637cbSDimitry Andric     // Found a store.  Try to create a store group.
327*700637cbSDimitry Andric     InstrGroup G;
328*700637cbSDimitry Andric     createGroup(MI, G);
329*700637cbSDimitry Andric     if (G.size() > 1)
330*700637cbSDimitry Andric       StoreGroups.push_back(G);
331*700637cbSDimitry Andric   }
332*700637cbSDimitry Andric }
333*700637cbSDimitry Andric 
334*700637cbSDimitry Andric // Create a single load/store group.  The insts need to be independent between
335*700637cbSDimitry Andric // themselves, and also there cannot be other instructions between them
336*700637cbSDimitry Andric // that could read or modify storage being read from or stored into.
createGroup(MachineInstr * BaseInst,InstrGroup & Group)337*700637cbSDimitry Andric void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
338*700637cbSDimitry Andric                                            InstrGroup &Group) {
339*700637cbSDimitry Andric   assert(handledInstType(BaseInst) && "Unexpected instruction");
340*700637cbSDimitry Andric   unsigned BaseReg = getBaseAddressRegister(BaseInst);
341*700637cbSDimitry Andric   InstrGroup Other;
342*700637cbSDimitry Andric 
343*700637cbSDimitry Andric   Group.push_back(BaseInst);
344*700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
345*700637cbSDimitry Andric   auto End = BaseInst->getParent()->end();
346*700637cbSDimitry Andric   auto I = BaseInst->getIterator();
347*700637cbSDimitry Andric 
348*700637cbSDimitry Andric   while (true) {
349*700637cbSDimitry Andric     I = std::next(I);
350*700637cbSDimitry Andric     if (I == End)
351*700637cbSDimitry Andric       break;
352*700637cbSDimitry Andric     MachineInstr *MI = &(*I);
353*700637cbSDimitry Andric 
354*700637cbSDimitry Andric     // Assume calls are aliased to everything.
355*700637cbSDimitry Andric     if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
356*700637cbSDimitry Andric         MI->hasOrderedMemoryRef())
357*700637cbSDimitry Andric       return;
358*700637cbSDimitry Andric 
359*700637cbSDimitry Andric     if (!handledInstType(MI)) {
360*700637cbSDimitry Andric       if (MI->mayLoadOrStore())
361*700637cbSDimitry Andric         Other.push_back(MI);
362*700637cbSDimitry Andric       continue;
363*700637cbSDimitry Andric     }
364*700637cbSDimitry Andric 
365*700637cbSDimitry Andric     // We have a handledInstType instruction
366*700637cbSDimitry Andric     // If this load/store instruction is aliased with anything already in the
367*700637cbSDimitry Andric     // group, terminate the group now.
368*700637cbSDimitry Andric     for (auto GI : Group)
369*700637cbSDimitry Andric       if (GI->mayAlias(AA, *MI, true))
370*700637cbSDimitry Andric         return;
371*700637cbSDimitry Andric     if (Mode == WideningMode::Load) {
372*700637cbSDimitry Andric       // Check if current load MI can be moved to the first load instruction
373*700637cbSDimitry Andric       // in Group. If any load instruction aliases with memory instructions in
374*700637cbSDimitry Andric       // Other, terminate the group.
375*700637cbSDimitry Andric       for (auto MemI : Other)
376*700637cbSDimitry Andric         if (!canSwapInstructions(MI, MemI))
377*700637cbSDimitry Andric           return;
378*700637cbSDimitry Andric     } else {
379*700637cbSDimitry Andric       // Check if store instructions in the group can be moved to current
380*700637cbSDimitry Andric       // store MI. If any store instruction aliases with memory instructions
381*700637cbSDimitry Andric       // in Other, terminate the group.
382*700637cbSDimitry Andric       for (auto MemI : Other) {
383*700637cbSDimitry Andric         if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
384*700637cbSDimitry Andric             0)
385*700637cbSDimitry Andric           continue;
386*700637cbSDimitry Andric         for (auto GI : Group)
387*700637cbSDimitry Andric           if (!canSwapInstructions(MemI, GI))
388*700637cbSDimitry Andric             return;
389*700637cbSDimitry Andric       }
390*700637cbSDimitry Andric     }
391*700637cbSDimitry Andric 
392*700637cbSDimitry Andric     unsigned BR = getBaseAddressRegister(MI);
393*700637cbSDimitry Andric     if (BR == BaseReg) {
394*700637cbSDimitry Andric       LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
395*700637cbSDimitry Andric       Group.push_back(MI);
396*700637cbSDimitry Andric       ProcessedInsts.insert(MI);
397*700637cbSDimitry Andric     }
398*700637cbSDimitry Andric   } // while
399*700637cbSDimitry Andric }
400*700637cbSDimitry Andric 
401*700637cbSDimitry Andric // Check if load/store instructions S1 and S2 are adjacent.  More precisely,
402*700637cbSDimitry Andric // S2 has to access memory immediately following that accessed by S1.
areAdjacent(const MachineInstr * S1,const MachineInstr * S2)403*700637cbSDimitry Andric bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
404*700637cbSDimitry Andric                                            const MachineInstr *S2) {
405*700637cbSDimitry Andric   if (!handledInstType(S1) || !handledInstType(S2))
406*700637cbSDimitry Andric     return false;
407*700637cbSDimitry Andric 
408*700637cbSDimitry Andric   const MachineMemOperand &S1MO = getMemTarget(S1);
409*700637cbSDimitry Andric 
410*700637cbSDimitry Andric   // Currently only handling immediate stores.
411*700637cbSDimitry Andric   int Off1 = getOffset(S1);
412*700637cbSDimitry Andric   int Off2 = getOffset(S2);
413*700637cbSDimitry Andric 
414*700637cbSDimitry Andric   return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
415*700637cbSDimitry Andric                      : int(Off1 + S1MO.getSize().getValue()) == Off2;
416*700637cbSDimitry Andric }
417*700637cbSDimitry Andric 
418*700637cbSDimitry Andric /// Given a sequence of adjacent loads/stores, and a maximum size of a single
419*700637cbSDimitry Andric /// wide inst, pick a group of insts that can be replaced by a single load/store
420*700637cbSDimitry Andric /// of size not exceeding MaxSize.  The selected sequence will be recorded
421*700637cbSDimitry Andric /// in OG ("old group" of instructions).
422*700637cbSDimitry Andric /// OG should be empty on entry, and should be left empty if the function
423*700637cbSDimitry Andric /// fails.
selectInsts(InstrGroup::iterator Begin,InstrGroup::iterator End,InstrGroup & OG,unsigned & TotalSize,unsigned MaxSize)424*700637cbSDimitry Andric bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
425*700637cbSDimitry Andric                                            InstrGroup::iterator End,
426*700637cbSDimitry Andric                                            InstrGroup &OG, unsigned &TotalSize,
427*700637cbSDimitry Andric                                            unsigned MaxSize) {
428*700637cbSDimitry Andric   assert(Begin != End && "No instructions to analyze");
429*700637cbSDimitry Andric   assert(OG.empty() && "Old group not empty on entry");
430*700637cbSDimitry Andric 
431*700637cbSDimitry Andric   if (std::distance(Begin, End) <= 1)
432*700637cbSDimitry Andric     return false;
433*700637cbSDimitry Andric 
434*700637cbSDimitry Andric   MachineInstr *FirstMI = *Begin;
435*700637cbSDimitry Andric   assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
436*700637cbSDimitry Andric   const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
437*700637cbSDimitry Andric   if (!FirstMMO.getType().isValid())
438*700637cbSDimitry Andric     return false;
439*700637cbSDimitry Andric 
440*700637cbSDimitry Andric   unsigned Alignment = FirstMMO.getAlign().value();
441*700637cbSDimitry Andric   unsigned SizeAccum = FirstMMO.getSize().getValue();
442*700637cbSDimitry Andric   unsigned FirstOffset = getOffset(FirstMI);
443*700637cbSDimitry Andric 
444*700637cbSDimitry Andric   // The initial value of SizeAccum should always be a power of 2.
445*700637cbSDimitry Andric   assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
446*700637cbSDimitry Andric 
447*700637cbSDimitry Andric   // If the size of the first store equals to or exceeds the limit, do nothing.
448*700637cbSDimitry Andric   if (SizeAccum >= MaxSize)
449*700637cbSDimitry Andric     return false;
450*700637cbSDimitry Andric 
451*700637cbSDimitry Andric   // If the size of the first load/store is greater than or equal to the address
452*700637cbSDimitry Andric   // stored to, then the inst cannot be made any wider.
453*700637cbSDimitry Andric   if (SizeAccum >= Alignment) {
454*700637cbSDimitry Andric     LLVM_DEBUG(
455*700637cbSDimitry Andric         dbgs() << "Size of load/store greater than equal to its alignment\n");
456*700637cbSDimitry Andric     return false;
457*700637cbSDimitry Andric   }
458*700637cbSDimitry Andric 
459*700637cbSDimitry Andric   // The offset of a load/store will put restrictions on how wide the inst can
460*700637cbSDimitry Andric   // be.  Offsets in loads/stores of size 2^n bytes need to have the n lowest
461*700637cbSDimitry Andric   // bits be 0.  If the first inst already exhausts the offset limits, quit.
462*700637cbSDimitry Andric   // Test this by checking if the next wider size would exceed the limit.
463*700637cbSDimitry Andric   // For post-increment instructions, the increment amount needs to follow the
464*700637cbSDimitry Andric   // same rule.
465*700637cbSDimitry Andric   unsigned OffsetOrIncVal = 0;
466*700637cbSDimitry Andric   if (HII->isPostIncrement(*FirstMI))
467*700637cbSDimitry Andric     OffsetOrIncVal = getPostIncrementValue(FirstMI);
468*700637cbSDimitry Andric   else
469*700637cbSDimitry Andric     OffsetOrIncVal = FirstOffset;
470*700637cbSDimitry Andric   if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
471*700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
472*700637cbSDimitry Andric                       << " value: " << getPostIncrementValue(FirstMI)
473*700637cbSDimitry Andric                       << " is invalid in the widened version\n");
474*700637cbSDimitry Andric     return false;
475*700637cbSDimitry Andric   }
476*700637cbSDimitry Andric 
477*700637cbSDimitry Andric   OG.push_back(FirstMI);
478*700637cbSDimitry Andric   MachineInstr *S1 = FirstMI;
479*700637cbSDimitry Andric 
480*700637cbSDimitry Andric   // Pow2Num will be the largest number of elements in OG such that the sum
481*700637cbSDimitry Andric   // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
482*700637cbSDimitry Andric   unsigned Pow2Num = 1;
483*700637cbSDimitry Andric   unsigned Pow2Size = SizeAccum;
484*700637cbSDimitry Andric   bool HavePostInc = HII->isPostIncrement(*S1);
485*700637cbSDimitry Andric 
486*700637cbSDimitry Andric   // Be greedy: keep accumulating insts as long as they are to adjacent
487*700637cbSDimitry Andric   // memory locations, and as long as the total number of bytes stored
488*700637cbSDimitry Andric   // does not exceed the limit (MaxSize).
489*700637cbSDimitry Andric   // Keep track of when the total size covered is a power of 2, since
490*700637cbSDimitry Andric   // this is a size a single load/store can cover.
491*700637cbSDimitry Andric   for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
492*700637cbSDimitry Andric     MachineInstr *S2 = *I;
493*700637cbSDimitry Andric     // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
494*700637cbSDimitry Andric     // any other store to fill the "hole".
495*700637cbSDimitry Andric     if (!areAdjacent(S1, S2))
496*700637cbSDimitry Andric       break;
497*700637cbSDimitry Andric 
498*700637cbSDimitry Andric     // Cannot widen two post increments, need to return two registers
499*700637cbSDimitry Andric     // with incremented values
500*700637cbSDimitry Andric     if (HavePostInc && HII->isPostIncrement(*S2))
501*700637cbSDimitry Andric       break;
502*700637cbSDimitry Andric 
503*700637cbSDimitry Andric     unsigned S2Size = getMemTarget(S2).getSize().getValue();
504*700637cbSDimitry Andric     if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
505*700637cbSDimitry Andric       break;
506*700637cbSDimitry Andric 
507*700637cbSDimitry Andric     OG.push_back(S2);
508*700637cbSDimitry Andric     SizeAccum += S2Size;
509*700637cbSDimitry Andric     if (isPowerOf2_32(SizeAccum)) {
510*700637cbSDimitry Andric       Pow2Num = OG.size();
511*700637cbSDimitry Andric       Pow2Size = SizeAccum;
512*700637cbSDimitry Andric     }
513*700637cbSDimitry Andric     if ((2 * Pow2Size - 1) & FirstOffset)
514*700637cbSDimitry Andric       break;
515*700637cbSDimitry Andric 
516*700637cbSDimitry Andric     S1 = S2;
517*700637cbSDimitry Andric   }
518*700637cbSDimitry Andric 
519*700637cbSDimitry Andric   // The insts don't add up to anything that can be widened.  Clean up.
520*700637cbSDimitry Andric   if (Pow2Num <= 1) {
521*700637cbSDimitry Andric     OG.clear();
522*700637cbSDimitry Andric     return false;
523*700637cbSDimitry Andric   }
524*700637cbSDimitry Andric 
525*700637cbSDimitry Andric   // Only leave the loads/stores being widened.
526*700637cbSDimitry Andric   OG.resize(Pow2Num);
527*700637cbSDimitry Andric   TotalSize = Pow2Size;
528*700637cbSDimitry Andric   return true;
529*700637cbSDimitry Andric }
530*700637cbSDimitry Andric 
531*700637cbSDimitry Andric /// Given an "old group" OG of insts, create a "new group" NG of instructions
532*700637cbSDimitry Andric /// to replace them.
createWideInsts(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)533*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
534*700637cbSDimitry Andric                                                unsigned TotalSize) {
535*700637cbSDimitry Andric   if (Mode == WideningMode::Store) {
536*700637cbSDimitry Andric     return createWideStores(OG, NG, TotalSize);
537*700637cbSDimitry Andric   }
538*700637cbSDimitry Andric   return createWideLoads(OG, NG, TotalSize);
539*700637cbSDimitry Andric }
540*700637cbSDimitry Andric 
541*700637cbSDimitry Andric /// Given an "old group" OG of stores, create a "new group" NG of instructions
542*700637cbSDimitry Andric /// to replace them.  Ideally, NG would only have a single instruction in it,
543*700637cbSDimitry Andric /// but that may only be possible for store-immediate.
createWideStores(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)544*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
545*700637cbSDimitry Andric                                                 unsigned TotalSize) {
546*700637cbSDimitry Andric   // XXX Current limitations:
547*700637cbSDimitry Andric   // - only handle a TotalSize of up to 8
548*700637cbSDimitry Andric 
549*700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "Creating wide stores\n");
550*700637cbSDimitry Andric   if (TotalSize > MaxWideSize)
551*700637cbSDimitry Andric     return false;
552*700637cbSDimitry Andric 
553*700637cbSDimitry Andric   uint64_t Acc = 0; // Value accumulator.
554*700637cbSDimitry Andric   unsigned Shift = 0;
555*700637cbSDimitry Andric   bool HaveImm = false;
556*700637cbSDimitry Andric   bool HaveReg = false;
557*700637cbSDimitry Andric 
558*700637cbSDimitry Andric   for (MachineInstr *MI : OG) {
559*700637cbSDimitry Andric     const MachineMemOperand &MMO = getMemTarget(MI);
560*700637cbSDimitry Andric     MachineOperand &SO = HII->isPostIncrement(*MI)
561*700637cbSDimitry Andric                              ? MI->getOperand(3)
562*700637cbSDimitry Andric                              : MI->getOperand(2); // Source.
563*700637cbSDimitry Andric     unsigned NBits;
564*700637cbSDimitry Andric     uint64_t Mask;
565*700637cbSDimitry Andric     uint64_t Val;
566*700637cbSDimitry Andric 
567*700637cbSDimitry Andric     switch (SO.getType()) {
568*700637cbSDimitry Andric     case MachineOperand::MO_Immediate:
569*700637cbSDimitry Andric       LLVM_DEBUG(dbgs() << "Have store immediate\n");
570*700637cbSDimitry Andric       HaveImm = true;
571*700637cbSDimitry Andric 
572*700637cbSDimitry Andric       NBits = MMO.getSizeInBits().toRaw();
573*700637cbSDimitry Andric       Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
574*700637cbSDimitry Andric       Val = (SO.getImm() & Mask) << Shift;
575*700637cbSDimitry Andric       Acc |= Val;
576*700637cbSDimitry Andric       Shift += NBits;
577*700637cbSDimitry Andric       break;
578*700637cbSDimitry Andric     case MachineOperand::MO_Register:
579*700637cbSDimitry Andric       HaveReg = true;
580*700637cbSDimitry Andric       break;
581*700637cbSDimitry Andric     default:
582*700637cbSDimitry Andric       LLVM_DEBUG(dbgs() << "Unhandled store\n");
583*700637cbSDimitry Andric       return false;
584*700637cbSDimitry Andric     }
585*700637cbSDimitry Andric   }
586*700637cbSDimitry Andric 
587*700637cbSDimitry Andric   if (HaveImm && HaveReg) {
588*700637cbSDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
589*700637cbSDimitry Andric     return false;
590*700637cbSDimitry Andric   }
591*700637cbSDimitry Andric 
592*700637cbSDimitry Andric   MachineInstr *FirstSt = OG.front();
593*700637cbSDimitry Andric   DebugLoc DL = OG.back()->getDebugLoc();
594*700637cbSDimitry Andric   const MachineMemOperand &OldM = getMemTarget(FirstSt);
595*700637cbSDimitry Andric   MachineMemOperand *NewM =
596*700637cbSDimitry Andric       MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
597*700637cbSDimitry Andric                                TotalSize, OldM.getAlign(), OldM.getAAInfo());
598*700637cbSDimitry Andric   MachineInstr *StI;
599*700637cbSDimitry Andric   MachineOperand &MR =
600*700637cbSDimitry Andric       (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
601*700637cbSDimitry Andric                                       : FirstSt->getOperand(0));
602*700637cbSDimitry Andric   auto SecondSt = OG.back();
603*700637cbSDimitry Andric   if (HaveReg) {
604*700637cbSDimitry Andric     MachineOperand FReg =
605*700637cbSDimitry Andric         (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
606*700637cbSDimitry Andric                                         : FirstSt->getOperand(2));
607*700637cbSDimitry Andric     // Post increments appear first in the sorted group.
608*700637cbSDimitry Andric     // Cannot have a post increment for the second instruction
609*700637cbSDimitry Andric     assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
610*700637cbSDimitry Andric     MachineOperand SReg = SecondSt->getOperand(2);
611*700637cbSDimitry Andric     assert(FReg.isReg() && SReg.isReg() &&
612*700637cbSDimitry Andric            "Cannot merge store register and store imm");
613*700637cbSDimitry Andric     const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
614*700637cbSDimitry Andric     Register VReg =
615*700637cbSDimitry Andric         MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
616*700637cbSDimitry Andric     MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
617*700637cbSDimitry Andric     NG.push_back(CombI);
618*700637cbSDimitry Andric 
619*700637cbSDimitry Andric     if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
620*700637cbSDimitry Andric       const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
621*700637cbSDimitry Andric       auto IncDestMO = FirstSt->getOperand(0);
622*700637cbSDimitry Andric       auto IncMO = FirstSt->getOperand(2);
623*700637cbSDimitry Andric       StI =
624*700637cbSDimitry Andric           BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
625*700637cbSDimitry Andric     } else {
626*700637cbSDimitry Andric       const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
627*700637cbSDimitry Andric       auto OffMO = FirstSt->getOperand(1);
628*700637cbSDimitry Andric       StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
629*700637cbSDimitry Andric     }
630*700637cbSDimitry Andric     StI->addMemOperand(*MF, NewM);
631*700637cbSDimitry Andric     NG.push_back(StI);
632*700637cbSDimitry Andric     return true;
633*700637cbSDimitry Andric   }
634*700637cbSDimitry Andric 
635*700637cbSDimitry Andric   // Handle store immediates
636*700637cbSDimitry Andric   // There are no post increment store immediates on Hexagon
637*700637cbSDimitry Andric   assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
638*700637cbSDimitry Andric   auto Off = FirstSt->getOperand(1).getImm();
639*700637cbSDimitry Andric   if (TotalSize == 8) {
640*700637cbSDimitry Andric     // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
641*700637cbSDimitry Andric     uint64_t Mask = 0xFFFFFFFFU;
642*700637cbSDimitry Andric     int LowerAcc = int(Mask & Acc);
643*700637cbSDimitry Andric     int UpperAcc = Acc >> 32;
644*700637cbSDimitry Andric     Register DReg =
645*700637cbSDimitry Andric         MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
646*700637cbSDimitry Andric     MachineInstr *CombI;
647*700637cbSDimitry Andric     if (Acc != 0) {
648*700637cbSDimitry Andric       const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
649*700637cbSDimitry Andric       const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
650*700637cbSDimitry Andric       Register VReg = MF->getRegInfo().createVirtualRegister(RC);
651*700637cbSDimitry Andric       MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
652*700637cbSDimitry Andric       NG.push_back(TfrI);
653*700637cbSDimitry Andric       const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
654*700637cbSDimitry Andric       CombI = BuildMI(*MF, DL, CombD, DReg)
655*700637cbSDimitry Andric                   .addImm(UpperAcc)
656*700637cbSDimitry Andric                   .addReg(VReg, RegState::Kill);
657*700637cbSDimitry Andric     }
658*700637cbSDimitry Andric     // If immediates are 0, we do not need A2_tfrsi
659*700637cbSDimitry Andric     else {
660*700637cbSDimitry Andric       const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
661*700637cbSDimitry Andric       CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
662*700637cbSDimitry Andric     }
663*700637cbSDimitry Andric     NG.push_back(CombI);
664*700637cbSDimitry Andric     const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
665*700637cbSDimitry Andric     StI =
666*700637cbSDimitry Andric         BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
667*700637cbSDimitry Andric   } else if (Acc < 0x10000) {
668*700637cbSDimitry Andric     // Create mem[hw] = #Acc
669*700637cbSDimitry Andric     unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
670*700637cbSDimitry Andric                     : (TotalSize == 4) ? Hexagon::S4_storeiri_io
671*700637cbSDimitry Andric                                        : 0;
672*700637cbSDimitry Andric     assert(WOpc && "Unexpected size");
673*700637cbSDimitry Andric 
674*700637cbSDimitry Andric     int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
675*700637cbSDimitry Andric     const MCInstrDesc &StD = TII->get(WOpc);
676*700637cbSDimitry Andric     StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
677*700637cbSDimitry Andric   } else {
678*700637cbSDimitry Andric     // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
679*700637cbSDimitry Andric     const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
680*700637cbSDimitry Andric     const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
681*700637cbSDimitry Andric     Register VReg = MF->getRegInfo().createVirtualRegister(RC);
682*700637cbSDimitry Andric     MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
683*700637cbSDimitry Andric     NG.push_back(TfrI);
684*700637cbSDimitry Andric 
685*700637cbSDimitry Andric     unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
686*700637cbSDimitry Andric                     : (TotalSize == 4) ? Hexagon::S2_storeri_io
687*700637cbSDimitry Andric                                        : 0;
688*700637cbSDimitry Andric     assert(WOpc && "Unexpected size");
689*700637cbSDimitry Andric 
690*700637cbSDimitry Andric     const MCInstrDesc &StD = TII->get(WOpc);
691*700637cbSDimitry Andric     StI =
692*700637cbSDimitry Andric         BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
693*700637cbSDimitry Andric   }
694*700637cbSDimitry Andric   StI->addMemOperand(*MF, NewM);
695*700637cbSDimitry Andric   NG.push_back(StI);
696*700637cbSDimitry Andric 
697*700637cbSDimitry Andric   return true;
698*700637cbSDimitry Andric }
699*700637cbSDimitry Andric 
700*700637cbSDimitry Andric /// Given an "old group" OG of loads, create a "new group" NG of instructions
701*700637cbSDimitry Andric /// to replace them.  Ideally, NG would only have a single instruction in it,
702*700637cbSDimitry Andric /// but that may only be possible for double register loads.
createWideLoads(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)703*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
704*700637cbSDimitry Andric                                                unsigned TotalSize) {
705*700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "Creating wide loads\n");
706*700637cbSDimitry Andric   // XXX Current limitations:
707*700637cbSDimitry Andric   // - only expect stores of immediate values in OG,
708*700637cbSDimitry Andric   // - only handle a TotalSize of up to 8
709*700637cbSDimitry Andric   if (TotalSize > MaxWideSize)
710*700637cbSDimitry Andric     return false;
711*700637cbSDimitry Andric   assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");
712*700637cbSDimitry Andric 
713*700637cbSDimitry Andric   MachineInstr *FirstLd = OG.front();
714*700637cbSDimitry Andric   const MachineMemOperand &OldM = getMemTarget(FirstLd);
715*700637cbSDimitry Andric   MachineMemOperand *NewM =
716*700637cbSDimitry Andric       MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
717*700637cbSDimitry Andric                                TotalSize, OldM.getAlign(), OldM.getAAInfo());
718*700637cbSDimitry Andric 
719*700637cbSDimitry Andric   MachineOperand &MR = FirstLd->getOperand(0);
720*700637cbSDimitry Andric   MachineOperand &MRBase =
721*700637cbSDimitry Andric       (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
722*700637cbSDimitry Andric                                       : FirstLd->getOperand(1));
723*700637cbSDimitry Andric   DebugLoc DL = OG.back()->getDebugLoc();
724*700637cbSDimitry Andric 
725*700637cbSDimitry Andric   // Create the double register Load Instruction.
726*700637cbSDimitry Andric   Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
727*700637cbSDimitry Andric   MachineInstr *LdI;
728*700637cbSDimitry Andric 
729*700637cbSDimitry Andric   // Post increments appear first in the sorted group
730*700637cbSDimitry Andric   if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
731*700637cbSDimitry Andric     auto IncDestMO = FirstLd->getOperand(1);
732*700637cbSDimitry Andric     auto IncMO = FirstLd->getOperand(3);
733*700637cbSDimitry Andric     LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
734*700637cbSDimitry Andric               .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
735*700637cbSDimitry Andric               .add(IncDestMO)
736*700637cbSDimitry Andric               .add(MRBase)
737*700637cbSDimitry Andric               .add(IncMO);
738*700637cbSDimitry Andric     LdI->addMemOperand(*MF, NewM);
739*700637cbSDimitry Andric   } else {
740*700637cbSDimitry Andric     auto OffMO = FirstLd->getOperand(2);
741*700637cbSDimitry Andric     LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
742*700637cbSDimitry Andric               .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
743*700637cbSDimitry Andric               .add(MRBase)
744*700637cbSDimitry Andric               .add(OffMO);
745*700637cbSDimitry Andric     LdI->addMemOperand(*MF, NewM);
746*700637cbSDimitry Andric   }
747*700637cbSDimitry Andric   NG.push_back(LdI);
748*700637cbSDimitry Andric 
749*700637cbSDimitry Andric   auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
750*700637cbSDimitry Andric                         MachineInstr *DstReg) {
751*700637cbSDimitry Andric     Register DestReg = DstReg->getOperand(0).getReg();
752*700637cbSDimitry Andric     return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
753*700637cbSDimitry Andric         .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
754*700637cbSDimitry Andric   };
755*700637cbSDimitry Andric 
756*700637cbSDimitry Andric   MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
757*700637cbSDimitry Andric   MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
758*700637cbSDimitry Andric   NG.push_back(LdI_lo);
759*700637cbSDimitry Andric   NG.push_back(LdI_hi);
760*700637cbSDimitry Andric 
761*700637cbSDimitry Andric   return true;
762*700637cbSDimitry Andric }
763*700637cbSDimitry Andric 
764*700637cbSDimitry Andric // Replace instructions from the old group OG with instructions from the
765*700637cbSDimitry Andric // new group NG.  Conceptually, remove all instructions in OG, and then
766*700637cbSDimitry Andric // insert all instructions in NG, starting at where the first instruction
767*700637cbSDimitry Andric // from OG was (in the order in which they appeared in the basic block).
768*700637cbSDimitry Andric // (The ordering in OG does not have to match the order in the basic block.)
replaceInsts(InstrGroup & OG,InstrGroup & NG)769*700637cbSDimitry Andric bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
770*700637cbSDimitry Andric   LLVM_DEBUG({
771*700637cbSDimitry Andric     dbgs() << "Replacing:\n";
772*700637cbSDimitry Andric     for (auto I : OG)
773*700637cbSDimitry Andric       dbgs() << "  " << *I;
774*700637cbSDimitry Andric     dbgs() << "with\n";
775*700637cbSDimitry Andric     for (auto I : NG)
776*700637cbSDimitry Andric       dbgs() << "  " << *I;
777*700637cbSDimitry Andric   });
778*700637cbSDimitry Andric 
779*700637cbSDimitry Andric   MachineBasicBlock *MBB = OG.back()->getParent();
780*700637cbSDimitry Andric   MachineBasicBlock::iterator InsertAt = MBB->end();
781*700637cbSDimitry Andric 
782*700637cbSDimitry Andric   // Need to establish the insertion point.
783*700637cbSDimitry Andric   // For loads the best one is right before the first load in the OG,
784*700637cbSDimitry Andric   // but in the order in which the insts occur in the program list.
785*700637cbSDimitry Andric   // For stores the best point is right after the last store in the OG.
786*700637cbSDimitry Andric   // Since the ordering in OG does not correspond
787*700637cbSDimitry Andric   // to the order in the program list, we need to do some work to find
788*700637cbSDimitry Andric   // the insertion point.
789*700637cbSDimitry Andric 
790*700637cbSDimitry Andric   // Create a set of all instructions in OG (for quick lookup).
791*700637cbSDimitry Andric   InstrSet OldMemInsts(llvm::from_range, OG);
792*700637cbSDimitry Andric 
793*700637cbSDimitry Andric   if (Mode == WideningMode::Load) {
794*700637cbSDimitry Andric     // Find the first load instruction in the block that is present in OG.
795*700637cbSDimitry Andric     for (auto &I : *MBB) {
796*700637cbSDimitry Andric       if (OldMemInsts.count(&I)) {
797*700637cbSDimitry Andric         InsertAt = I;
798*700637cbSDimitry Andric         break;
799*700637cbSDimitry Andric       }
800*700637cbSDimitry Andric     }
801*700637cbSDimitry Andric 
802*700637cbSDimitry Andric     assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");
803*700637cbSDimitry Andric 
804*700637cbSDimitry Andric     for (auto *I : NG)
805*700637cbSDimitry Andric       MBB->insert(InsertAt, I);
806*700637cbSDimitry Andric   } else {
807*700637cbSDimitry Andric     // Find the last store instruction in the block that is present in OG.
808*700637cbSDimitry Andric     auto I = MBB->rbegin();
809*700637cbSDimitry Andric     for (; I != MBB->rend(); ++I) {
810*700637cbSDimitry Andric       if (OldMemInsts.count(&(*I))) {
811*700637cbSDimitry Andric         InsertAt = (*I).getIterator();
812*700637cbSDimitry Andric         break;
813*700637cbSDimitry Andric       }
814*700637cbSDimitry Andric     }
815*700637cbSDimitry Andric 
816*700637cbSDimitry Andric     assert((I != MBB->rend()) && "Cannot locate any store from the group");
817*700637cbSDimitry Andric 
818*700637cbSDimitry Andric     for (auto I = NG.rbegin(); I != NG.rend(); ++I)
819*700637cbSDimitry Andric       MBB->insertAfter(InsertAt, *I);
820*700637cbSDimitry Andric   }
821*700637cbSDimitry Andric 
822*700637cbSDimitry Andric   for (auto *I : OG)
823*700637cbSDimitry Andric     I->eraseFromParent();
824*700637cbSDimitry Andric 
825*700637cbSDimitry Andric   return true;
826*700637cbSDimitry Andric }
827*700637cbSDimitry Andric 
828*700637cbSDimitry Andric // Break up the group into smaller groups, each of which can be replaced by
829*700637cbSDimitry Andric // a single wide load/store.  Widen each such smaller group and replace the old
830*700637cbSDimitry Andric // instructions with the widened ones.
processGroup(InstrGroup & Group)831*700637cbSDimitry Andric bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
832*700637cbSDimitry Andric   bool Changed = false;
833*700637cbSDimitry Andric   InstrGroup::iterator I = Group.begin(), E = Group.end();
834*700637cbSDimitry Andric   InstrGroup OG, NG; // Old and new groups.
835*700637cbSDimitry Andric   unsigned CollectedSize;
836*700637cbSDimitry Andric 
837*700637cbSDimitry Andric   while (I != E) {
838*700637cbSDimitry Andric     OG.clear();
839*700637cbSDimitry Andric     NG.clear();
840*700637cbSDimitry Andric 
841*700637cbSDimitry Andric     bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) &&
842*700637cbSDimitry Andric                 createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG);
843*700637cbSDimitry Andric     if (!Succ)
844*700637cbSDimitry Andric       continue;
845*700637cbSDimitry Andric 
846*700637cbSDimitry Andric     assert(OG.size() > 1 && "Created invalid group");
847*700637cbSDimitry Andric     assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
848*700637cbSDimitry Andric     I += OG.size() - 1;
849*700637cbSDimitry Andric 
850*700637cbSDimitry Andric     Changed = true;
851*700637cbSDimitry Andric   }
852*700637cbSDimitry Andric 
853*700637cbSDimitry Andric   return Changed;
854*700637cbSDimitry Andric }
855*700637cbSDimitry Andric 
856*700637cbSDimitry Andric // Process a single basic block: create the load/store groups, and replace them
857*700637cbSDimitry Andric // with the widened insts, if possible.  Processing of each basic block
858*700637cbSDimitry Andric // is independent from processing of any other basic block.  This transfor-
859*700637cbSDimitry Andric // mation could be stopped after having processed any basic block without
860*700637cbSDimitry Andric // any ill effects (other than not having performed widening in the unpro-
861*700637cbSDimitry Andric // cessed blocks).  Also, the basic blocks can be processed in any order.
processBasicBlock(MachineBasicBlock & MBB)862*700637cbSDimitry Andric bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
863*700637cbSDimitry Andric   InstrGroupList SGs;
864*700637cbSDimitry Andric   bool Changed = false;
865*700637cbSDimitry Andric 
866*700637cbSDimitry Andric   // To prevent long compile time check for max BB size.
867*700637cbSDimitry Andric   if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
868*700637cbSDimitry Andric     return false;
869*700637cbSDimitry Andric 
870*700637cbSDimitry Andric   createGroups(MBB, SGs);
871*700637cbSDimitry Andric 
872*700637cbSDimitry Andric   auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
873*700637cbSDimitry Andric     return getOffset(A) < getOffset(B);
874*700637cbSDimitry Andric   };
875*700637cbSDimitry Andric   for (auto &G : SGs) {
876*700637cbSDimitry Andric     assert(G.size() > 1 && "Group with fewer than 2 elements");
877*700637cbSDimitry Andric     llvm::sort(G, Less);
878*700637cbSDimitry Andric 
879*700637cbSDimitry Andric     Changed |= processGroup(G);
880*700637cbSDimitry Andric   }
881*700637cbSDimitry Andric 
882*700637cbSDimitry Andric   return Changed;
883*700637cbSDimitry Andric }
884*700637cbSDimitry Andric 
run()885*700637cbSDimitry Andric bool HexagonLoadStoreWidening::run() {
886*700637cbSDimitry Andric   bool Changed = false;
887*700637cbSDimitry Andric 
888*700637cbSDimitry Andric   for (auto &B : *MF)
889*700637cbSDimitry Andric     Changed |= processBasicBlock(B);
890*700637cbSDimitry Andric 
891*700637cbSDimitry Andric   return Changed;
892*700637cbSDimitry Andric }
893*700637cbSDimitry Andric 
createHexagonStoreWidening()894*700637cbSDimitry Andric FunctionPass *llvm::createHexagonStoreWidening() {
895*700637cbSDimitry Andric   return new HexagonStoreWidening();
896*700637cbSDimitry Andric }
897*700637cbSDimitry Andric 
createHexagonLoadWidening()898*700637cbSDimitry Andric FunctionPass *llvm::createHexagonLoadWidening() {
899*700637cbSDimitry Andric   return new HexagonLoadWidening();
900*700637cbSDimitry Andric }
901