1*700637cbSDimitry Andric //===---HexagonLoadStoreWidening.cpp---------------------------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric // HexagonStoreWidening:
9*700637cbSDimitry Andric // Replace sequences of "narrow" stores to adjacent memory locations with
10*700637cbSDimitry Andric // a fewer "wide" stores that have the same effect.
11*700637cbSDimitry Andric // For example, replace:
12*700637cbSDimitry Andric // S4_storeirb_io %100, 0, 0 ; store-immediate-byte
13*700637cbSDimitry Andric // S4_storeirb_io %100, 1, 0 ; store-immediate-byte
14*700637cbSDimitry Andric // with
15*700637cbSDimitry Andric // S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
16*700637cbSDimitry Andric // The above is the general idea. The actual cases handled by the code
17*700637cbSDimitry Andric // may be a bit more complex.
18*700637cbSDimitry Andric // The purpose of this pass is to reduce the number of outstanding stores,
19*700637cbSDimitry Andric // or as one could say, "reduce store queue pressure". Also, wide stores
20*700637cbSDimitry Andric // mean fewer stores, and since there are only two memory instructions allowed
21*700637cbSDimitry Andric // per packet, it also means fewer packets, and ultimately fewer cycles.
22*700637cbSDimitry Andric //
23*700637cbSDimitry Andric // HexagonLoadWidening does the same thing as HexagonStoreWidening but
24*700637cbSDimitry Andric // for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
25*700637cbSDimitry Andric // For example:
26*700637cbSDimitry Andric // Replace
27*700637cbSDimitry Andric // %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
28*700637cbSDimitry Andric // %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
29*700637cbSDimitry Andric // with
30*700637cbSDimitry Andric // %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
31*700637cbSDimitry Andric // %2:intregs = COPY %4.isub_lo:doubleregs
32*700637cbSDimitry Andric // %3:intregs = COPY %4.isub_hi:doubleregs
33*700637cbSDimitry Andric //
34*700637cbSDimitry Andric // LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
35*700637cbSDimitry Andric // insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
36*700637cbSDimitry Andric
37*700637cbSDimitry Andric //===---------------------------------------------------------------------===//
38*700637cbSDimitry Andric
39*700637cbSDimitry Andric #include "Hexagon.h"
40*700637cbSDimitry Andric #include "HexagonInstrInfo.h"
41*700637cbSDimitry Andric #include "HexagonRegisterInfo.h"
42*700637cbSDimitry Andric #include "HexagonSubtarget.h"
43*700637cbSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
44*700637cbSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
45*700637cbSDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
46*700637cbSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
47*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
48*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
49*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
50*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
51*700637cbSDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
52*700637cbSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
53*700637cbSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
54*700637cbSDimitry Andric #include "llvm/IR/DebugLoc.h"
55*700637cbSDimitry Andric #include "llvm/InitializePasses.h"
56*700637cbSDimitry Andric #include "llvm/MC/MCInstrDesc.h"
57*700637cbSDimitry Andric #include "llvm/Pass.h"
58*700637cbSDimitry Andric #include "llvm/Support/Debug.h"
59*700637cbSDimitry Andric #include "llvm/Support/ErrorHandling.h"
60*700637cbSDimitry Andric #include "llvm/Support/MathExtras.h"
61*700637cbSDimitry Andric #include "llvm/Support/raw_ostream.h"
62*700637cbSDimitry Andric #include <algorithm>
63*700637cbSDimitry Andric #include <cassert>
64*700637cbSDimitry Andric #include <cstdint>
65*700637cbSDimitry Andric #include <iterator>
66*700637cbSDimitry Andric
67*700637cbSDimitry Andric using namespace llvm;
68*700637cbSDimitry Andric
69*700637cbSDimitry Andric #define DEBUG_TYPE "hexagon-load-store-widening"
70*700637cbSDimitry Andric
71*700637cbSDimitry Andric static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
72*700637cbSDimitry Andric "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
73*700637cbSDimitry Andric cl::desc("Limit block size to analyze in load/store widening pass"));
74*700637cbSDimitry Andric
75*700637cbSDimitry Andric namespace {
76*700637cbSDimitry Andric
77*700637cbSDimitry Andric struct HexagonLoadStoreWidening {
78*700637cbSDimitry Andric enum WideningMode { Store, Load };
79*700637cbSDimitry Andric const HexagonInstrInfo *TII;
80*700637cbSDimitry Andric const HexagonRegisterInfo *TRI;
81*700637cbSDimitry Andric MachineRegisterInfo *MRI;
82*700637cbSDimitry Andric AliasAnalysis *AA;
83*700637cbSDimitry Andric MachineFunction *MF;
84*700637cbSDimitry Andric
85*700637cbSDimitry Andric public:
HexagonLoadStoreWidening__anon01fcdbeb0111::HexagonLoadStoreWidening86*700637cbSDimitry Andric HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87*700637cbSDimitry Andric const HexagonRegisterInfo *TRI,
88*700637cbSDimitry Andric MachineRegisterInfo *MRI, AliasAnalysis *AA,
89*700637cbSDimitry Andric MachineFunction *MF, bool StoreMode)
90*700637cbSDimitry Andric : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91*700637cbSDimitry Andric Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92*700637cbSDimitry Andric HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93*700637cbSDimitry Andric
94*700637cbSDimitry Andric bool run();
95*700637cbSDimitry Andric
96*700637cbSDimitry Andric private:
97*700637cbSDimitry Andric const bool Mode;
98*700637cbSDimitry Andric const unsigned MaxWideSize = 8;
99*700637cbSDimitry Andric const HexagonInstrInfo *HII = nullptr;
100*700637cbSDimitry Andric
101*700637cbSDimitry Andric using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102*700637cbSDimitry Andric using InstrGroup = SmallVector<MachineInstr *, 8>;
103*700637cbSDimitry Andric using InstrGroupList = SmallVector<InstrGroup, 8>;
104*700637cbSDimitry Andric
105*700637cbSDimitry Andric InstrSet ProcessedInsts;
106*700637cbSDimitry Andric
107*700637cbSDimitry Andric unsigned getBaseAddressRegister(const MachineInstr *MI);
108*700637cbSDimitry Andric int64_t getOffset(const MachineInstr *MI);
109*700637cbSDimitry Andric int64_t getPostIncrementValue(const MachineInstr *MI);
110*700637cbSDimitry Andric bool handledInstType(const MachineInstr *MI);
111*700637cbSDimitry Andric
112*700637cbSDimitry Andric void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113*700637cbSDimitry Andric void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114*700637cbSDimitry Andric bool processBasicBlock(MachineBasicBlock &MBB);
115*700637cbSDimitry Andric bool processGroup(InstrGroup &Group);
116*700637cbSDimitry Andric bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117*700637cbSDimitry Andric InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118*700637cbSDimitry Andric bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119*700637cbSDimitry Andric bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120*700637cbSDimitry Andric bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121*700637cbSDimitry Andric bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122*700637cbSDimitry Andric bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123*700637cbSDimitry Andric bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124*700637cbSDimitry Andric };
125*700637cbSDimitry Andric
126*700637cbSDimitry Andric struct HexagonStoreWidening : public MachineFunctionPass {
127*700637cbSDimitry Andric static char ID;
128*700637cbSDimitry Andric
HexagonStoreWidening__anon01fcdbeb0111::HexagonStoreWidening129*700637cbSDimitry Andric HexagonStoreWidening() : MachineFunctionPass(ID) {}
130*700637cbSDimitry Andric
getPassName__anon01fcdbeb0111::HexagonStoreWidening131*700637cbSDimitry Andric StringRef getPassName() const override { return "Hexagon Store Widening"; }
132*700637cbSDimitry Andric
getAnalysisUsage__anon01fcdbeb0111::HexagonStoreWidening133*700637cbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
134*700637cbSDimitry Andric AU.addRequired<AAResultsWrapperPass>();
135*700637cbSDimitry Andric AU.addPreserved<AAResultsWrapperPass>();
136*700637cbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
137*700637cbSDimitry Andric }
138*700637cbSDimitry Andric
runOnMachineFunction__anon01fcdbeb0111::HexagonStoreWidening139*700637cbSDimitry Andric bool runOnMachineFunction(MachineFunction &MFn) override {
140*700637cbSDimitry Andric if (skipFunction(MFn.getFunction()))
141*700637cbSDimitry Andric return false;
142*700637cbSDimitry Andric
143*700637cbSDimitry Andric auto &ST = MFn.getSubtarget<HexagonSubtarget>();
144*700637cbSDimitry Andric const HexagonInstrInfo *TII = ST.getInstrInfo();
145*700637cbSDimitry Andric const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
146*700637cbSDimitry Andric MachineRegisterInfo *MRI = &MFn.getRegInfo();
147*700637cbSDimitry Andric AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
148*700637cbSDimitry Andric
149*700637cbSDimitry Andric return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
150*700637cbSDimitry Andric }
151*700637cbSDimitry Andric };
152*700637cbSDimitry Andric
153*700637cbSDimitry Andric struct HexagonLoadWidening : public MachineFunctionPass {
154*700637cbSDimitry Andric static char ID;
155*700637cbSDimitry Andric
HexagonLoadWidening__anon01fcdbeb0111::HexagonLoadWidening156*700637cbSDimitry Andric HexagonLoadWidening() : MachineFunctionPass(ID) {}
157*700637cbSDimitry Andric
getPassName__anon01fcdbeb0111::HexagonLoadWidening158*700637cbSDimitry Andric StringRef getPassName() const override { return "Hexagon Load Widening"; }
159*700637cbSDimitry Andric
getAnalysisUsage__anon01fcdbeb0111::HexagonLoadWidening160*700637cbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
161*700637cbSDimitry Andric AU.addRequired<AAResultsWrapperPass>();
162*700637cbSDimitry Andric AU.addPreserved<AAResultsWrapperPass>();
163*700637cbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
164*700637cbSDimitry Andric }
165*700637cbSDimitry Andric
runOnMachineFunction__anon01fcdbeb0111::HexagonLoadWidening166*700637cbSDimitry Andric bool runOnMachineFunction(MachineFunction &MFn) override {
167*700637cbSDimitry Andric if (skipFunction(MFn.getFunction()))
168*700637cbSDimitry Andric return false;
169*700637cbSDimitry Andric
170*700637cbSDimitry Andric auto &ST = MFn.getSubtarget<HexagonSubtarget>();
171*700637cbSDimitry Andric const HexagonInstrInfo *TII = ST.getInstrInfo();
172*700637cbSDimitry Andric const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
173*700637cbSDimitry Andric MachineRegisterInfo *MRI = &MFn.getRegInfo();
174*700637cbSDimitry Andric AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
175*700637cbSDimitry Andric return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
176*700637cbSDimitry Andric }
177*700637cbSDimitry Andric };
178*700637cbSDimitry Andric
179*700637cbSDimitry Andric char HexagonStoreWidening::ID = 0;
180*700637cbSDimitry Andric char HexagonLoadWidening::ID = 0;
181*700637cbSDimitry Andric
182*700637cbSDimitry Andric } // end anonymous namespace
183*700637cbSDimitry Andric
184*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
185*700637cbSDimitry Andric "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)186*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
187*700637cbSDimitry Andric INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
188*700637cbSDimitry Andric "Hexagon Store Widening", false, false)
189*700637cbSDimitry Andric
190*700637cbSDimitry Andric INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
191*700637cbSDimitry Andric "Hexagon Load Widening", false, false)
192*700637cbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
193*700637cbSDimitry Andric INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
194*700637cbSDimitry Andric "Hexagon Load Widening", false, false)
195*700637cbSDimitry Andric
196*700637cbSDimitry Andric static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
197*700637cbSDimitry Andric assert(!MI->memoperands_empty() && "Expecting memory operands");
198*700637cbSDimitry Andric return **MI->memoperands_begin();
199*700637cbSDimitry Andric }
200*700637cbSDimitry Andric
201*700637cbSDimitry Andric unsigned
getBaseAddressRegister(const MachineInstr * MI)202*700637cbSDimitry Andric HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
203*700637cbSDimitry Andric assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
204*700637cbSDimitry Andric unsigned Base, Offset;
205*700637cbSDimitry Andric HII->getBaseAndOffsetPosition(*MI, Base, Offset);
206*700637cbSDimitry Andric const MachineOperand &MO = MI->getOperand(Base);
207*700637cbSDimitry Andric assert(MO.isReg() && "Expecting register operand");
208*700637cbSDimitry Andric return MO.getReg();
209*700637cbSDimitry Andric }
210*700637cbSDimitry Andric
getOffset(const MachineInstr * MI)211*700637cbSDimitry Andric int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
212*700637cbSDimitry Andric assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
213*700637cbSDimitry Andric
214*700637cbSDimitry Andric // On Hexagon, post-incs always have an offset of 0
215*700637cbSDimitry Andric // There is no Offset operand to post-incs
216*700637cbSDimitry Andric if (HII->isPostIncrement(*MI))
217*700637cbSDimitry Andric return 0;
218*700637cbSDimitry Andric
219*700637cbSDimitry Andric unsigned Base, Offset;
220*700637cbSDimitry Andric
221*700637cbSDimitry Andric HII->getBaseAndOffsetPosition(*MI, Base, Offset);
222*700637cbSDimitry Andric const MachineOperand &MO = MI->getOperand(Offset);
223*700637cbSDimitry Andric switch (MO.getType()) {
224*700637cbSDimitry Andric case MachineOperand::MO_Immediate:
225*700637cbSDimitry Andric return MO.getImm();
226*700637cbSDimitry Andric case MachineOperand::MO_GlobalAddress:
227*700637cbSDimitry Andric return MO.getOffset();
228*700637cbSDimitry Andric default:
229*700637cbSDimitry Andric break;
230*700637cbSDimitry Andric }
231*700637cbSDimitry Andric llvm_unreachable("Expecting an immediate or global operand");
232*700637cbSDimitry Andric }
233*700637cbSDimitry Andric
234*700637cbSDimitry Andric inline int64_t
getPostIncrementValue(const MachineInstr * MI)235*700637cbSDimitry Andric HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
236*700637cbSDimitry Andric unsigned Base, PostIncIdx;
237*700637cbSDimitry Andric HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx);
238*700637cbSDimitry Andric const MachineOperand &MO = MI->getOperand(PostIncIdx);
239*700637cbSDimitry Andric return MO.getImm();
240*700637cbSDimitry Andric }
241*700637cbSDimitry Andric
242*700637cbSDimitry Andric // Filtering function: any loads/stores whose opcodes are not "approved" of by
243*700637cbSDimitry Andric // this function will not be subjected to widening.
handledInstType(const MachineInstr * MI)244*700637cbSDimitry Andric inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
245*700637cbSDimitry Andric unsigned Opc = MI->getOpcode();
246*700637cbSDimitry Andric if (Mode == WideningMode::Store) {
247*700637cbSDimitry Andric switch (Opc) {
248*700637cbSDimitry Andric case Hexagon::S4_storeirb_io:
249*700637cbSDimitry Andric case Hexagon::S4_storeirh_io:
250*700637cbSDimitry Andric case Hexagon::S4_storeiri_io:
251*700637cbSDimitry Andric case Hexagon::S2_storeri_io:
252*700637cbSDimitry Andric // Base address must be a register. (Implement FI later.)
253*700637cbSDimitry Andric return MI->getOperand(0).isReg();
254*700637cbSDimitry Andric case Hexagon::S2_storeri_pi:
255*700637cbSDimitry Andric return MI->getOperand(1).isReg();
256*700637cbSDimitry Andric }
257*700637cbSDimitry Andric } else {
258*700637cbSDimitry Andric // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
259*700637cbSDimitry Andric // loads. So we only widen 32 bit loads as we don't need to select the
260*700637cbSDimitry Andric // right bits with AND & SHIFT ops.
261*700637cbSDimitry Andric switch (Opc) {
262*700637cbSDimitry Andric case Hexagon::L2_loadri_io:
263*700637cbSDimitry Andric // Base address must be a register and offset must be immediate.
264*700637cbSDimitry Andric return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
265*700637cbSDimitry Andric MI->getOperand(2).isImm();
266*700637cbSDimitry Andric case Hexagon::L2_loadri_pi:
267*700637cbSDimitry Andric return !MI->memoperands_empty() && MI->getOperand(2).isReg();
268*700637cbSDimitry Andric }
269*700637cbSDimitry Andric }
270*700637cbSDimitry Andric return false;
271*700637cbSDimitry Andric }
272*700637cbSDimitry Andric
addDefsUsesToList(const MachineInstr * MI,DenseSet<Register> & RegDefs,DenseSet<Register> & RegUses)273*700637cbSDimitry Andric static void addDefsUsesToList(const MachineInstr *MI,
274*700637cbSDimitry Andric DenseSet<Register> &RegDefs,
275*700637cbSDimitry Andric DenseSet<Register> &RegUses) {
276*700637cbSDimitry Andric for (const auto &Op : MI->operands()) {
277*700637cbSDimitry Andric if (!Op.isReg())
278*700637cbSDimitry Andric continue;
279*700637cbSDimitry Andric if (Op.isDef())
280*700637cbSDimitry Andric RegDefs.insert(Op.getReg());
281*700637cbSDimitry Andric if (Op.readsReg())
282*700637cbSDimitry Andric RegUses.insert(Op.getReg());
283*700637cbSDimitry Andric }
284*700637cbSDimitry Andric }
285*700637cbSDimitry Andric
canSwapInstructions(const MachineInstr * A,const MachineInstr * B)286*700637cbSDimitry Andric bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
287*700637cbSDimitry Andric const MachineInstr *B) {
288*700637cbSDimitry Andric DenseSet<Register> ARegDefs;
289*700637cbSDimitry Andric DenseSet<Register> ARegUses;
290*700637cbSDimitry Andric addDefsUsesToList(A, ARegDefs, ARegUses);
291*700637cbSDimitry Andric if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
292*700637cbSDimitry Andric (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
293*700637cbSDimitry Andric return false;
294*700637cbSDimitry Andric for (const auto &BOp : B->operands()) {
295*700637cbSDimitry Andric if (!BOp.isReg())
296*700637cbSDimitry Andric continue;
297*700637cbSDimitry Andric if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
298*700637cbSDimitry Andric return false;
299*700637cbSDimitry Andric if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
300*700637cbSDimitry Andric return false;
301*700637cbSDimitry Andric }
302*700637cbSDimitry Andric return true;
303*700637cbSDimitry Andric }
304*700637cbSDimitry Andric
305*700637cbSDimitry Andric // Inspect a machine basic block, and generate groups out of loads/stores
306*700637cbSDimitry Andric // encountered in the block.
307*700637cbSDimitry Andric //
308*700637cbSDimitry Andric // A load/store group is a group of loads or stores that use the same base
309*700637cbSDimitry Andric // register, and which can be reordered within that group without altering the
310*700637cbSDimitry Andric // semantics of the program. A single group could be widened as
311*700637cbSDimitry Andric // a whole, if there existed a single load/store instruction with the same
312*700637cbSDimitry Andric // semantics as the entire group. In many cases, a single group may need more
313*700637cbSDimitry Andric // than one wide load or store.
createGroups(MachineBasicBlock & MBB,InstrGroupList & StoreGroups)314*700637cbSDimitry Andric void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
315*700637cbSDimitry Andric InstrGroupList &StoreGroups) {
316*700637cbSDimitry Andric // Traverse all instructions and if we encounter
317*700637cbSDimitry Andric // a load/store, then try to create a group starting at that instruction
318*700637cbSDimitry Andric // i.e. a sequence of independent loads/stores that can be widened.
319*700637cbSDimitry Andric for (auto I = MBB.begin(); I != MBB.end(); ++I) {
320*700637cbSDimitry Andric MachineInstr *MI = &(*I);
321*700637cbSDimitry Andric if (!handledInstType(MI))
322*700637cbSDimitry Andric continue;
323*700637cbSDimitry Andric if (ProcessedInsts.count(MI))
324*700637cbSDimitry Andric continue;
325*700637cbSDimitry Andric
326*700637cbSDimitry Andric // Found a store. Try to create a store group.
327*700637cbSDimitry Andric InstrGroup G;
328*700637cbSDimitry Andric createGroup(MI, G);
329*700637cbSDimitry Andric if (G.size() > 1)
330*700637cbSDimitry Andric StoreGroups.push_back(G);
331*700637cbSDimitry Andric }
332*700637cbSDimitry Andric }
333*700637cbSDimitry Andric
334*700637cbSDimitry Andric // Create a single load/store group. The insts need to be independent between
335*700637cbSDimitry Andric // themselves, and also there cannot be other instructions between them
336*700637cbSDimitry Andric // that could read or modify storage being read from or stored into.
createGroup(MachineInstr * BaseInst,InstrGroup & Group)337*700637cbSDimitry Andric void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
338*700637cbSDimitry Andric InstrGroup &Group) {
339*700637cbSDimitry Andric assert(handledInstType(BaseInst) && "Unexpected instruction");
340*700637cbSDimitry Andric unsigned BaseReg = getBaseAddressRegister(BaseInst);
341*700637cbSDimitry Andric InstrGroup Other;
342*700637cbSDimitry Andric
343*700637cbSDimitry Andric Group.push_back(BaseInst);
344*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
345*700637cbSDimitry Andric auto End = BaseInst->getParent()->end();
346*700637cbSDimitry Andric auto I = BaseInst->getIterator();
347*700637cbSDimitry Andric
348*700637cbSDimitry Andric while (true) {
349*700637cbSDimitry Andric I = std::next(I);
350*700637cbSDimitry Andric if (I == End)
351*700637cbSDimitry Andric break;
352*700637cbSDimitry Andric MachineInstr *MI = &(*I);
353*700637cbSDimitry Andric
354*700637cbSDimitry Andric // Assume calls are aliased to everything.
355*700637cbSDimitry Andric if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
356*700637cbSDimitry Andric MI->hasOrderedMemoryRef())
357*700637cbSDimitry Andric return;
358*700637cbSDimitry Andric
359*700637cbSDimitry Andric if (!handledInstType(MI)) {
360*700637cbSDimitry Andric if (MI->mayLoadOrStore())
361*700637cbSDimitry Andric Other.push_back(MI);
362*700637cbSDimitry Andric continue;
363*700637cbSDimitry Andric }
364*700637cbSDimitry Andric
365*700637cbSDimitry Andric // We have a handledInstType instruction
366*700637cbSDimitry Andric // If this load/store instruction is aliased with anything already in the
367*700637cbSDimitry Andric // group, terminate the group now.
368*700637cbSDimitry Andric for (auto GI : Group)
369*700637cbSDimitry Andric if (GI->mayAlias(AA, *MI, true))
370*700637cbSDimitry Andric return;
371*700637cbSDimitry Andric if (Mode == WideningMode::Load) {
372*700637cbSDimitry Andric // Check if current load MI can be moved to the first load instruction
373*700637cbSDimitry Andric // in Group. If any load instruction aliases with memory instructions in
374*700637cbSDimitry Andric // Other, terminate the group.
375*700637cbSDimitry Andric for (auto MemI : Other)
376*700637cbSDimitry Andric if (!canSwapInstructions(MI, MemI))
377*700637cbSDimitry Andric return;
378*700637cbSDimitry Andric } else {
379*700637cbSDimitry Andric // Check if store instructions in the group can be moved to current
380*700637cbSDimitry Andric // store MI. If any store instruction aliases with memory instructions
381*700637cbSDimitry Andric // in Other, terminate the group.
382*700637cbSDimitry Andric for (auto MemI : Other) {
383*700637cbSDimitry Andric if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
384*700637cbSDimitry Andric 0)
385*700637cbSDimitry Andric continue;
386*700637cbSDimitry Andric for (auto GI : Group)
387*700637cbSDimitry Andric if (!canSwapInstructions(MemI, GI))
388*700637cbSDimitry Andric return;
389*700637cbSDimitry Andric }
390*700637cbSDimitry Andric }
391*700637cbSDimitry Andric
392*700637cbSDimitry Andric unsigned BR = getBaseAddressRegister(MI);
393*700637cbSDimitry Andric if (BR == BaseReg) {
394*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
395*700637cbSDimitry Andric Group.push_back(MI);
396*700637cbSDimitry Andric ProcessedInsts.insert(MI);
397*700637cbSDimitry Andric }
398*700637cbSDimitry Andric } // while
399*700637cbSDimitry Andric }
400*700637cbSDimitry Andric
401*700637cbSDimitry Andric // Check if load/store instructions S1 and S2 are adjacent. More precisely,
402*700637cbSDimitry Andric // S2 has to access memory immediately following that accessed by S1.
areAdjacent(const MachineInstr * S1,const MachineInstr * S2)403*700637cbSDimitry Andric bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
404*700637cbSDimitry Andric const MachineInstr *S2) {
405*700637cbSDimitry Andric if (!handledInstType(S1) || !handledInstType(S2))
406*700637cbSDimitry Andric return false;
407*700637cbSDimitry Andric
408*700637cbSDimitry Andric const MachineMemOperand &S1MO = getMemTarget(S1);
409*700637cbSDimitry Andric
410*700637cbSDimitry Andric // Currently only handling immediate stores.
411*700637cbSDimitry Andric int Off1 = getOffset(S1);
412*700637cbSDimitry Andric int Off2 = getOffset(S2);
413*700637cbSDimitry Andric
414*700637cbSDimitry Andric return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
415*700637cbSDimitry Andric : int(Off1 + S1MO.getSize().getValue()) == Off2;
416*700637cbSDimitry Andric }
417*700637cbSDimitry Andric
418*700637cbSDimitry Andric /// Given a sequence of adjacent loads/stores, and a maximum size of a single
419*700637cbSDimitry Andric /// wide inst, pick a group of insts that can be replaced by a single load/store
420*700637cbSDimitry Andric /// of size not exceeding MaxSize. The selected sequence will be recorded
421*700637cbSDimitry Andric /// in OG ("old group" of instructions).
422*700637cbSDimitry Andric /// OG should be empty on entry, and should be left empty if the function
423*700637cbSDimitry Andric /// fails.
selectInsts(InstrGroup::iterator Begin,InstrGroup::iterator End,InstrGroup & OG,unsigned & TotalSize,unsigned MaxSize)424*700637cbSDimitry Andric bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
425*700637cbSDimitry Andric InstrGroup::iterator End,
426*700637cbSDimitry Andric InstrGroup &OG, unsigned &TotalSize,
427*700637cbSDimitry Andric unsigned MaxSize) {
428*700637cbSDimitry Andric assert(Begin != End && "No instructions to analyze");
429*700637cbSDimitry Andric assert(OG.empty() && "Old group not empty on entry");
430*700637cbSDimitry Andric
431*700637cbSDimitry Andric if (std::distance(Begin, End) <= 1)
432*700637cbSDimitry Andric return false;
433*700637cbSDimitry Andric
434*700637cbSDimitry Andric MachineInstr *FirstMI = *Begin;
435*700637cbSDimitry Andric assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
436*700637cbSDimitry Andric const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
437*700637cbSDimitry Andric if (!FirstMMO.getType().isValid())
438*700637cbSDimitry Andric return false;
439*700637cbSDimitry Andric
440*700637cbSDimitry Andric unsigned Alignment = FirstMMO.getAlign().value();
441*700637cbSDimitry Andric unsigned SizeAccum = FirstMMO.getSize().getValue();
442*700637cbSDimitry Andric unsigned FirstOffset = getOffset(FirstMI);
443*700637cbSDimitry Andric
444*700637cbSDimitry Andric // The initial value of SizeAccum should always be a power of 2.
445*700637cbSDimitry Andric assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
446*700637cbSDimitry Andric
447*700637cbSDimitry Andric // If the size of the first store equals to or exceeds the limit, do nothing.
448*700637cbSDimitry Andric if (SizeAccum >= MaxSize)
449*700637cbSDimitry Andric return false;
450*700637cbSDimitry Andric
451*700637cbSDimitry Andric // If the size of the first load/store is greater than or equal to the address
452*700637cbSDimitry Andric // stored to, then the inst cannot be made any wider.
453*700637cbSDimitry Andric if (SizeAccum >= Alignment) {
454*700637cbSDimitry Andric LLVM_DEBUG(
455*700637cbSDimitry Andric dbgs() << "Size of load/store greater than equal to its alignment\n");
456*700637cbSDimitry Andric return false;
457*700637cbSDimitry Andric }
458*700637cbSDimitry Andric
459*700637cbSDimitry Andric // The offset of a load/store will put restrictions on how wide the inst can
460*700637cbSDimitry Andric // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
461*700637cbSDimitry Andric // bits be 0. If the first inst already exhausts the offset limits, quit.
462*700637cbSDimitry Andric // Test this by checking if the next wider size would exceed the limit.
463*700637cbSDimitry Andric // For post-increment instructions, the increment amount needs to follow the
464*700637cbSDimitry Andric // same rule.
465*700637cbSDimitry Andric unsigned OffsetOrIncVal = 0;
466*700637cbSDimitry Andric if (HII->isPostIncrement(*FirstMI))
467*700637cbSDimitry Andric OffsetOrIncVal = getPostIncrementValue(FirstMI);
468*700637cbSDimitry Andric else
469*700637cbSDimitry Andric OffsetOrIncVal = FirstOffset;
470*700637cbSDimitry Andric if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
471*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
472*700637cbSDimitry Andric << " value: " << getPostIncrementValue(FirstMI)
473*700637cbSDimitry Andric << " is invalid in the widened version\n");
474*700637cbSDimitry Andric return false;
475*700637cbSDimitry Andric }
476*700637cbSDimitry Andric
477*700637cbSDimitry Andric OG.push_back(FirstMI);
478*700637cbSDimitry Andric MachineInstr *S1 = FirstMI;
479*700637cbSDimitry Andric
480*700637cbSDimitry Andric // Pow2Num will be the largest number of elements in OG such that the sum
481*700637cbSDimitry Andric // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
482*700637cbSDimitry Andric unsigned Pow2Num = 1;
483*700637cbSDimitry Andric unsigned Pow2Size = SizeAccum;
484*700637cbSDimitry Andric bool HavePostInc = HII->isPostIncrement(*S1);
485*700637cbSDimitry Andric
486*700637cbSDimitry Andric // Be greedy: keep accumulating insts as long as they are to adjacent
487*700637cbSDimitry Andric // memory locations, and as long as the total number of bytes stored
488*700637cbSDimitry Andric // does not exceed the limit (MaxSize).
489*700637cbSDimitry Andric // Keep track of when the total size covered is a power of 2, since
490*700637cbSDimitry Andric // this is a size a single load/store can cover.
491*700637cbSDimitry Andric for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
492*700637cbSDimitry Andric MachineInstr *S2 = *I;
493*700637cbSDimitry Andric // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
494*700637cbSDimitry Andric // any other store to fill the "hole".
495*700637cbSDimitry Andric if (!areAdjacent(S1, S2))
496*700637cbSDimitry Andric break;
497*700637cbSDimitry Andric
498*700637cbSDimitry Andric // Cannot widen two post increments, need to return two registers
499*700637cbSDimitry Andric // with incremented values
500*700637cbSDimitry Andric if (HavePostInc && HII->isPostIncrement(*S2))
501*700637cbSDimitry Andric break;
502*700637cbSDimitry Andric
503*700637cbSDimitry Andric unsigned S2Size = getMemTarget(S2).getSize().getValue();
504*700637cbSDimitry Andric if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
505*700637cbSDimitry Andric break;
506*700637cbSDimitry Andric
507*700637cbSDimitry Andric OG.push_back(S2);
508*700637cbSDimitry Andric SizeAccum += S2Size;
509*700637cbSDimitry Andric if (isPowerOf2_32(SizeAccum)) {
510*700637cbSDimitry Andric Pow2Num = OG.size();
511*700637cbSDimitry Andric Pow2Size = SizeAccum;
512*700637cbSDimitry Andric }
513*700637cbSDimitry Andric if ((2 * Pow2Size - 1) & FirstOffset)
514*700637cbSDimitry Andric break;
515*700637cbSDimitry Andric
516*700637cbSDimitry Andric S1 = S2;
517*700637cbSDimitry Andric }
518*700637cbSDimitry Andric
519*700637cbSDimitry Andric // The insts don't add up to anything that can be widened. Clean up.
520*700637cbSDimitry Andric if (Pow2Num <= 1) {
521*700637cbSDimitry Andric OG.clear();
522*700637cbSDimitry Andric return false;
523*700637cbSDimitry Andric }
524*700637cbSDimitry Andric
525*700637cbSDimitry Andric // Only leave the loads/stores being widened.
526*700637cbSDimitry Andric OG.resize(Pow2Num);
527*700637cbSDimitry Andric TotalSize = Pow2Size;
528*700637cbSDimitry Andric return true;
529*700637cbSDimitry Andric }
530*700637cbSDimitry Andric
531*700637cbSDimitry Andric /// Given an "old group" OG of insts, create a "new group" NG of instructions
532*700637cbSDimitry Andric /// to replace them.
createWideInsts(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)533*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
534*700637cbSDimitry Andric unsigned TotalSize) {
535*700637cbSDimitry Andric if (Mode == WideningMode::Store) {
536*700637cbSDimitry Andric return createWideStores(OG, NG, TotalSize);
537*700637cbSDimitry Andric }
538*700637cbSDimitry Andric return createWideLoads(OG, NG, TotalSize);
539*700637cbSDimitry Andric }
540*700637cbSDimitry Andric
541*700637cbSDimitry Andric /// Given an "old group" OG of stores, create a "new group" NG of instructions
542*700637cbSDimitry Andric /// to replace them. Ideally, NG would only have a single instruction in it,
543*700637cbSDimitry Andric /// but that may only be possible for store-immediate.
createWideStores(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)544*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
545*700637cbSDimitry Andric unsigned TotalSize) {
546*700637cbSDimitry Andric // XXX Current limitations:
547*700637cbSDimitry Andric // - only handle a TotalSize of up to 8
548*700637cbSDimitry Andric
549*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Creating wide stores\n");
550*700637cbSDimitry Andric if (TotalSize > MaxWideSize)
551*700637cbSDimitry Andric return false;
552*700637cbSDimitry Andric
553*700637cbSDimitry Andric uint64_t Acc = 0; // Value accumulator.
554*700637cbSDimitry Andric unsigned Shift = 0;
555*700637cbSDimitry Andric bool HaveImm = false;
556*700637cbSDimitry Andric bool HaveReg = false;
557*700637cbSDimitry Andric
558*700637cbSDimitry Andric for (MachineInstr *MI : OG) {
559*700637cbSDimitry Andric const MachineMemOperand &MMO = getMemTarget(MI);
560*700637cbSDimitry Andric MachineOperand &SO = HII->isPostIncrement(*MI)
561*700637cbSDimitry Andric ? MI->getOperand(3)
562*700637cbSDimitry Andric : MI->getOperand(2); // Source.
563*700637cbSDimitry Andric unsigned NBits;
564*700637cbSDimitry Andric uint64_t Mask;
565*700637cbSDimitry Andric uint64_t Val;
566*700637cbSDimitry Andric
567*700637cbSDimitry Andric switch (SO.getType()) {
568*700637cbSDimitry Andric case MachineOperand::MO_Immediate:
569*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Have store immediate\n");
570*700637cbSDimitry Andric HaveImm = true;
571*700637cbSDimitry Andric
572*700637cbSDimitry Andric NBits = MMO.getSizeInBits().toRaw();
573*700637cbSDimitry Andric Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
574*700637cbSDimitry Andric Val = (SO.getImm() & Mask) << Shift;
575*700637cbSDimitry Andric Acc |= Val;
576*700637cbSDimitry Andric Shift += NBits;
577*700637cbSDimitry Andric break;
578*700637cbSDimitry Andric case MachineOperand::MO_Register:
579*700637cbSDimitry Andric HaveReg = true;
580*700637cbSDimitry Andric break;
581*700637cbSDimitry Andric default:
582*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Unhandled store\n");
583*700637cbSDimitry Andric return false;
584*700637cbSDimitry Andric }
585*700637cbSDimitry Andric }
586*700637cbSDimitry Andric
587*700637cbSDimitry Andric if (HaveImm && HaveReg) {
588*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
589*700637cbSDimitry Andric return false;
590*700637cbSDimitry Andric }
591*700637cbSDimitry Andric
592*700637cbSDimitry Andric MachineInstr *FirstSt = OG.front();
593*700637cbSDimitry Andric DebugLoc DL = OG.back()->getDebugLoc();
594*700637cbSDimitry Andric const MachineMemOperand &OldM = getMemTarget(FirstSt);
595*700637cbSDimitry Andric MachineMemOperand *NewM =
596*700637cbSDimitry Andric MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
597*700637cbSDimitry Andric TotalSize, OldM.getAlign(), OldM.getAAInfo());
598*700637cbSDimitry Andric MachineInstr *StI;
599*700637cbSDimitry Andric MachineOperand &MR =
600*700637cbSDimitry Andric (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
601*700637cbSDimitry Andric : FirstSt->getOperand(0));
602*700637cbSDimitry Andric auto SecondSt = OG.back();
603*700637cbSDimitry Andric if (HaveReg) {
604*700637cbSDimitry Andric MachineOperand FReg =
605*700637cbSDimitry Andric (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
606*700637cbSDimitry Andric : FirstSt->getOperand(2));
607*700637cbSDimitry Andric // Post increments appear first in the sorted group.
608*700637cbSDimitry Andric // Cannot have a post increment for the second instruction
609*700637cbSDimitry Andric assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
610*700637cbSDimitry Andric MachineOperand SReg = SecondSt->getOperand(2);
611*700637cbSDimitry Andric assert(FReg.isReg() && SReg.isReg() &&
612*700637cbSDimitry Andric "Cannot merge store register and store imm");
613*700637cbSDimitry Andric const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
614*700637cbSDimitry Andric Register VReg =
615*700637cbSDimitry Andric MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
616*700637cbSDimitry Andric MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
617*700637cbSDimitry Andric NG.push_back(CombI);
618*700637cbSDimitry Andric
619*700637cbSDimitry Andric if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
620*700637cbSDimitry Andric const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
621*700637cbSDimitry Andric auto IncDestMO = FirstSt->getOperand(0);
622*700637cbSDimitry Andric auto IncMO = FirstSt->getOperand(2);
623*700637cbSDimitry Andric StI =
624*700637cbSDimitry Andric BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
625*700637cbSDimitry Andric } else {
626*700637cbSDimitry Andric const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
627*700637cbSDimitry Andric auto OffMO = FirstSt->getOperand(1);
628*700637cbSDimitry Andric StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
629*700637cbSDimitry Andric }
630*700637cbSDimitry Andric StI->addMemOperand(*MF, NewM);
631*700637cbSDimitry Andric NG.push_back(StI);
632*700637cbSDimitry Andric return true;
633*700637cbSDimitry Andric }
634*700637cbSDimitry Andric
635*700637cbSDimitry Andric // Handle store immediates
636*700637cbSDimitry Andric // There are no post increment store immediates on Hexagon
637*700637cbSDimitry Andric assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
638*700637cbSDimitry Andric auto Off = FirstSt->getOperand(1).getImm();
639*700637cbSDimitry Andric if (TotalSize == 8) {
640*700637cbSDimitry Andric // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
641*700637cbSDimitry Andric uint64_t Mask = 0xFFFFFFFFU;
642*700637cbSDimitry Andric int LowerAcc = int(Mask & Acc);
643*700637cbSDimitry Andric int UpperAcc = Acc >> 32;
644*700637cbSDimitry Andric Register DReg =
645*700637cbSDimitry Andric MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
646*700637cbSDimitry Andric MachineInstr *CombI;
647*700637cbSDimitry Andric if (Acc != 0) {
648*700637cbSDimitry Andric const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
649*700637cbSDimitry Andric const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
650*700637cbSDimitry Andric Register VReg = MF->getRegInfo().createVirtualRegister(RC);
651*700637cbSDimitry Andric MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
652*700637cbSDimitry Andric NG.push_back(TfrI);
653*700637cbSDimitry Andric const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
654*700637cbSDimitry Andric CombI = BuildMI(*MF, DL, CombD, DReg)
655*700637cbSDimitry Andric .addImm(UpperAcc)
656*700637cbSDimitry Andric .addReg(VReg, RegState::Kill);
657*700637cbSDimitry Andric }
658*700637cbSDimitry Andric // If immediates are 0, we do not need A2_tfrsi
659*700637cbSDimitry Andric else {
660*700637cbSDimitry Andric const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
661*700637cbSDimitry Andric CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
662*700637cbSDimitry Andric }
663*700637cbSDimitry Andric NG.push_back(CombI);
664*700637cbSDimitry Andric const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
665*700637cbSDimitry Andric StI =
666*700637cbSDimitry Andric BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
667*700637cbSDimitry Andric } else if (Acc < 0x10000) {
668*700637cbSDimitry Andric // Create mem[hw] = #Acc
669*700637cbSDimitry Andric unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io
670*700637cbSDimitry Andric : (TotalSize == 4) ? Hexagon::S4_storeiri_io
671*700637cbSDimitry Andric : 0;
672*700637cbSDimitry Andric assert(WOpc && "Unexpected size");
673*700637cbSDimitry Andric
674*700637cbSDimitry Andric int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
675*700637cbSDimitry Andric const MCInstrDesc &StD = TII->get(WOpc);
676*700637cbSDimitry Andric StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
677*700637cbSDimitry Andric } else {
678*700637cbSDimitry Andric // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
679*700637cbSDimitry Andric const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
680*700637cbSDimitry Andric const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
681*700637cbSDimitry Andric Register VReg = MF->getRegInfo().createVirtualRegister(RC);
682*700637cbSDimitry Andric MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
683*700637cbSDimitry Andric NG.push_back(TfrI);
684*700637cbSDimitry Andric
685*700637cbSDimitry Andric unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io
686*700637cbSDimitry Andric : (TotalSize == 4) ? Hexagon::S2_storeri_io
687*700637cbSDimitry Andric : 0;
688*700637cbSDimitry Andric assert(WOpc && "Unexpected size");
689*700637cbSDimitry Andric
690*700637cbSDimitry Andric const MCInstrDesc &StD = TII->get(WOpc);
691*700637cbSDimitry Andric StI =
692*700637cbSDimitry Andric BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
693*700637cbSDimitry Andric }
694*700637cbSDimitry Andric StI->addMemOperand(*MF, NewM);
695*700637cbSDimitry Andric NG.push_back(StI);
696*700637cbSDimitry Andric
697*700637cbSDimitry Andric return true;
698*700637cbSDimitry Andric }
699*700637cbSDimitry Andric
700*700637cbSDimitry Andric /// Given an "old group" OG of loads, create a "new group" NG of instructions
701*700637cbSDimitry Andric /// to replace them. Ideally, NG would only have a single instruction in it,
702*700637cbSDimitry Andric /// but that may only be possible for double register loads.
createWideLoads(InstrGroup & OG,InstrGroup & NG,unsigned TotalSize)703*700637cbSDimitry Andric bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
704*700637cbSDimitry Andric unsigned TotalSize) {
705*700637cbSDimitry Andric LLVM_DEBUG(dbgs() << "Creating wide loads\n");
706*700637cbSDimitry Andric // XXX Current limitations:
707*700637cbSDimitry Andric // - only expect stores of immediate values in OG,
708*700637cbSDimitry Andric // - only handle a TotalSize of up to 8
709*700637cbSDimitry Andric if (TotalSize > MaxWideSize)
710*700637cbSDimitry Andric return false;
711*700637cbSDimitry Andric assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");
712*700637cbSDimitry Andric
713*700637cbSDimitry Andric MachineInstr *FirstLd = OG.front();
714*700637cbSDimitry Andric const MachineMemOperand &OldM = getMemTarget(FirstLd);
715*700637cbSDimitry Andric MachineMemOperand *NewM =
716*700637cbSDimitry Andric MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
717*700637cbSDimitry Andric TotalSize, OldM.getAlign(), OldM.getAAInfo());
718*700637cbSDimitry Andric
719*700637cbSDimitry Andric MachineOperand &MR = FirstLd->getOperand(0);
720*700637cbSDimitry Andric MachineOperand &MRBase =
721*700637cbSDimitry Andric (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
722*700637cbSDimitry Andric : FirstLd->getOperand(1));
723*700637cbSDimitry Andric DebugLoc DL = OG.back()->getDebugLoc();
724*700637cbSDimitry Andric
725*700637cbSDimitry Andric // Create the double register Load Instruction.
726*700637cbSDimitry Andric Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
727*700637cbSDimitry Andric MachineInstr *LdI;
728*700637cbSDimitry Andric
729*700637cbSDimitry Andric // Post increments appear first in the sorted group
730*700637cbSDimitry Andric if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
731*700637cbSDimitry Andric auto IncDestMO = FirstLd->getOperand(1);
732*700637cbSDimitry Andric auto IncMO = FirstLd->getOperand(3);
733*700637cbSDimitry Andric LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
734*700637cbSDimitry Andric .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
735*700637cbSDimitry Andric .add(IncDestMO)
736*700637cbSDimitry Andric .add(MRBase)
737*700637cbSDimitry Andric .add(IncMO);
738*700637cbSDimitry Andric LdI->addMemOperand(*MF, NewM);
739*700637cbSDimitry Andric } else {
740*700637cbSDimitry Andric auto OffMO = FirstLd->getOperand(2);
741*700637cbSDimitry Andric LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
742*700637cbSDimitry Andric .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
743*700637cbSDimitry Andric .add(MRBase)
744*700637cbSDimitry Andric .add(OffMO);
745*700637cbSDimitry Andric LdI->addMemOperand(*MF, NewM);
746*700637cbSDimitry Andric }
747*700637cbSDimitry Andric NG.push_back(LdI);
748*700637cbSDimitry Andric
749*700637cbSDimitry Andric auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
750*700637cbSDimitry Andric MachineInstr *DstReg) {
751*700637cbSDimitry Andric Register DestReg = DstReg->getOperand(0).getReg();
752*700637cbSDimitry Andric return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
753*700637cbSDimitry Andric .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
754*700637cbSDimitry Andric };
755*700637cbSDimitry Andric
756*700637cbSDimitry Andric MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
757*700637cbSDimitry Andric MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
758*700637cbSDimitry Andric NG.push_back(LdI_lo);
759*700637cbSDimitry Andric NG.push_back(LdI_hi);
760*700637cbSDimitry Andric
761*700637cbSDimitry Andric return true;
762*700637cbSDimitry Andric }
763*700637cbSDimitry Andric
764*700637cbSDimitry Andric // Replace instructions from the old group OG with instructions from the
765*700637cbSDimitry Andric // new group NG. Conceptually, remove all instructions in OG, and then
766*700637cbSDimitry Andric // insert all instructions in NG, starting at where the first instruction
767*700637cbSDimitry Andric // from OG was (in the order in which they appeared in the basic block).
768*700637cbSDimitry Andric // (The ordering in OG does not have to match the order in the basic block.)
replaceInsts(InstrGroup & OG,InstrGroup & NG)769*700637cbSDimitry Andric bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
770*700637cbSDimitry Andric LLVM_DEBUG({
771*700637cbSDimitry Andric dbgs() << "Replacing:\n";
772*700637cbSDimitry Andric for (auto I : OG)
773*700637cbSDimitry Andric dbgs() << " " << *I;
774*700637cbSDimitry Andric dbgs() << "with\n";
775*700637cbSDimitry Andric for (auto I : NG)
776*700637cbSDimitry Andric dbgs() << " " << *I;
777*700637cbSDimitry Andric });
778*700637cbSDimitry Andric
779*700637cbSDimitry Andric MachineBasicBlock *MBB = OG.back()->getParent();
780*700637cbSDimitry Andric MachineBasicBlock::iterator InsertAt = MBB->end();
781*700637cbSDimitry Andric
782*700637cbSDimitry Andric // Need to establish the insertion point.
783*700637cbSDimitry Andric // For loads the best one is right before the first load in the OG,
784*700637cbSDimitry Andric // but in the order in which the insts occur in the program list.
785*700637cbSDimitry Andric // For stores the best point is right after the last store in the OG.
786*700637cbSDimitry Andric // Since the ordering in OG does not correspond
787*700637cbSDimitry Andric // to the order in the program list, we need to do some work to find
788*700637cbSDimitry Andric // the insertion point.
789*700637cbSDimitry Andric
790*700637cbSDimitry Andric // Create a set of all instructions in OG (for quick lookup).
791*700637cbSDimitry Andric InstrSet OldMemInsts(llvm::from_range, OG);
792*700637cbSDimitry Andric
793*700637cbSDimitry Andric if (Mode == WideningMode::Load) {
794*700637cbSDimitry Andric // Find the first load instruction in the block that is present in OG.
795*700637cbSDimitry Andric for (auto &I : *MBB) {
796*700637cbSDimitry Andric if (OldMemInsts.count(&I)) {
797*700637cbSDimitry Andric InsertAt = I;
798*700637cbSDimitry Andric break;
799*700637cbSDimitry Andric }
800*700637cbSDimitry Andric }
801*700637cbSDimitry Andric
802*700637cbSDimitry Andric assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");
803*700637cbSDimitry Andric
804*700637cbSDimitry Andric for (auto *I : NG)
805*700637cbSDimitry Andric MBB->insert(InsertAt, I);
806*700637cbSDimitry Andric } else {
807*700637cbSDimitry Andric // Find the last store instruction in the block that is present in OG.
808*700637cbSDimitry Andric auto I = MBB->rbegin();
809*700637cbSDimitry Andric for (; I != MBB->rend(); ++I) {
810*700637cbSDimitry Andric if (OldMemInsts.count(&(*I))) {
811*700637cbSDimitry Andric InsertAt = (*I).getIterator();
812*700637cbSDimitry Andric break;
813*700637cbSDimitry Andric }
814*700637cbSDimitry Andric }
815*700637cbSDimitry Andric
816*700637cbSDimitry Andric assert((I != MBB->rend()) && "Cannot locate any store from the group");
817*700637cbSDimitry Andric
818*700637cbSDimitry Andric for (auto I = NG.rbegin(); I != NG.rend(); ++I)
819*700637cbSDimitry Andric MBB->insertAfter(InsertAt, *I);
820*700637cbSDimitry Andric }
821*700637cbSDimitry Andric
822*700637cbSDimitry Andric for (auto *I : OG)
823*700637cbSDimitry Andric I->eraseFromParent();
824*700637cbSDimitry Andric
825*700637cbSDimitry Andric return true;
826*700637cbSDimitry Andric }
827*700637cbSDimitry Andric
828*700637cbSDimitry Andric // Break up the group into smaller groups, each of which can be replaced by
829*700637cbSDimitry Andric // a single wide load/store. Widen each such smaller group and replace the old
830*700637cbSDimitry Andric // instructions with the widened ones.
processGroup(InstrGroup & Group)831*700637cbSDimitry Andric bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
832*700637cbSDimitry Andric bool Changed = false;
833*700637cbSDimitry Andric InstrGroup::iterator I = Group.begin(), E = Group.end();
834*700637cbSDimitry Andric InstrGroup OG, NG; // Old and new groups.
835*700637cbSDimitry Andric unsigned CollectedSize;
836*700637cbSDimitry Andric
837*700637cbSDimitry Andric while (I != E) {
838*700637cbSDimitry Andric OG.clear();
839*700637cbSDimitry Andric NG.clear();
840*700637cbSDimitry Andric
841*700637cbSDimitry Andric bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) &&
842*700637cbSDimitry Andric createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG);
843*700637cbSDimitry Andric if (!Succ)
844*700637cbSDimitry Andric continue;
845*700637cbSDimitry Andric
846*700637cbSDimitry Andric assert(OG.size() > 1 && "Created invalid group");
847*700637cbSDimitry Andric assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
848*700637cbSDimitry Andric I += OG.size() - 1;
849*700637cbSDimitry Andric
850*700637cbSDimitry Andric Changed = true;
851*700637cbSDimitry Andric }
852*700637cbSDimitry Andric
853*700637cbSDimitry Andric return Changed;
854*700637cbSDimitry Andric }
855*700637cbSDimitry Andric
856*700637cbSDimitry Andric // Process a single basic block: create the load/store groups, and replace them
857*700637cbSDimitry Andric // with the widened insts, if possible. Processing of each basic block
858*700637cbSDimitry Andric // is independent from processing of any other basic block. This transfor-
859*700637cbSDimitry Andric // mation could be stopped after having processed any basic block without
860*700637cbSDimitry Andric // any ill effects (other than not having performed widening in the unpro-
861*700637cbSDimitry Andric // cessed blocks). Also, the basic blocks can be processed in any order.
processBasicBlock(MachineBasicBlock & MBB)862*700637cbSDimitry Andric bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
863*700637cbSDimitry Andric InstrGroupList SGs;
864*700637cbSDimitry Andric bool Changed = false;
865*700637cbSDimitry Andric
866*700637cbSDimitry Andric // To prevent long compile time check for max BB size.
867*700637cbSDimitry Andric if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
868*700637cbSDimitry Andric return false;
869*700637cbSDimitry Andric
870*700637cbSDimitry Andric createGroups(MBB, SGs);
871*700637cbSDimitry Andric
872*700637cbSDimitry Andric auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
873*700637cbSDimitry Andric return getOffset(A) < getOffset(B);
874*700637cbSDimitry Andric };
875*700637cbSDimitry Andric for (auto &G : SGs) {
876*700637cbSDimitry Andric assert(G.size() > 1 && "Group with fewer than 2 elements");
877*700637cbSDimitry Andric llvm::sort(G, Less);
878*700637cbSDimitry Andric
879*700637cbSDimitry Andric Changed |= processGroup(G);
880*700637cbSDimitry Andric }
881*700637cbSDimitry Andric
882*700637cbSDimitry Andric return Changed;
883*700637cbSDimitry Andric }
884*700637cbSDimitry Andric
run()885*700637cbSDimitry Andric bool HexagonLoadStoreWidening::run() {
886*700637cbSDimitry Andric bool Changed = false;
887*700637cbSDimitry Andric
888*700637cbSDimitry Andric for (auto &B : *MF)
889*700637cbSDimitry Andric Changed |= processBasicBlock(B);
890*700637cbSDimitry Andric
891*700637cbSDimitry Andric return Changed;
892*700637cbSDimitry Andric }
893*700637cbSDimitry Andric
createHexagonStoreWidening()894*700637cbSDimitry Andric FunctionPass *llvm::createHexagonStoreWidening() {
895*700637cbSDimitry Andric return new HexagonStoreWidening();
896*700637cbSDimitry Andric }
897*700637cbSDimitry Andric
createHexagonLoadWidening()898*700637cbSDimitry Andric FunctionPass *llvm::createHexagonLoadWidening() {
899*700637cbSDimitry Andric return new HexagonLoadWidening();
900*700637cbSDimitry Andric }
901