//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));


CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, GISelKnownBits *KB,
                               MachineDominatorTree *MDT)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
      KB(KB), MDT(MDT) {
  (void)this->KB;
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  // Simple Copy Propagation.
  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
    return true;
  return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}

bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
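  // For instance (illustrative MIR with hypothetical registers, not taken
  // from a test):
  //   %x:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
  //   %y:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %d:_(<4 x s32>) = G_CONCAT_VECTORS %x(<2 x s32>), %y(<2 x s32>)
  // would produce Ops = {%a, %b, %u, %u}, where %u is a single scalar
  // G_IMPLICIT_DEF shared by both undef lanes.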
  for (const MachineOperand &MO : MI.operands()) {
    // Skip the instruction definition.
    if (MO.isDef())
      continue;
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->operands()) {
        // Skip the definition.
        if (BuildVecMO.isDef())
          continue;
        Ops.push_back(BuildVecMO.getReg());
      }
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector into as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given that we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  unsigned DstNumElts = DstType.getNumElements();
  unsigned SrcNumElts = SrcType.getNumElements();

  // If the resulting vector is smaller than the two source vectors
  // concatenated together, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  if (DstNumElts < 2 * SrcNumElts)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
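  // For example (illustrative values), with two <2 x s32> sources the mask
  // (0, 1, 2, 3) concatenates Src1 followed by Src2, (2, 3, 0, 1)
  // concatenates Src2 followed by Src1, and (0, 1, -1, -1) concatenates Src1
  // with an undef vector; any mask that is not such a concatenation is
  // rejected below.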
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  SmallVector<int, 8> Mask;
  ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  Builder.buildConcatVectors(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT &TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to be hoisted through basic blocks, but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during legalization then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so folding them into the load is the bigger win.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have fewer large registers than small registers, and this
  // choice potentially increases the live range of the larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load, which would be incorrect
  // for the volatile case and wasteful in general.

  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;

  auto &LoadValue = MI.getOperand(0);
  assert(LoadValue.isReg() && "Result wasn't a register?");

  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
  // If we try to perform extload combining on those, we can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // Non-power-of-2 types will very likely be legalized into multiple loads,
  // so don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up its type and source so that it uses the result of the extending load.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT &UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is the preferred use we chose earlier. We're going to update
      // the load to def this value later, so just erase the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

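/// Return true if \p DefMI precedes \p UseMI within their common basic block,
/// i.e. \p DefMI is encountered first when walking the block from the top.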
bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;

  // Walk the basic block until we find one of the two instructions: whichever
  // is encountered first is the predecessor.
  for (const MachineInstr &CurMI : *DefMI.getParent()) {
    if (&CurMI == &DefMI)
      return true;
    if (&CurMI == &UseMI)
      return false;
  }

  llvm_unreachable("Block must contain both DefMI and UseMI");
}

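/// Return true if \p DefMI dominates \p UseMI. Uses the MachineDominatorTree
/// when it is available and otherwise falls back to a linear scan of the
/// common basic block; instructions in different blocks are conservatively
/// reported as not dominating in that case.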
bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

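/// Look for an opportunity to turn \p MI into a post-indexed operation:
/// roughly, a G_GEP that advances MI's base pointer after the access, e.g.
/// (hypothetical MIR):
///   %val:_(s32) = G_LOAD %base(p0)
///   %next:_(p0) = G_GEP %base(p0), %offset(s64)
/// which tryCombineIndexedLoadStore can fold into a single indexed operation
/// that also produces the updated pointer.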
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Base = MI.getOperand(1).getReg();
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);

  for (auto &Use : MRI.use_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_GEP)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.

    bool MemOpDominatesAddrUses = true;
    for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, GEPUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}

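/// Look for an opportunity to turn \p MI into a pre-indexed operation:
/// roughly, a memory access whose address is itself computed by a G_GEP, e.g.
/// (hypothetical MIR):
///   %addr:_(p0) = G_GEP %base(p0), %offset(s64)
///   %val:_(s32) = G_LOAD %addr(p0)
/// so the address computation can be folded into an indexed load/store that
/// defines the updated pointer as a second result.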
bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
  if (!AddrDef || MRI.hasOneUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant GEPs. That
  // might allow us to end base's liveness here by adjusting the constant.

  for (auto &UseMI : MRI.use_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}

bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  bool IsStore = Opcode == TargetOpcode::G_STORE;
  Register Addr, Base, Offset;
  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
    return false;


  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
  MachineIRBuilder MIRBuilder(MI);
  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(Addr);
  }

  MIB.addUse(Base);
  MIB.addUse(Offset);
  MIB.addImm(IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
  return true;
}

bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;

  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, so it
  // always results in a branch no matter which path is taken. Here we try to
  // find that pattern and replace it with a conditional branch to bb3,
  // falling through to bb2 otherwise.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target.
  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
    return false;

  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
      !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
    return false;
  return true;
}

bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
  if (!matchElideBrByInvertingCond(MI))
    return false;
  applyElideBrByInvertingCond(MI);
  return true;
}

void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  MachineBasicBlock::iterator BrIt(MI);
  MachineInstr *BrCond = &*std::prev(BrIt);
  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());

  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());

  // Invert the G_ICMP condition.
  Observer.changingInstr(*CmpMI);
  CmpMI->getOperand(1).setPredicate(InversePred);
  Observer.changedInstr(*CmpMI);

  // Change the conditional branch target.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
  MI.eraseFromParent();
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
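// For example (illustrative, ignoring overlap and alignment restrictions and
// assuming Limit permits it), a 7-byte operation with a preferred type of s32
// would populate MemOps with {s32, s16, s8}: 4 + 2 + 1 bytes.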
static bool findGISelOptimalMemOpLowering(
    std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
    unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
    bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, const TargetLowering &TLI) {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (SrcAlign != 0 && SrcAlign < DstAlign)
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
                                  ZeroMemset, MemcpyStrSrc, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
      Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && AllowOverlap && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

883*8bcb0991SDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
884*8bcb0991SDimitry Andric   if (Ty.isVector())
885*8bcb0991SDimitry Andric     return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
886*8bcb0991SDimitry Andric                            Ty.getNumElements());
887*8bcb0991SDimitry Andric   return IntegerType::get(C, Ty.getSizeInBits());
888*8bcb0991SDimitry Andric }
889*8bcb0991SDimitry Andric 
890*8bcb0991SDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition.
891*8bcb0991SDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
892*8bcb0991SDimitry Andric   MachineRegisterInfo &MRI = *MIB.getMRI();
893*8bcb0991SDimitry Andric   unsigned NumBits = Ty.getScalarSizeInBits();
894*8bcb0991SDimitry Andric   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
895*8bcb0991SDimitry Andric   if (!Ty.isVector() && ValVRegAndVal) {
896*8bcb0991SDimitry Andric     unsigned KnownVal = ValVRegAndVal->Value;
897*8bcb0991SDimitry Andric     APInt Scalar = APInt(8, KnownVal);
898*8bcb0991SDimitry Andric     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
899*8bcb0991SDimitry Andric     return MIB.buildConstant(Ty, SplatVal).getReg(0);
900*8bcb0991SDimitry Andric   }
901*8bcb0991SDimitry Andric   // FIXME: for vector types create a G_BUILD_VECTOR.
902*8bcb0991SDimitry Andric   if (Ty.isVector())
903*8bcb0991SDimitry Andric     return Register();
904*8bcb0991SDimitry Andric 
905*8bcb0991SDimitry Andric   // Extend the byte value to the larger type, and then multiply by a magic
906*8bcb0991SDimitry Andric   // value 0x010101... in order to replicate it across every byte.
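  // For example, for a 32-bit type and byte value 0xAB:
  //   zext(0xAB) * 0x01010101 = 0xABABABAB.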
907*8bcb0991SDimitry Andric   LLT ExtType = Ty.getScalarType();
908*8bcb0991SDimitry Andric   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
909*8bcb0991SDimitry Andric   if (NumBits > 8) {
910*8bcb0991SDimitry Andric     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
911*8bcb0991SDimitry Andric     auto MagicMI = MIB.buildConstant(ExtType, Magic);
912*8bcb0991SDimitry Andric     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
913*8bcb0991SDimitry Andric   }
914*8bcb0991SDimitry Andric 
915*8bcb0991SDimitry Andric   assert(ExtType == Ty && "Vector memset value type not supported yet");
916*8bcb0991SDimitry Andric   return Val;
917*8bcb0991SDimitry Andric }
918*8bcb0991SDimitry Andric 
919*8bcb0991SDimitry Andric bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
920*8bcb0991SDimitry Andric                                     unsigned KnownLen, unsigned Align,
921*8bcb0991SDimitry Andric                                     bool IsVolatile) {
922*8bcb0991SDimitry Andric   auto &MF = *MI.getParent()->getParent();
923*8bcb0991SDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
924*8bcb0991SDimitry Andric   auto &DL = MF.getDataLayout();
925*8bcb0991SDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
926*8bcb0991SDimitry Andric 
927*8bcb0991SDimitry Andric   assert(KnownLen != 0 && "Have a zero length memset length!");
928*8bcb0991SDimitry Andric 
929*8bcb0991SDimitry Andric   bool DstAlignCanChange = false;
930*8bcb0991SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
931*8bcb0991SDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
932*8bcb0991SDimitry Andric 
933*8bcb0991SDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
934*8bcb0991SDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
935*8bcb0991SDimitry Andric     DstAlignCanChange = true;
936*8bcb0991SDimitry Andric 
937*8bcb0991SDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
938*8bcb0991SDimitry Andric   std::vector<LLT> MemOps;
939*8bcb0991SDimitry Andric 
940*8bcb0991SDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
941*8bcb0991SDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
942*8bcb0991SDimitry Andric 
943*8bcb0991SDimitry Andric   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
944*8bcb0991SDimitry Andric   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
945*8bcb0991SDimitry Andric 
946*8bcb0991SDimitry Andric   if (!findGISelOptimalMemOpLowering(
947*8bcb0991SDimitry Andric           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
948*8bcb0991SDimitry Andric           /*IsMemset=*/true,
949*8bcb0991SDimitry Andric           /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
950*8bcb0991SDimitry Andric           /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
951*8bcb0991SDimitry Andric           MF.getFunction().getAttributes(), TLI))
952*8bcb0991SDimitry Andric     return false;
953*8bcb0991SDimitry Andric 
954*8bcb0991SDimitry Andric   if (DstAlignCanChange) {
955*8bcb0991SDimitry Andric     // Get an estimate of the type from the LLT.
956*8bcb0991SDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
957*8bcb0991SDimitry Andric     unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
958*8bcb0991SDimitry Andric     if (NewAlign > Align) {
959*8bcb0991SDimitry Andric       Align = NewAlign;
960*8bcb0991SDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
961*8bcb0991SDimitry Andric       // Give the stack frame object a larger alignment if needed.
962*8bcb0991SDimitry Andric       if (MFI.getObjectAlignment(FI) < Align)
963*8bcb0991SDimitry Andric         MFI.setObjectAlignment(FI, Align);
964*8bcb0991SDimitry Andric     }
965*8bcb0991SDimitry Andric   }
966*8bcb0991SDimitry Andric 
967*8bcb0991SDimitry Andric   MachineIRBuilder MIB(MI);
968*8bcb0991SDimitry Andric   // Find the largest store and generate the bit pattern for it.
969*8bcb0991SDimitry Andric   LLT LargestTy = MemOps[0];
970*8bcb0991SDimitry Andric   for (unsigned i = 1; i < MemOps.size(); i++)
971*8bcb0991SDimitry Andric     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
972*8bcb0991SDimitry Andric       LargestTy = MemOps[i];
973*8bcb0991SDimitry Andric 
974*8bcb0991SDimitry Andric   // The memset stored value is always defined as an s8, so in order to make it
975*8bcb0991SDimitry Andric   // work with larger store types we need to repeat the bit pattern across the
976*8bcb0991SDimitry Andric   // wider type.
977*8bcb0991SDimitry Andric   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
978*8bcb0991SDimitry Andric 
979*8bcb0991SDimitry Andric   if (!MemSetValue)
980*8bcb0991SDimitry Andric     return false;
981*8bcb0991SDimitry Andric 
982*8bcb0991SDimitry Andric   // Generate the stores. For each store type in the list, we generate the
983*8bcb0991SDimitry Andric   // matching store of that type to the destination address.
984*8bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(Dst);
985*8bcb0991SDimitry Andric   unsigned DstOff = 0;
986*8bcb0991SDimitry Andric   unsigned Size = KnownLen;
987*8bcb0991SDimitry Andric   for (unsigned I = 0; I < MemOps.size(); I++) {
988*8bcb0991SDimitry Andric     LLT Ty = MemOps[I];
989*8bcb0991SDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
990*8bcb0991SDimitry Andric     if (TySize > Size) {
991*8bcb0991SDimitry Andric       // Issuing an unaligned store that overlaps with the previous store.
992*8bcb0991SDimitry Andric       // Adjust the offset accordingly.
993*8bcb0991SDimitry Andric       assert(I == MemOps.size() - 1 && I != 0);
994*8bcb0991SDimitry Andric       DstOff -= TySize - Size;
995*8bcb0991SDimitry Andric     }
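    // E.g. a 7-byte memset lowered to two s32 stores (when the target allowed
    // the overlapping pair above) ends up with the second store shifted back
    // to offset 3, covering bytes 3..6.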
996*8bcb0991SDimitry Andric 
997*8bcb0991SDimitry Andric     // If this store is smaller than the largest store, see whether we can get
998*8bcb0991SDimitry Andric     // the smaller value for free with a truncate.
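    // E.g. if the splat was built as an s64 and this store is an s32, a
    // G_TRUNC of the existing splat can be cheaper than materializing a fresh
    // s32 splat constant; TLI.isTruncateFree below tells us when that holds.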
999*8bcb0991SDimitry Andric     Register Value = MemSetValue;
1000*8bcb0991SDimitry Andric     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1001*8bcb0991SDimitry Andric       MVT VT = getMVTForLLT(Ty);
1002*8bcb0991SDimitry Andric       MVT LargestVT = getMVTForLLT(LargestTy);
1003*8bcb0991SDimitry Andric       if (!LargestTy.isVector() && !Ty.isVector() &&
1004*8bcb0991SDimitry Andric           TLI.isTruncateFree(LargestVT, VT))
1005*8bcb0991SDimitry Andric         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1006*8bcb0991SDimitry Andric       else
1007*8bcb0991SDimitry Andric         Value = getMemsetValue(Val, Ty, MIB);
1008*8bcb0991SDimitry Andric       if (!Value)
1009*8bcb0991SDimitry Andric         return false;
1010*8bcb0991SDimitry Andric     }
1011*8bcb0991SDimitry Andric 
1012*8bcb0991SDimitry Andric     auto *StoreMMO =
1013*8bcb0991SDimitry Andric         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1014*8bcb0991SDimitry Andric 
1015*8bcb0991SDimitry Andric     Register Ptr = Dst;
1016*8bcb0991SDimitry Andric     if (DstOff != 0) {
1017*8bcb0991SDimitry Andric       auto Offset =
1018*8bcb0991SDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1019*8bcb0991SDimitry Andric       Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1020*8bcb0991SDimitry Andric     }
1021*8bcb0991SDimitry Andric 
1022*8bcb0991SDimitry Andric     MIB.buildStore(Value, Ptr, *StoreMMO);
1023*8bcb0991SDimitry Andric     DstOff += Ty.getSizeInBytes();
1024*8bcb0991SDimitry Andric     Size -= TySize;
1025*8bcb0991SDimitry Andric   }
1026*8bcb0991SDimitry Andric 
1027*8bcb0991SDimitry Andric   MI.eraseFromParent();
1028*8bcb0991SDimitry Andric   return true;
1029*8bcb0991SDimitry Andric }
1030*8bcb0991SDimitry Andric 
1031*8bcb0991SDimitry Andric 
1032*8bcb0991SDimitry Andric bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1033*8bcb0991SDimitry Andric                                     Register Src, unsigned KnownLen,
1034*8bcb0991SDimitry Andric                                     unsigned DstAlign, unsigned SrcAlign,
1035*8bcb0991SDimitry Andric                                     bool IsVolatile) {
1036*8bcb0991SDimitry Andric   auto &MF = *MI.getParent()->getParent();
1037*8bcb0991SDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1038*8bcb0991SDimitry Andric   auto &DL = MF.getDataLayout();
1039*8bcb0991SDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
1040*8bcb0991SDimitry Andric 
1041*8bcb0991SDimitry Andric   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1042*8bcb0991SDimitry Andric 
1043*8bcb0991SDimitry Andric   bool DstAlignCanChange = false;
1044*8bcb0991SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
1045*8bcb0991SDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
1046*8bcb0991SDimitry Andric   unsigned Alignment = MinAlign(DstAlign, SrcAlign);
1047*8bcb0991SDimitry Andric 
1048*8bcb0991SDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1049*8bcb0991SDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1050*8bcb0991SDimitry Andric     DstAlignCanChange = true;
1051*8bcb0991SDimitry Andric 
1052*8bcb0991SDimitry Andric   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1053*8bcb0991SDimitry Andric   // FIXME: also use the equivalent of isMemSrcFromConstant, and always inline
1054*8bcb0991SDimitry Andric   // if the memcpy is in a tail call position.
1055*8bcb0991SDimitry Andric 
1056*8bcb0991SDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1057*8bcb0991SDimitry Andric   std::vector<LLT> MemOps;
1058*8bcb0991SDimitry Andric 
1059*8bcb0991SDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
1060*8bcb0991SDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1061*8bcb0991SDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1062*8bcb0991SDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1063*8bcb0991SDimitry Andric 
1064*8bcb0991SDimitry Andric   if (!findGISelOptimalMemOpLowering(
1065*8bcb0991SDimitry Andric           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
1066*8bcb0991SDimitry Andric           SrcAlign,
1067*8bcb0991SDimitry Andric           /*IsMemset=*/false,
1068*8bcb0991SDimitry Andric           /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
1069*8bcb0991SDimitry Andric           /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
1070*8bcb0991SDimitry Andric           SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
1071*8bcb0991SDimitry Andric     return false;
1072*8bcb0991SDimitry Andric 
1073*8bcb0991SDimitry Andric   if (DstAlignCanChange) {
1074*8bcb0991SDimitry Andric     // Get an estimate of the type from the LLT.
1075*8bcb0991SDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
1076*8bcb0991SDimitry Andric     unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1077*8bcb0991SDimitry Andric 
1078*8bcb0991SDimitry Andric     // Don't promote to an alignment that would require dynamic stack
1079*8bcb0991SDimitry Andric     // realignment.
1080*8bcb0991SDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1081*8bcb0991SDimitry Andric     if (!TRI->needsStackRealignment(MF))
1082*8bcb0991SDimitry Andric       while (NewAlign > Alignment &&
1083*8bcb0991SDimitry Andric              DL.exceedsNaturalStackAlignment(Align(NewAlign)))
1084*8bcb0991SDimitry Andric         NewAlign /= 2;
1085*8bcb0991SDimitry Andric 
1086*8bcb0991SDimitry Andric     if (NewAlign > Alignment) {
1087*8bcb0991SDimitry Andric       Alignment = NewAlign;
1088*8bcb0991SDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
1089*8bcb0991SDimitry Andric       // Give the stack frame object a larger alignment if needed.
1090*8bcb0991SDimitry Andric       if (MFI.getObjectAlignment(FI) < Alignment)
1091*8bcb0991SDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
1092*8bcb0991SDimitry Andric     }
1093*8bcb0991SDimitry Andric   }
1094*8bcb0991SDimitry Andric 
1095*8bcb0991SDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1096*8bcb0991SDimitry Andric 
1097*8bcb0991SDimitry Andric   MachineIRBuilder MIB(MI);
1098*8bcb0991SDimitry Andric   // Now we need to emit a load/store pair for each of the types we've
1099*8bcb0991SDimitry Andric   // collected. I.e. for each type, generate a load of that width from the
1100*8bcb0991SDimitry Andric   // source pointer, then a corresponding store of the loaded value to the
1101*8bcb0991SDimitry Andric   // dest buffer. This can result in a sequence of loads and stores of mixed
1102*8bcb0991SDimitry Andric   // types, depending on what the target specifies as good types to use.
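  // For example, a 12-byte copy on a 64-bit target might be lowered to an s64
  // load/store at offset 0 followed by an s32 load/store at offset 8.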
1103*8bcb0991SDimitry Andric   unsigned CurrOffset = 0;
1104*8bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(Src);
1105*8bcb0991SDimitry Andric   unsigned Size = KnownLen;
1106*8bcb0991SDimitry Andric   for (auto CopyTy : MemOps) {
1107*8bcb0991SDimitry Andric     // Issuing an unaligned load / store pair that overlaps with the previous
1108*8bcb0991SDimitry Andric     // pair. Adjust the offset accordingly.
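    // (Same trick as in the memset path: the tail access is shifted back so
    // that one wider, overlapping access covers the remainder instead of
    // several narrower ones.)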
1109*8bcb0991SDimitry Andric     if (CopyTy.getSizeInBytes() > Size)
1110*8bcb0991SDimitry Andric       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1111*8bcb0991SDimitry Andric 
1112*8bcb0991SDimitry Andric     // Construct MMOs for the accesses.
1113*8bcb0991SDimitry Andric     auto *LoadMMO =
1114*8bcb0991SDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1115*8bcb0991SDimitry Andric     auto *StoreMMO =
1116*8bcb0991SDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1117*8bcb0991SDimitry Andric 
1118*8bcb0991SDimitry Andric     // Create the load.
1119*8bcb0991SDimitry Andric     Register LoadPtr = Src;
1120*8bcb0991SDimitry Andric     Register Offset;
1121*8bcb0991SDimitry Andric     if (CurrOffset != 0) {
1122*8bcb0991SDimitry Andric       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1123*8bcb0991SDimitry Andric                    .getReg(0);
1124*8bcb0991SDimitry Andric       LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1125*8bcb0991SDimitry Andric     }
1126*8bcb0991SDimitry Andric     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1127*8bcb0991SDimitry Andric 
1128*8bcb0991SDimitry Andric     // Create the store.
1129*8bcb0991SDimitry Andric     Register StorePtr =
1130*8bcb0991SDimitry Andric         CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1131*8bcb0991SDimitry Andric     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1132*8bcb0991SDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
1133*8bcb0991SDimitry Andric     Size -= CopyTy.getSizeInBytes();
1134*8bcb0991SDimitry Andric   }
1135*8bcb0991SDimitry Andric 
1136*8bcb0991SDimitry Andric   MI.eraseFromParent();
1137*8bcb0991SDimitry Andric   return true;
1138*8bcb0991SDimitry Andric }
1139*8bcb0991SDimitry Andric 
1140*8bcb0991SDimitry Andric bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1141*8bcb0991SDimitry Andric                                     Register Src, unsigned KnownLen,
1142*8bcb0991SDimitry Andric                                     unsigned DstAlign, unsigned SrcAlign,
1143*8bcb0991SDimitry Andric                                     bool IsVolatile) {
1144*8bcb0991SDimitry Andric   auto &MF = *MI.getParent()->getParent();
1145*8bcb0991SDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1146*8bcb0991SDimitry Andric   auto &DL = MF.getDataLayout();
1147*8bcb0991SDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
1148*8bcb0991SDimitry Andric 
1149*8bcb0991SDimitry Andric   assert(KnownLen != 0 && "Have a zero length memmove length!");
1150*8bcb0991SDimitry Andric 
1151*8bcb0991SDimitry Andric   bool DstAlignCanChange = false;
1152*8bcb0991SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
1153*8bcb0991SDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
1154*8bcb0991SDimitry Andric   unsigned Alignment = MinAlign(DstAlign, SrcAlign);
1155*8bcb0991SDimitry Andric 
1156*8bcb0991SDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1157*8bcb0991SDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1158*8bcb0991SDimitry Andric     DstAlignCanChange = true;
1159*8bcb0991SDimitry Andric 
1160*8bcb0991SDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1161*8bcb0991SDimitry Andric   std::vector<LLT> MemOps;
1162*8bcb0991SDimitry Andric 
1163*8bcb0991SDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
1164*8bcb0991SDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1165*8bcb0991SDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1166*8bcb0991SDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1167*8bcb0991SDimitry Andric 
1168*8bcb0991SDimitry Andric   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1169*8bcb0991SDimitry Andric   // to a bug in its findOptimalMemOpLowering implementation. For now do the
1170*8bcb0991SDimitry Andric   // same thing here.
1171*8bcb0991SDimitry Andric   if (!findGISelOptimalMemOpLowering(
1172*8bcb0991SDimitry Andric           MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
1173*8bcb0991SDimitry Andric           SrcAlign,
1174*8bcb0991SDimitry Andric           /*IsMemset=*/false,
1175*8bcb0991SDimitry Andric           /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
1176*8bcb0991SDimitry Andric           /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
1177*8bcb0991SDimitry Andric           SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
1178*8bcb0991SDimitry Andric     return false;
1179*8bcb0991SDimitry Andric 
1180*8bcb0991SDimitry Andric   if (DstAlignCanChange) {
1181*8bcb0991SDimitry Andric     // Get an estimate of the type from the LLT.
1182*8bcb0991SDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
1183*8bcb0991SDimitry Andric     unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
1184*8bcb0991SDimitry Andric 
1185*8bcb0991SDimitry Andric     // Don't promote to an alignment that would require dynamic stack
1186*8bcb0991SDimitry Andric     // realignment.
1187*8bcb0991SDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1188*8bcb0991SDimitry Andric     if (!TRI->needsStackRealignment(MF))
1189*8bcb0991SDimitry Andric       while (NewAlign > Alignment &&
1190*8bcb0991SDimitry Andric              DL.exceedsNaturalStackAlignment(Align(NewAlign)))
1191*8bcb0991SDimitry Andric         NewAlign /= 2;
1192*8bcb0991SDimitry Andric 
1193*8bcb0991SDimitry Andric     if (NewAlign > Alignment) {
1194*8bcb0991SDimitry Andric       Alignment = NewAlign;
1195*8bcb0991SDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
1196*8bcb0991SDimitry Andric       // Give the stack frame object a larger alignment if needed.
1197*8bcb0991SDimitry Andric       if (MFI.getObjectAlignment(FI) < Alignment)
1198*8bcb0991SDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
1199*8bcb0991SDimitry Andric     }
1200*8bcb0991SDimitry Andric   }
1201*8bcb0991SDimitry Andric 
1202*8bcb0991SDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1203*8bcb0991SDimitry Andric 
1204*8bcb0991SDimitry Andric   MachineIRBuilder MIB(MI);
1205*8bcb0991SDimitry Andric   // Memmove requires that we perform all the loads before issuing the stores.
1206*8bcb0991SDimitry Andric   // Apart from that, this loop is pretty much doing the same thing as the
1207*8bcb0991SDimitry Andric   // memcpy codegen function.
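  // Doing every load up front is what keeps this correct when the source and
  // destination ranges overlap, which memmove (unlike memcpy) must handle.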
1208*8bcb0991SDimitry Andric   unsigned CurrOffset = 0;
1209*8bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(Src);
1210*8bcb0991SDimitry Andric   SmallVector<Register, 16> LoadVals;
1211*8bcb0991SDimitry Andric   for (auto CopyTy : MemOps) {
1212*8bcb0991SDimitry Andric     // Construct MMO for the load.
1213*8bcb0991SDimitry Andric     auto *LoadMMO =
1214*8bcb0991SDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1215*8bcb0991SDimitry Andric 
1216*8bcb0991SDimitry Andric     // Create the load.
1217*8bcb0991SDimitry Andric     Register LoadPtr = Src;
1218*8bcb0991SDimitry Andric     if (CurrOffset != 0) {
1219*8bcb0991SDimitry Andric       auto Offset =
1220*8bcb0991SDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1221*8bcb0991SDimitry Andric       LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
1222*8bcb0991SDimitry Andric     }
1223*8bcb0991SDimitry Andric     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1224*8bcb0991SDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
1225*8bcb0991SDimitry Andric   }
1226*8bcb0991SDimitry Andric 
1227*8bcb0991SDimitry Andric   CurrOffset = 0;
1228*8bcb0991SDimitry Andric   for (unsigned I = 0; I < MemOps.size(); ++I) {
1229*8bcb0991SDimitry Andric     LLT CopyTy = MemOps[I];
1230*8bcb0991SDimitry Andric     // Now store the values loaded.
1231*8bcb0991SDimitry Andric     auto *StoreMMO =
1232*8bcb0991SDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1233*8bcb0991SDimitry Andric 
1234*8bcb0991SDimitry Andric     Register StorePtr = Dst;
1235*8bcb0991SDimitry Andric     if (CurrOffset != 0) {
1236*8bcb0991SDimitry Andric       auto Offset =
1237*8bcb0991SDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1238*8bcb0991SDimitry Andric       StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
1239*8bcb0991SDimitry Andric     }
1240*8bcb0991SDimitry Andric     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1241*8bcb0991SDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
1242*8bcb0991SDimitry Andric   }
1243*8bcb0991SDimitry Andric   MI.eraseFromParent();
1244*8bcb0991SDimitry Andric   return true;
1245*8bcb0991SDimitry Andric }
1246*8bcb0991SDimitry Andric 
1247*8bcb0991SDimitry Andric bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1248*8bcb0991SDimitry Andric   // This combine is fairly complex, so it's not written with a separate
1249*8bcb0991SDimitry Andric   // matcher function.
1250*8bcb0991SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1251*8bcb0991SDimitry Andric   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1252*8bcb0991SDimitry Andric   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1253*8bcb0991SDimitry Andric           ID == Intrinsic::memset) &&
1254*8bcb0991SDimitry Andric          "Expected a memcpy like intrinsic");
1255*8bcb0991SDimitry Andric 
1256*8bcb0991SDimitry Andric   auto MMOIt = MI.memoperands_begin();
1257*8bcb0991SDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
1258*8bcb0991SDimitry Andric   bool IsVolatile = MemOp->isVolatile();
1259*8bcb0991SDimitry Andric   // Don't try to optimize volatile.
1260*8bcb0991SDimitry Andric   if (IsVolatile)
1261*8bcb0991SDimitry Andric     return false;
1262*8bcb0991SDimitry Andric 
1263*8bcb0991SDimitry Andric   unsigned DstAlign = MemOp->getBaseAlignment();
1264*8bcb0991SDimitry Andric   unsigned SrcAlign = 0;
1265*8bcb0991SDimitry Andric   Register Dst = MI.getOperand(1).getReg();
1266*8bcb0991SDimitry Andric   Register Src = MI.getOperand(2).getReg();
1267*8bcb0991SDimitry Andric   Register Len = MI.getOperand(3).getReg();
1268*8bcb0991SDimitry Andric 
1269*8bcb0991SDimitry Andric   if (ID != Intrinsic::memset) {
1270*8bcb0991SDimitry Andric     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1271*8bcb0991SDimitry Andric     MemOp = *(++MMOIt);
1272*8bcb0991SDimitry Andric     SrcAlign = MemOp->getBaseAlignment();
1273*8bcb0991SDimitry Andric   }
1274*8bcb0991SDimitry Andric 
1275*8bcb0991SDimitry Andric   // See if this is a constant-length copy.
1276*8bcb0991SDimitry Andric   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1277*8bcb0991SDimitry Andric   if (!LenVRegAndVal)
1278*8bcb0991SDimitry Andric     return false; // Leave it to the legalizer to lower it to a libcall.
1279*8bcb0991SDimitry Andric   unsigned KnownLen = LenVRegAndVal->Value;
1280*8bcb0991SDimitry Andric 
1281*8bcb0991SDimitry Andric   if (KnownLen == 0) {
1282*8bcb0991SDimitry Andric     MI.eraseFromParent();
1283*8bcb0991SDimitry Andric     return true;
1284*8bcb0991SDimitry Andric   }
1285*8bcb0991SDimitry Andric 
1286*8bcb0991SDimitry Andric   if (MaxLen && KnownLen > MaxLen)
1287*8bcb0991SDimitry Andric     return false;
1288*8bcb0991SDimitry Andric 
1289*8bcb0991SDimitry Andric   if (ID == Intrinsic::memcpy)
1290*8bcb0991SDimitry Andric     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1291*8bcb0991SDimitry Andric   if (ID == Intrinsic::memmove)
1292*8bcb0991SDimitry Andric     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1293*8bcb0991SDimitry Andric   if (ID == Intrinsic::memset)
1294*8bcb0991SDimitry Andric     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1295*8bcb0991SDimitry Andric   return false;
1296*8bcb0991SDimitry Andric }
1297*8bcb0991SDimitry Andric 
12980b57cec5SDimitry Andric bool CombinerHelper::tryCombine(MachineInstr &MI) {
12990b57cec5SDimitry Andric   if (tryCombineCopy(MI))
13000b57cec5SDimitry Andric     return true;
1301*8bcb0991SDimitry Andric   if (tryCombineExtendingLoads(MI))
1302*8bcb0991SDimitry Andric     return true;
1303*8bcb0991SDimitry Andric   if (tryCombineIndexedLoadStore(MI))
1304*8bcb0991SDimitry Andric     return true;
1305*8bcb0991SDimitry Andric   return false;
13060b57cec5SDimitry Andric }
1307