xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization combines on generic MachineInstrs.
///
/// The combines here must preserve instruction legality.
///
/// Lowering combines (e.g. pseudo matching) should be handled by
/// AArch64PostLegalizerLowering.
///
/// Combines which don't rely on instruction legality should go in the
/// AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
/// Rewrite for pairwise fadd pattern
///   (s32 (g_extract_vector_elt
///           (g_fadd (vXs32 Other)
///                   (g_vector_shuffle (vXs32 Other) undef <1,X,...>)) 0))
/// ->
///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
///                (g_extract_vector_elt (vXs32 Other) 1)))
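/// The two-extract form can then be selected to a scalar pairwise FADDP.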
bool matchExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
  if (!Cst || Cst->Value != 0)
    return false;
  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.

  // Now check for an fadd operation. TODO: expand this for integer add?
  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
  if (!FAddMI)
    return false;

  // If we add support for integer add, we must restrict these types to just
  // s64.
  unsigned DstSize = DstTy.getSizeInBits();
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  Register Src1Op1 = FAddMI->getOperand(1).getReg();
  Register Src1Op2 = FAddMI->getOperand(2).getReg();
  MachineInstr *Shuffle =
      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
  MachineInstr *Other = MRI.getVRegDef(Src1Op1);
  if (!Shuffle) {
    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
    Other = MRI.getVRegDef(Src1Op2);
  }

  // We're looking for a shuffle that moves the second element to index 0.
  if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
      Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
    std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
    std::get<1>(MatchInfo) = DstTy;
    std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
    return true;
  }
  return false;
}

void applyExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  unsigned Opc = std::get<0>(MatchInfo);
  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
  // We want to generate two extracts of elements 0 and 1, and add them.
  LLT Ty = std::get<1>(MatchInfo);
  Register Src = std::get<2>(MatchInfo);
  LLT s64 = LLT::scalar(64);
  B.setInstrAndDebugLoc(MI);
  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
  MI.eraseFromParent();
}

bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
  // TODO: check if extended build vector as well.
  unsigned Opc = MRI.getVRegDef(R)->getOpcode();
  return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
}

bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
  // TODO: check if extended build vector as well.
  return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}

bool matchAArch64MulConstCombine(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL);
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register Dst = MI.getOperand(0).getReg();
  const LLT Ty = MRI.getType(LHS);

  // The below optimizations require a constant RHS.
  auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!Const)
    return false;

  APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
  // The following code is ported from AArch64ISelLowering.
  // Multiplication of a power of two plus/minus one can be done more
  // cheaply as shift+add/sub. For now, this is true unilaterally. If
  // future CPUs have a cheaper MADD instruction, this may need to be
  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
  // 64-bit is 5 cycles, so this is always a win.
  // More aggressively, some multiplications N0 * C can be lowered to
  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
  // e.g. 6=3*2=(2+1)*2.
  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
  // which equals (1+2)*16-(1+2).
  // TrailingZeroes is used to test if the mul can be lowered to
  // shift+add+shift.
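  // Worked instance (illustrative): for C = 6, TrailingZeroes = 1 and
  // ShiftedConstValue = 3; 3 - 1 = 2 is a power of two, so ShiftAmt = 1 with
  // G_ADD, giving x * 6 => (shl (add (shl x, 1), x), 1).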
  unsigned TrailingZeroes = ConstValue.countr_zero();
  if (TrailingZeroes) {
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into smul or umul.
    if (MRI.hasOneNonDBGUse(LHS) &&
        (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
      return false;
    // Conservatively do not lower to shift+add+shift if the mul might be
    // folded into madd or msub.
    if (MRI.hasOneNonDBGUse(Dst)) {
      MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
      unsigned UseOpc = UseMI.getOpcode();
      if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
          UseOpc == TargetOpcode::G_SUB)
        return false;
    }
  }
  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
  // and shift+add+shift.
  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);

  unsigned ShiftAmt, AddSubOpc;
  // Is the shifted value the LHS operand of the add/sub?
  bool ShiftValUseIsLHS = true;
  // Do we need to negate the result?
  bool NegateResult = false;

  if (ConstValue.isNonNegative()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
    APInt SCVMinus1 = ShiftedConstValue - 1;
    APInt CVPlus1 = ConstValue + 1;
    if (SCVMinus1.isPowerOf2()) {
      ShiftAmt = SCVMinus1.logBase2();
      AddSubOpc = TargetOpcode::G_ADD;
    } else if (CVPlus1.isPowerOf2()) {
      ShiftAmt = CVPlus1.logBase2();
      AddSubOpc = TargetOpcode::G_SUB;
    } else
      return false;
  } else {
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
    APInt CVNegPlus1 = -ConstValue + 1;
    APInt CVNegMinus1 = -ConstValue - 1;
    if (CVNegPlus1.isPowerOf2()) {
      ShiftAmt = CVNegPlus1.logBase2();
      AddSubOpc = TargetOpcode::G_SUB;
      ShiftValUseIsLHS = false;
    } else if (CVNegMinus1.isPowerOf2()) {
      ShiftAmt = CVNegMinus1.logBase2();
      AddSubOpc = TargetOpcode::G_ADD;
      NegateResult = true;
    } else
      return false;
  }

  if (NegateResult && TrailingZeroes)
    return false;

  ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
    auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
    auto ShiftedVal = B.buildShl(Ty, LHS, Shift);

    Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
    Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
    auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
    assert(!(NegateResult && TrailingZeroes) &&
           "NegateResult and TrailingZeroes cannot both be true for now.");
    // Negate the result.
    if (NegateResult) {
      B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
      return;
    }
    // Shift the result.
    if (TrailingZeroes) {
      B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
      return;
    }
    B.buildCopy(DstReg, Res.getReg(0));
  };
  return true;
}

void applyAArch64MulConstCombine(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
  B.setInstrAndDebugLoc(MI);
  ApplyFn(B, MI.getOperand(0).getReg());
  MI.eraseFromParent();
}

/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
/// is a zero, into a G_ZEXT of the first.
bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Merge = cast<GMerge>(MI);
  LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
  if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
    return false;
  return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
}

void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
  //  ->
  // %d(s64) = G_ZEXT %a(s32)
  Observer.changingInstr(MI);
  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
  MI.removeOperand(2);
  Observer.changedInstr(MI);
}

/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
/// instruction.
bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // If this is coming from a scalar compare then we can use a G_ZEXT instead of
  // a G_ANYEXT:
  //
  // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
  // %ext:_(s64) = G_ANYEXT %cmp(s32)
  //
  // By doing this, we can leverage more KnownBits combines.
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  return MRI.getType(Dst).isScalar() &&
         mi_match(Src, MRI,
                  m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
                           m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
}

void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
                             MachineIRBuilder &B,
                             GISelChangeObserver &Observer) {
  Observer.changingInstr(MI);
  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
  Observer.changedInstr(MI);
}

/// Match a 128b store of zero and split it into two 64 bit stores, for
/// size/performance reasons.
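/// For example (illustrative MIR):
///   G_STORE %zero:_(<2 x s64>), %p:_(p0) :: (store (<2 x s64>))
/// becomes two G_STOREs of an s64 zero, at %p and at %p plus 8 bytes.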
bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
  GStore &Store = cast<GStore>(MI);
  if (!Store.isSimple())
    return false;
  LLT ValTy = MRI.getType(Store.getValueReg());
  if (ValTy.isScalableVector())
    return false;
  if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
    return false;
  if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
    return false; // Don't split truncating stores.
  if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
    return false;
  auto MaybeCst = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(Store.getValueReg()), MRI);
  return MaybeCst && MaybeCst->isZero();
}

void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B,
                            GISelChangeObserver &Observer) {
  B.setInstrAndDebugLoc(MI);
  GStore &Store = cast<GStore>(MI);
  assert(MRI.getType(Store.getValueReg()).isVector() &&
         "Expected a vector store value");
  LLT NewTy = LLT::scalar(64);
  Register PtrReg = Store.getPointerReg();
  auto Zero = B.buildConstant(NewTy, 0);
  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
                               B.buildConstant(LLT::scalar(64), 8));
  auto &MF = *MI.getMF();
  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
  B.buildStore(Zero, PtrReg, *LowMMO);
  B.buildStore(Zero, HighPtr, *HighMMO);
  Store.eraseFromParent();
}

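/// Match a vector G_OR of two G_ANDs whose mask operands are G_BUILD_VECTORs
/// of element-wise complementary constants, i.e. a bitwise select
/// (illustrative operand names):
///   (G_OR (G_AND %a, %mask), (G_AND %b, %inv_mask))
/// and combine it to AArch64's bitwise-select pseudo:
///   G_BSP %mask, %a, %b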
bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (!DstTy.isVector())
    return false;

  Register AO1, AO2, BVO1, BVO2;
  if (!mi_match(MI, MRI,
                m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
                      m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
    return false;

  auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
  auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
  if (!BV1 || !BV2)
    return false;

  for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
    auto ValAndVReg1 =
        getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
    auto ValAndVReg2 =
        getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
    if (!ValAndVReg1 || !ValAndVReg2 ||
        ValAndVReg1->Value != ~ValAndVReg2->Value)
      return false;
  }

  MatchInfo = {AO1, AO2, BVO1};
  return true;
}

void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(
      AArch64::G_BSP, {MI.getOperand(0).getReg()},
      {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
  MI.eraseFromParent();
}

// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
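// (Constants shown for 32-bit elements, where HalfSize = 16; in general the
// mask is 2^HalfSize - 1, the splat is 1 | 1 << HalfSize, and the shift is
// HalfSize - 1. The apply rewrites this as a signed less-than-zero compare
// (CMLT) on the half-size vector, bracketed by bitcasts.)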
bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
      DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
      DstTy != LLT::fixed_vector(8, 16))
    return false;

  auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  if (AndMI->getOpcode() != TargetOpcode::G_AND)
    return false;
  auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
  if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
    return false;

  // Check the constant splat values
  auto V1 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
  auto V2 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
  auto V3 = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
  if (!V1.has_value() || !V2.has_value() || !V3.has_value())
    return false;
  unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
  if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
      V3 != (HalfSize - 1))
    return false;

  SrcReg = LShrMI->getOperand(1).getReg();

  return true;
}

void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, Register &SrcReg) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT HalfTy =
      DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
          .changeElementSize(DstTy.getScalarSizeInBits() / 2);

  Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
  Register CastReg =
      B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
  Register CMLTReg =
      B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
          .getReg(0);

  B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
  MI.eraseFromParent();
}

class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
  // TODO: Make CombinerHelper methods const.
  mutable CombinerHelper Helper;
  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AArch64PostLegalizerCombiner"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_IMPL

AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI, MachineDominatorTree *MDT,
    const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
      RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AArch64PostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;

  struct StoreInfo {
    GStore *St = nullptr;
    // The G_PTR_ADD that's used by the store. We keep this to cache the
    // MachineInstr def.
    GPtrAdd *Ptr = nullptr;
    // The signed offset to the Ptr instruction.
    int64_t Offset = 0;
    LLT StoredType;
  };
  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
                               CSEMIRBuilder &MIB);

  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
                                          CSEMIRBuilder &MIB);
};
} // end anonymous namespace

void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    AU.addRequired<GISelCSEAnalysisWrapperPass>();
    AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  const auto *LI = ST.getLegalizerInfo();

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());

  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
                     F.hasMinSize());
  AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
                                        RuleConfig, ST, MDT, LI);
  bool Changed = Impl.combineMachineInstrs();

  auto MIB = CSEMIRBuilder(MF);
  MIB.setCSEInfo(CSEInfo);
  Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
  return Changed;
}

bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
    SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
  if (Stores.size() <= 2)
    return false;

  // Profitability checks:
  int64_t BaseOffset = Stores[0].Offset;
  unsigned NumPairsExpected = Stores.size() / 2;
  unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
  // Size savings will depend on whether we can fold the offset as an
  // immediate of an ADD.
  auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
  if (!TLI.isLegalAddImmediate(BaseOffset))
    TotalInstsExpected++;
  int SavingsExpected = Stores.size() - TotalInstsExpected;
  if (SavingsExpected <= 0)
    return false;
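  // Worked example (illustrative): four pairable stores form two STPs, plus
  // one ADD if the base offset can't be encoded as an add immediate. That is
  // at most 3 instructions replacing 4, so SavingsExpected >= 1 and we
  // proceed.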

  auto &MRI = MIB.getMF().getRegInfo();

  // We have a series of consecutive stores. Factor out the common base
  // pointer and rewrite the offsets.
  Register NewBase = Stores[0].Ptr->getReg(0);
  for (auto &SInfo : Stores) {
    // Compute a new pointer with the new base ptr and adjusted offset.
    MIB.setInstrAndDebugLoc(*SInfo.St);
    auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
    auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
                                  NewBase, NewOff);
    if (MIB.getObserver())
      MIB.getObserver()->changingInstr(*SInfo.St);
    SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
    if (MIB.getObserver())
      MIB.getObserver()->changedInstr(*SInfo.St);
  }
  LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
                    << " stores into a base pointer and offsets.\n");
  return true;
}

static cl::opt<bool>
    EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
                              cl::init(true), cl::Hidden,
                              cl::desc("Enable consecutive memop optimization "
                                       "in AArch64PostLegalizerCombiner"));

bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
    MachineFunction &MF, CSEMIRBuilder &MIB) {
  // This combine needs to run after all reassociations/folds on pointer
  // addressing have been done, specifically those that combine two G_PTR_ADDs
  // with constant offsets into a single G_PTR_ADD with a combined offset.
  // The goal of this optimization is to undo that combine in the case where
  // doing so has prevented the formation of pair stores due to illegal
  // addressing modes of STP. The reason we do it here is that it's much
  // easier to undo the transformation of a series of consecutive mem ops
  // than it is to detect when doing it would be a bad idea looking at a
  // single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
  //
  // An example:
  //   G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
  //   %off1:_(s64) = G_CONSTANT i64 4128
  //   %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
  //   G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
  //   %off2:_(s64) = G_CONSTANT i64 4144
  //   %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
  //   G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
  //   %off3:_(s64) = G_CONSTANT i64 4160
  //   %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
  //   G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
  bool Changed = false;
  auto &MRI = MF.getRegInfo();

  if (!EnableConsecutiveMemOpOpt)
    return Changed;

  SmallVector<StoreInfo, 8> Stores;
  // If we see a load, then we keep track of any values defined by it.
  // In the following example, STP formation will fail anyway because
  // the latter store is using a load result that appears after the
  // prior store. In this situation, if we factor out the offset then
  // we increase code size for no benefit.
  //   G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
  //   %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
  //   G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
  SmallVector<Register> LoadValsSinceLastStore;

  auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
    // Check if this store is consecutive to the last one.
    if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
        (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
         New.Offset) ||
        Last.StoredType != New.StoredType)
      return false;

    // Check if this store is using a load result that appears after the
    // last store. If so, bail out.
    if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
          return New.St->getValueReg() == LoadVal;
        }))
      return false;

    // Check if the current offset would be too large for STP.
    // If not, then STP formation should be able to handle it, so we don't
    // need to do anything.
    int64_t MaxLegalOffset;
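    // These limits correspond to STP's signed 7-bit scaled immediate: the
    // largest positive offset is 63 * the access size in bytes (63 * 4,
    // 63 * 8, and 63 * 16 respectively).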
    switch (New.StoredType.getSizeInBits()) {
    case 32:
      MaxLegalOffset = 252;
      break;
    case 64:
      MaxLegalOffset = 504;
      break;
    case 128:
      MaxLegalOffset = 1008;
      break;
    default:
      llvm_unreachable("Unexpected stored type size");
    }
    if (New.Offset < MaxLegalOffset)
      return false;

    // If factoring it out still wouldn't help then don't bother.
    return New.Offset - Stores[0].Offset <= MaxLegalOffset;
  };

  auto resetState = [&]() {
    Stores.clear();
    LoadValsSinceLastStore.clear();
  };

  for (auto &MBB : MF) {
    // We're looking inside a single BB at a time since the memset pattern
    // should only be in a single block.
    resetState();
    for (auto &MI : MBB) {
      // Skip for scalable vectors
      if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
          LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
        continue;

      if (auto *St = dyn_cast<GStore>(&MI)) {
        Register PtrBaseReg;
        APInt Offset;
        LLT StoredValTy = MRI.getType(St->getValueReg());
        unsigned ValSize = StoredValTy.getSizeInBits();
        if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
          continue;

        Register PtrReg = St->getPointerReg();
        if (mi_match(
                PtrReg, MRI,
                m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
          GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
          StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};

          if (Stores.empty()) {
            Stores.push_back(New);
            continue;
          }

          // Check if this store is a valid continuation of the sequence.
          auto &Last = Stores.back();
          if (storeIsValid(Last, New)) {
            Stores.push_back(New);
            LoadValsSinceLastStore.clear(); // Reset the load value tracking.
          } else {
            // The store isn't valid to consider for the prior sequence,
            // so try to optimize what we have so far and start a new sequence.
            Changed |= tryOptimizeConsecStores(Stores, MIB);
            resetState();
            Stores.push_back(New);
          }
        }
      } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
        LoadValsSinceLastStore.push_back(Ld->getDstReg());
      }
    }
    Changed |= tryOptimizeConsecStores(Stores, MIB);
    resetState();
  }

  return Changed;
}

char AArch64PostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
  return new AArch64PostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm