//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, GISelKnownBits *KB,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
      KB(KB), MDT(MDT), LI(LI) {
  (void)this->KB;
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
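
// The combine below flattens a G_CONCAT_VECTORS of G_BUILD_VECTOR and
// G_IMPLICIT_DEF operands into a single G_BUILD_VECTOR. A minimal sketch of
// the transformation (illustrative MIR, not taken from an actual test):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// becomes
//   %u:_(s32) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %u(s32), %u(s32)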
bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector into as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.
  // Alternatively, we could have generated a build_vector of undefs and
  // relied on another combine to clean that up. For now, given we already
  // gather this information in tryCombineConcatVectors, just save compile
  // time and issue the right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  // extract_vector_elt and so on. It is less clear that would
  // be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  // we could still emit an extract vector element in that case.
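  // As a sketch of the mask shapes this match accepts (hypothetical MIR):
  // with two <2 x s32> sources, a <4 x s32> shuffle whose mask is
  // <0, 1, 2, 3> concatenates Src1 and Src2, and <2, 3, 2, 3> concatenates
  // Src2 with itself; a mask element of -1 (undef) leaves the corresponding
  // slot undefined.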
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate are the attributes of the candidate under
/// consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
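  // For instance, given (illustrative MIR)
  //   %v:_(s8) = G_LOAD %ptr
  //   %a:_(s32) = G_ANYEXT %v(s8)
  //   %s:_(s32) = G_SEXT %v(s8)
  // the G_SEXT use wins: its semantics are defined, so folding it into the
  // load is more likely to pay off. The exact choice is made by the rules
  // below.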
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so folding one into the load is the bigger win.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases
/// may want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
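  // In a PHI, each register operand is immediately followed by its
  // predecessor-block operand, e.g. (illustrative MIR):
  //   %p:_(s8) = G_PHI %a(s8), %bb.1, %b(s8), %bb.2
  // so for a use in %a the instructions are inserted at the end of %bb.1,
  // not in the block containing the G_PHI.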
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just
  // after the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or
  // just for performance.

  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;

  auto &LoadValue = MI.getOperand(0);
  assert(LoadValue.isReg() && "Result wasn't a register?");

  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of
  // bytes. If we try to perform extload combining on those, we can end up with
  //   %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // Non-power-of-2 types will very likely be legalized into multiple loads.
  // Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      // Check for legality.
      if (LI) {
        LegalityQuery::MemDesc MMDesc;
        const auto &MMO = **MI.memoperands_begin();
        MMDesc.SizeInBits = MMO.getSizeInBits();
        MMDesc.AlignInBits = MMO.getAlign().value() * 8;
        MMDesc.Ordering = MMO.getOrdering();
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
        if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
            LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
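
  // At this point a candidate has been found: e.g. for (illustrative MIR)
  //   %v:_(s8) = G_LOAD %ptr
  //   %w:_(s32) = G_SEXT %v(s8)
  // Preferred is {s32, G_SEXT, <the G_SEXT defining %w>}, and the apply step
  // will turn the G_LOAD into %w:_(s32) = G_SEXTLOAD %ptr.
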
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ANYEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s64) = G_ANYEXT %1(s8)
          //   ... = ... %3(s64)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   %3:_(s64) = G_ANYEXT %2:_(s32)
          //   ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s64) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ZEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s64) = G_SEXTLOAD ...
          //   %4:_(s8) = G_TRUNC %2:_(s64)
          //   %3:_(s32) = G_ZEXT %4:_(s8)
          //   ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }
    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;

  // Walk the basic block from the top; the first of the two instructions we
  // encounter decides the answer.
  for (const MachineInstr &CurMI : *DefMI.getParent()) {
    if (&CurMI == &DefMI)
      return true;
    if (&CurMI == &UseMI)
      return false;
  }

  llvm_unreachable("Block must contain instructions");
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
  // G_SEXT_INREG from B bits on an N-bit value is a no-op only if the source
  // already has at least N - B + 1 sign bits, i.e. bits [N-1:B-1] all match.
  unsigned NumSextBits =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
      MI.getOperand(2).getImm() + 1;
  return SrcSignBits >= NumSextBits;
}

bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  MachineIRBuilder MIB(MI);
  MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}
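
// As a rough example of the G_SEXT_INREG combine above (hypothetical MIR):
// if %x:_(s32) is the result of a G_SEXTLOAD of an s8, it already has 25
// sign bits, so
//   %y:_(s32) = G_SEXT_INREG %x, 8
// changes nothing and can be replaced with a plain COPY of %x.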

bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Base = MI.getOperand(1).getReg();
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);

  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation
    // might be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.
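
    // The shape being matched here, sketched as MIR (illustrative only):
    //   %v:_(s32) = G_LOAD %base(p0)
    //   %next:_(p0) = G_PTR_ADD %base, %off(s64)
    // which applyCombineIndexedLoadStore later folds into a single
    //   %v:_(s32), %next:_(p0) = G_INDEXED_LOAD %base, %off, 0
    // (post-indexed: load at %base, write %base + %off back to %next).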

    bool MemOpDominatesAddrUses = true;
    for (auto &PtrAddUse :
         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}

bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }
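
  // A sketch of the pre-indexed shape (illustrative MIR):
  //   %addr:_(p0) = G_PTR_ADD %base, %off(s64)
  //   G_STORE %val(s32), %addr(p0)
  //   ... other users of %addr ...
  // which can become
  //   %addr:_(p0) = G_INDEXED_STORE %val, %base, %off, 1
  // (pre-indexed: store at %base + %off and keep the sum available in %addr).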

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.

  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}

bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;

  return true;
}

void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  MachineIRBuilder MIRBuilder(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}

bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;

  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and an
  // otherwise fallthrough to bb2.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;
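
  // What the apply step does on the pattern above (illustrative): the G_ICMP
  // predicate is inverted (e.g. eq -> ne) and the G_BRCOND is retargeted at
  // bb3, so the G_BR can be deleted and execution falls through to the
  // layout successor bb2.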

  // Check that the next block is the conditional branch target.
  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
    return false;

  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
    return false;
  return true;
}

bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
  if (!matchElideBrByInvertingCond(MI))
    return false;
  applyElideBrByInvertingCond(MI);
  return true;
}

void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  MachineBasicBlock::iterator BrIt(MI);
  MachineInstr *BrCond = &*std::prev(BrIt);
  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());

  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());

  // Invert the G_ICMP condition.
  Observer.changingInstr(*CmpMI);
  CmpMI->getOperand(1).setPredicate(InversePred);
  Observer.changedInstr(*CmpMI);

  // Change the conditional branch target.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
  MI.eraseFromParent();
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}
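
// A hypothetical example of what the helper below computes: for a 13-byte
// memset on a 64-bit target it may return MemOps = {s64, s32, s8}; if
// overlapping stores are allowed and fast, it can instead return {s64, s64},
// with the second store covering bytes 5..12.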
// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  if (!Ty.isVector() && ValVRegAndVal) {
    unsigned KnownVal = ValVRegAndVal->Value;
    APInt Scalar = APInt(8, KnownVal);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }
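
  // For example (values, not real MIR): replicating a non-constant byte %b
  // into an s32 computes zext(%b) * 0x01010101, so %b == 0xAB yields
  // 0xABABABAB at run time.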
  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}

bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
                                    Register Val, unsigned KnownLen,
                                    Align Alignment, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
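  // Illustrative example (not from the source): for a 13-byte memset, a
  // target preferring 64-bit stores might produce MemOps = {s64, s32, s8}
  // (8 + 4 + 1 bytes), or {s64, s64} with an overlapping tail store when
  // misaligned accesses are fast.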
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return false;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }
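    // Illustrative example (not from the source): with LargestTy = s64 and a
    // trailing s16 store, the step below can reuse the wide splat through
    //   %v16:_(s16) = G_TRUNC %v64(s64)
    // on targets where truncation is free, instead of re-materializing the
    // splat at the narrower width.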
    // If this store is smaller than the largest store, see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return false;
    }

    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
                                    Register Src, unsigned KnownLen,
                                    Align DstAlign, Align SrcAlign,
                                    bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a load / store pair for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store of that loaded
  // value to the dest buffer. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  unsigned Size = KnownLen;
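  // Illustrative example (not from the source, assuming 64-bit pointers): a
  // 12-byte memcpy for which MemOps = {s64, s32} was chosen expands roughly
  // to:
  //   %v0:_(s64) = G_LOAD %src
  //   G_STORE %v0(s64), %dst
  //   %off:_(s64) = G_CONSTANT i64 8
  //   %p:_(p0) = G_PTR_ADD %src, %off(s64)
  //   %v1:_(s32) = G_LOAD %p
  //   %q:_(p0) = G_PTR_ADD %dst, %off(s64)
  //   G_STORE %v1(s32), %q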
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr =
        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
                                     Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

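  // Illustrative example (not from the source): for overlapping buffers such
  // as memmove(p + 1, p, 8), interleaving each load with its corresponding
  // store could clobber source bytes that a later load still needs. Issuing
  // every load before the first store, as this function does below, preserves
  // the original source bytes.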
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the
  // stores. Apart from that, this loop is pretty much doing the same thing as
  // the memcpy codegen function.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
          ID == Intrinsic::memset) &&
         "Expected a memcpy like intrinsic");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();
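  // Illustrative example (not from the source): a lowered memcpy reaching
  // this combine looks roughly like
  //   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %dst(p0), %src(p0),
  //       %len(s64), ...
  // Operand 1 is the destination, operand 2 the source (for memset, the value
  // to store), and operand 3 the length; they are unpacked below.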
  // Don't try to optimize volatile.
  if (IsVolatile)
    return false;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  Register Len = MI.getOperand(3).getReg();

  if (ID != Intrinsic::memset) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy.
  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return false; // Leave it to the legalizer to lower it to a libcall.
  unsigned KnownLen = LenVRegAndVal->Value;

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return true;
  }

  if (MaxLen && KnownLen > MaxLen)
    return false;

  if (ID == Intrinsic::memcpy)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (ID == Intrinsic::memmove)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
  if (ID == Intrinsic::memset)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return false;
}

bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)

  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
  MatchInfo.Base = Base;
  return true;
}

bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
    return false;
  ShiftVal = Log2_64(MaybeImmVal->Value);
  return true;
}

bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
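// Illustrative example (not from the source):
//   %c:_(s32) = G_CONSTANT i32 8
//   %r:_(s32) = G_MUL %x, %c
// is rewritten in place by the match/apply pair above to
//   %s:_(s32) = G_CONSTANT i32 3
//   %r:_(s32) = G_SHL %x, %s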
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) &&
         "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: handle vectors.
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    // dst = G_LSHR s64:x, C for C >= 32
    // =>
    // lo, hi = G_UNMERGE_VALUES x
    // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
          Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    // dst = G_SHL s64:x, C for C >= 32
    // =>
    // lo, hi = G_UNMERGE_VALUES x
    // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
          Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy,
                              HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
          HalfTy, Unmerge.getReg(1),
          Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C), for C >= 32 ->
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}
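// Illustrative example (not from the source): a G_SHUFFLE_VECTOR whose mask
// elements are all -1, e.g.
//   %v:_(<4 x s32>) = G_SHUFFLE_VECTOR %a, %b, shufflemask(-1,-1,-1,-1)
// defines no lane from either source, so callers typically pair the match
// above with replaceInstWithUndef below.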
bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;

  // Handle a case like this:
  //
  // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}

bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  // MIPatternMatch doesn't let us look through G_ZEXT etc.
  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}

bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}
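// The next combine folds a select whose two value operands are equal.
// Illustrative example (not from the source):
//   %z:_(s32) = G_SELECT %cond(s1), %x, %x
// can be replaced by %x regardless of %cond.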
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    int64_t Cst;
    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
        Cst != 0)
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}
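// Illustrative usage sketch (not from the source): a target combiner
// typically constructs this helper in its combine callback and delegates to
// tryCombine, e.g.
//   CombinerHelper Helper(Observer, B, KB, MDT, LI);
//   if (Helper.tryCombine(MI))
//     return true;
// The variable names above are placeholders, not definitions from this file.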