10b57cec5SDimitry Andric //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 90b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 100b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 118bcb0991SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 125ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 135ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 140b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 150b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h" 168bcb0991SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 178bcb0991SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 19*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 210b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 228bcb0991SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 235ffd83dbSDimitry Andric #include "llvm/Support/MathExtras.h" 248bcb0991SDimitry Andric #include "llvm/Target/TargetMachine.h" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric #define DEBUG_TYPE "gi-combiner" 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric using namespace llvm; 295ffd83dbSDimitry Andric using namespace MIPatternMatch; 300b57cec5SDimitry Andric 318bcb0991SDimitry Andric // Option to allow testing of the combiner while no targets know about indexed 328bcb0991SDimitry Andric // addressing. 338bcb0991SDimitry Andric static cl::opt<bool> 348bcb0991SDimitry Andric ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), 358bcb0991SDimitry Andric cl::desc("Force all indexed operations to be " 368bcb0991SDimitry Andric "legal for the GlobalISel combiner")); 378bcb0991SDimitry Andric 380b57cec5SDimitry Andric CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, 398bcb0991SDimitry Andric MachineIRBuilder &B, GISelKnownBits *KB, 405ffd83dbSDimitry Andric MachineDominatorTree *MDT, 415ffd83dbSDimitry Andric const LegalizerInfo *LI) 428bcb0991SDimitry Andric : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), 435ffd83dbSDimitry Andric KB(KB), MDT(MDT), LI(LI) { 448bcb0991SDimitry Andric (void)this->KB; 458bcb0991SDimitry Andric } 460b57cec5SDimitry Andric 47*e8d8bef9SDimitry Andric const TargetLowering &CombinerHelper::getTargetLowering() const { 48*e8d8bef9SDimitry Andric return *Builder.getMF().getSubtarget().getTargetLowering(); 49*e8d8bef9SDimitry Andric } 50*e8d8bef9SDimitry Andric 51*e8d8bef9SDimitry Andric /// \returns The little endian in-memory byte position of byte \p I in a 52*e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 53*e8d8bef9SDimitry Andric /// 54*e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 0 55*e8d8bef9SDimitry Andric static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { 56*e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 57*e8d8bef9SDimitry Andric return I; 58*e8d8bef9SDimitry Andric } 59*e8d8bef9SDimitry Andric 60*e8d8bef9SDimitry Andric /// \returns The big endian in-memory byte position of byte \p I in a 61*e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 62*e8d8bef9SDimitry Andric /// 63*e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 3 64*e8d8bef9SDimitry Andric static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { 65*e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 66*e8d8bef9SDimitry Andric return ByteWidth - I - 1; 67*e8d8bef9SDimitry Andric } 68*e8d8bef9SDimitry Andric 69*e8d8bef9SDimitry Andric /// Given a map from byte offsets in memory to indices in a load/store, 70*e8d8bef9SDimitry Andric /// determine if that map corresponds to a little or big endian byte pattern. 71*e8d8bef9SDimitry Andric /// 72*e8d8bef9SDimitry Andric /// \param MemOffset2Idx maps memory offsets to address offsets. 73*e8d8bef9SDimitry Andric /// \param LowestIdx is the lowest index in \p MemOffset2Idx. 74*e8d8bef9SDimitry Andric /// 75*e8d8bef9SDimitry Andric /// \returns true if the map corresponds to a big endian byte pattern, false 76*e8d8bef9SDimitry Andric /// if it corresponds to a little endian byte pattern, and None otherwise. 77*e8d8bef9SDimitry Andric /// 78*e8d8bef9SDimitry Andric /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns 79*e8d8bef9SDimitry Andric /// are as follows: 80*e8d8bef9SDimitry Andric /// 81*e8d8bef9SDimitry Andric /// AddrOffset Little endian Big endian 82*e8d8bef9SDimitry Andric /// 0 0 3 83*e8d8bef9SDimitry Andric /// 1 1 2 84*e8d8bef9SDimitry Andric /// 2 2 1 85*e8d8bef9SDimitry Andric /// 3 3 0 86*e8d8bef9SDimitry Andric static Optional<bool> 87*e8d8bef9SDimitry Andric isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 88*e8d8bef9SDimitry Andric int64_t LowestIdx) { 89*e8d8bef9SDimitry Andric // Need at least two byte positions to decide on endianness. 90*e8d8bef9SDimitry Andric unsigned Width = MemOffset2Idx.size(); 91*e8d8bef9SDimitry Andric if (Width < 2) 92*e8d8bef9SDimitry Andric return None; 93*e8d8bef9SDimitry Andric bool BigEndian = true, LittleEndian = true; 94*e8d8bef9SDimitry Andric for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { 95*e8d8bef9SDimitry Andric auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); 96*e8d8bef9SDimitry Andric if (MemOffsetAndIdx == MemOffset2Idx.end()) 97*e8d8bef9SDimitry Andric return None; 98*e8d8bef9SDimitry Andric const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; 99*e8d8bef9SDimitry Andric assert(Idx >= 0 && "Expected non-negative byte offset?"); 100*e8d8bef9SDimitry Andric LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); 101*e8d8bef9SDimitry Andric BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); 102*e8d8bef9SDimitry Andric if (!BigEndian && !LittleEndian) 103*e8d8bef9SDimitry Andric return None; 104*e8d8bef9SDimitry Andric } 105*e8d8bef9SDimitry Andric 106*e8d8bef9SDimitry Andric assert((BigEndian != LittleEndian) && 107*e8d8bef9SDimitry Andric "Pattern cannot be both big and little endian!"); 108*e8d8bef9SDimitry Andric return BigEndian; 109*e8d8bef9SDimitry Andric } 110*e8d8bef9SDimitry Andric 111*e8d8bef9SDimitry Andric bool CombinerHelper::isLegalOrBeforeLegalizer( 112*e8d8bef9SDimitry Andric const LegalityQuery &Query) const { 113*e8d8bef9SDimitry Andric return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; 114*e8d8bef9SDimitry Andric } 115*e8d8bef9SDimitry Andric 1160b57cec5SDimitry Andric void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, 1170b57cec5SDimitry Andric Register ToReg) const { 1180b57cec5SDimitry Andric Observer.changingAllUsesOfReg(MRI, FromReg); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric if (MRI.constrainRegAttrs(ToReg, FromReg)) 1210b57cec5SDimitry Andric MRI.replaceRegWith(FromReg, ToReg); 1220b57cec5SDimitry Andric else 1230b57cec5SDimitry Andric Builder.buildCopy(ToReg, FromReg); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric Observer.finishedChangingAllUsesOfReg(); 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, 1290b57cec5SDimitry Andric MachineOperand &FromRegOp, 1300b57cec5SDimitry Andric Register ToReg) const { 1310b57cec5SDimitry Andric assert(FromRegOp.getParent() && "Expected an operand in an MI"); 1320b57cec5SDimitry Andric Observer.changingInstr(*FromRegOp.getParent()); 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric FromRegOp.setReg(ToReg); 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric Observer.changedInstr(*FromRegOp.getParent()); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { 1400b57cec5SDimitry Andric if (matchCombineCopy(MI)) { 1410b57cec5SDimitry Andric applyCombineCopy(MI); 1420b57cec5SDimitry Andric return true; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric return false; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { 1470b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::COPY) 1480b57cec5SDimitry Andric return false; 1498bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1508bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1515ffd83dbSDimitry Andric return canReplaceReg(DstReg, SrcReg, MRI); 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric void CombinerHelper::applyCombineCopy(MachineInstr &MI) { 1548bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1558bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1560b57cec5SDimitry Andric MI.eraseFromParent(); 1570b57cec5SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1608bcb0991SDimitry Andric bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { 1618bcb0991SDimitry Andric bool IsUndef = false; 1628bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 1638bcb0991SDimitry Andric if (matchCombineConcatVectors(MI, IsUndef, Ops)) { 1648bcb0991SDimitry Andric applyCombineConcatVectors(MI, IsUndef, Ops); 1658bcb0991SDimitry Andric return true; 1668bcb0991SDimitry Andric } 1678bcb0991SDimitry Andric return false; 1688bcb0991SDimitry Andric } 1698bcb0991SDimitry Andric 1708bcb0991SDimitry Andric bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, 1718bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 1728bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 1738bcb0991SDimitry Andric "Invalid instruction"); 1748bcb0991SDimitry Andric IsUndef = true; 1758bcb0991SDimitry Andric MachineInstr *Undef = nullptr; 1768bcb0991SDimitry Andric 1778bcb0991SDimitry Andric // Walk over all the operands of concat vectors and check if they are 1788bcb0991SDimitry Andric // build_vector themselves or undef. 1798bcb0991SDimitry Andric // Then collect their operands in Ops. 180480093f4SDimitry Andric for (const MachineOperand &MO : MI.uses()) { 1818bcb0991SDimitry Andric Register Reg = MO.getReg(); 1828bcb0991SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 1838bcb0991SDimitry Andric assert(Def && "Operand not defined"); 1848bcb0991SDimitry Andric switch (Def->getOpcode()) { 1858bcb0991SDimitry Andric case TargetOpcode::G_BUILD_VECTOR: 1868bcb0991SDimitry Andric IsUndef = false; 1878bcb0991SDimitry Andric // Remember the operands of the build_vector to fold 1888bcb0991SDimitry Andric // them into the yet-to-build flattened concat vectors. 189480093f4SDimitry Andric for (const MachineOperand &BuildVecMO : Def->uses()) 1908bcb0991SDimitry Andric Ops.push_back(BuildVecMO.getReg()); 1918bcb0991SDimitry Andric break; 1928bcb0991SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 1938bcb0991SDimitry Andric LLT OpType = MRI.getType(Reg); 1948bcb0991SDimitry Andric // Keep one undef value for all the undef operands. 1958bcb0991SDimitry Andric if (!Undef) { 1968bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 1978bcb0991SDimitry Andric Undef = Builder.buildUndef(OpType.getScalarType()); 1988bcb0991SDimitry Andric } 1998bcb0991SDimitry Andric assert(MRI.getType(Undef->getOperand(0).getReg()) == 2008bcb0991SDimitry Andric OpType.getScalarType() && 2018bcb0991SDimitry Andric "All undefs should have the same type"); 2028bcb0991SDimitry Andric // Break the undef vector in as many scalar elements as needed 2038bcb0991SDimitry Andric // for the flattening. 2048bcb0991SDimitry Andric for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); 2058bcb0991SDimitry Andric EltIdx != EltEnd; ++EltIdx) 2068bcb0991SDimitry Andric Ops.push_back(Undef->getOperand(0).getReg()); 2078bcb0991SDimitry Andric break; 2088bcb0991SDimitry Andric } 2098bcb0991SDimitry Andric default: 2108bcb0991SDimitry Andric return false; 2118bcb0991SDimitry Andric } 2128bcb0991SDimitry Andric } 2138bcb0991SDimitry Andric return true; 2148bcb0991SDimitry Andric } 2158bcb0991SDimitry Andric void CombinerHelper::applyCombineConcatVectors( 2168bcb0991SDimitry Andric MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { 2178bcb0991SDimitry Andric // We determined that the concat_vectors can be flatten. 2188bcb0991SDimitry Andric // Generate the flattened build_vector. 2198bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2208bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2218bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 2228bcb0991SDimitry Andric 2238bcb0991SDimitry Andric // Note: IsUndef is sort of redundant. We could have determine it by 2248bcb0991SDimitry Andric // checking that at all Ops are undef. Alternatively, we could have 2258bcb0991SDimitry Andric // generate a build_vector of undefs and rely on another combine to 2268bcb0991SDimitry Andric // clean that up. For now, given we already gather this information 2278bcb0991SDimitry Andric // in tryCombineConcatVectors, just save compile time and issue the 2288bcb0991SDimitry Andric // right thing. 2298bcb0991SDimitry Andric if (IsUndef) 2308bcb0991SDimitry Andric Builder.buildUndef(NewDstReg); 2318bcb0991SDimitry Andric else 2328bcb0991SDimitry Andric Builder.buildBuildVector(NewDstReg, Ops); 2338bcb0991SDimitry Andric MI.eraseFromParent(); 2348bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 2358bcb0991SDimitry Andric } 2368bcb0991SDimitry Andric 2378bcb0991SDimitry Andric bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { 2388bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 2398bcb0991SDimitry Andric if (matchCombineShuffleVector(MI, Ops)) { 2408bcb0991SDimitry Andric applyCombineShuffleVector(MI, Ops); 2418bcb0991SDimitry Andric return true; 2428bcb0991SDimitry Andric } 2438bcb0991SDimitry Andric return false; 2448bcb0991SDimitry Andric } 2458bcb0991SDimitry Andric 2468bcb0991SDimitry Andric bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, 2478bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 2488bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && 2498bcb0991SDimitry Andric "Invalid instruction kind"); 2508bcb0991SDimitry Andric LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 2518bcb0991SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 2528bcb0991SDimitry Andric LLT SrcType = MRI.getType(Src1); 253480093f4SDimitry Andric // As bizarre as it may look, shuffle vector can actually produce 254480093f4SDimitry Andric // scalar! This is because at the IR level a <1 x ty> shuffle 255480093f4SDimitry Andric // vector is perfectly valid. 256480093f4SDimitry Andric unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; 257480093f4SDimitry Andric unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; 2588bcb0991SDimitry Andric 2598bcb0991SDimitry Andric // If the resulting vector is smaller than the size of the source 2608bcb0991SDimitry Andric // vectors being concatenated, we won't be able to replace the 2618bcb0991SDimitry Andric // shuffle vector into a concat_vectors. 2628bcb0991SDimitry Andric // 2638bcb0991SDimitry Andric // Note: We may still be able to produce a concat_vectors fed by 2648bcb0991SDimitry Andric // extract_vector_elt and so on. It is less clear that would 2658bcb0991SDimitry Andric // be better though, so don't bother for now. 266480093f4SDimitry Andric // 267480093f4SDimitry Andric // If the destination is a scalar, the size of the sources doesn't 268480093f4SDimitry Andric // matter. we will lower the shuffle to a plain copy. This will 269480093f4SDimitry Andric // work only if the source and destination have the same size. But 270480093f4SDimitry Andric // that's covered by the next condition. 271480093f4SDimitry Andric // 272480093f4SDimitry Andric // TODO: If the size between the source and destination don't match 273480093f4SDimitry Andric // we could still emit an extract vector element in that case. 274480093f4SDimitry Andric if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) 2758bcb0991SDimitry Andric return false; 2768bcb0991SDimitry Andric 2778bcb0991SDimitry Andric // Check that the shuffle mask can be broken evenly between the 2788bcb0991SDimitry Andric // different sources. 2798bcb0991SDimitry Andric if (DstNumElts % SrcNumElts != 0) 2808bcb0991SDimitry Andric return false; 2818bcb0991SDimitry Andric 2828bcb0991SDimitry Andric // Mask length is a multiple of the source vector length. 2838bcb0991SDimitry Andric // Check if the shuffle is some kind of concatenation of the input 2848bcb0991SDimitry Andric // vectors. 2858bcb0991SDimitry Andric unsigned NumConcat = DstNumElts / SrcNumElts; 2868bcb0991SDimitry Andric SmallVector<int, 8> ConcatSrcs(NumConcat, -1); 287480093f4SDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 2888bcb0991SDimitry Andric for (unsigned i = 0; i != DstNumElts; ++i) { 2898bcb0991SDimitry Andric int Idx = Mask[i]; 2908bcb0991SDimitry Andric // Undef value. 2918bcb0991SDimitry Andric if (Idx < 0) 2928bcb0991SDimitry Andric continue; 2938bcb0991SDimitry Andric // Ensure the indices in each SrcType sized piece are sequential and that 2948bcb0991SDimitry Andric // the same source is used for the whole piece. 2958bcb0991SDimitry Andric if ((Idx % SrcNumElts != (i % SrcNumElts)) || 2968bcb0991SDimitry Andric (ConcatSrcs[i / SrcNumElts] >= 0 && 2978bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) 2988bcb0991SDimitry Andric return false; 2998bcb0991SDimitry Andric // Remember which source this index came from. 3008bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; 3018bcb0991SDimitry Andric } 3028bcb0991SDimitry Andric 3038bcb0991SDimitry Andric // The shuffle is concatenating multiple vectors together. 3048bcb0991SDimitry Andric // Collect the different operands for that. 3058bcb0991SDimitry Andric Register UndefReg; 3068bcb0991SDimitry Andric Register Src2 = MI.getOperand(2).getReg(); 3078bcb0991SDimitry Andric for (auto Src : ConcatSrcs) { 3088bcb0991SDimitry Andric if (Src < 0) { 3098bcb0991SDimitry Andric if (!UndefReg) { 3108bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3118bcb0991SDimitry Andric UndefReg = Builder.buildUndef(SrcType).getReg(0); 3128bcb0991SDimitry Andric } 3138bcb0991SDimitry Andric Ops.push_back(UndefReg); 3148bcb0991SDimitry Andric } else if (Src == 0) 3158bcb0991SDimitry Andric Ops.push_back(Src1); 3168bcb0991SDimitry Andric else 3178bcb0991SDimitry Andric Ops.push_back(Src2); 3188bcb0991SDimitry Andric } 3198bcb0991SDimitry Andric return true; 3208bcb0991SDimitry Andric } 3218bcb0991SDimitry Andric 3228bcb0991SDimitry Andric void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, 3238bcb0991SDimitry Andric const ArrayRef<Register> Ops) { 3248bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3258bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3268bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 3278bcb0991SDimitry Andric 328480093f4SDimitry Andric if (Ops.size() == 1) 329480093f4SDimitry Andric Builder.buildCopy(NewDstReg, Ops[0]); 330480093f4SDimitry Andric else 331480093f4SDimitry Andric Builder.buildMerge(NewDstReg, Ops); 3328bcb0991SDimitry Andric 3338bcb0991SDimitry Andric MI.eraseFromParent(); 3348bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 3358bcb0991SDimitry Andric } 3368bcb0991SDimitry Andric 3370b57cec5SDimitry Andric namespace { 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric /// Select a preference between two uses. CurrentUse is the current preference 3400b57cec5SDimitry Andric /// while *ForCandidate is attributes of the candidate under consideration. 3410b57cec5SDimitry Andric PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, 3425ffd83dbSDimitry Andric const LLT TyForCandidate, 3430b57cec5SDimitry Andric unsigned OpcodeForCandidate, 3440b57cec5SDimitry Andric MachineInstr *MIForCandidate) { 3450b57cec5SDimitry Andric if (!CurrentUse.Ty.isValid()) { 3460b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == OpcodeForCandidate || 3470b57cec5SDimitry Andric CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) 3480b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3490b57cec5SDimitry Andric return CurrentUse; 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric // We permit the extend to hoist through basic blocks but this is only 3530b57cec5SDimitry Andric // sensible if the target has extending loads. If you end up lowering back 3540b57cec5SDimitry Andric // into a load and extend during the legalizer then the end result is 3550b57cec5SDimitry Andric // hoisting the extend up to the load. 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric // Prefer defined extensions to undefined extensions as these are more 3580b57cec5SDimitry Andric // likely to reduce the number of instructions. 3590b57cec5SDimitry Andric if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && 3600b57cec5SDimitry Andric CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) 3610b57cec5SDimitry Andric return CurrentUse; 3620b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && 3630b57cec5SDimitry Andric OpcodeForCandidate != TargetOpcode::G_ANYEXT) 3640b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric // Prefer sign extensions to zero extensions as sign-extensions tend to be 3670b57cec5SDimitry Andric // more expensive. 3680b57cec5SDimitry Andric if (CurrentUse.Ty == TyForCandidate) { 3690b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && 3700b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_ZEXT) 3710b57cec5SDimitry Andric return CurrentUse; 3720b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && 3730b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_SEXT) 3740b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric // This is potentially target specific. We've chosen the largest type 3780b57cec5SDimitry Andric // because G_TRUNC is usually free. One potential catch with this is that 3790b57cec5SDimitry Andric // some targets have a reduced number of larger registers than smaller 3800b57cec5SDimitry Andric // registers and this choice potentially increases the live-range for the 3810b57cec5SDimitry Andric // larger value. 3820b57cec5SDimitry Andric if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { 3830b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric return CurrentUse; 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric /// Find a suitable place to insert some instructions and insert them. This 3890b57cec5SDimitry Andric /// function accounts for special cases like inserting before a PHI node. 3900b57cec5SDimitry Andric /// The current strategy for inserting before PHI's is to duplicate the 3910b57cec5SDimitry Andric /// instructions for each predecessor. However, while that's ok for G_TRUNC 3920b57cec5SDimitry Andric /// on most targets since it generally requires no code, other targets/cases may 3930b57cec5SDimitry Andric /// want to try harder to find a dominating block. 3940b57cec5SDimitry Andric static void InsertInsnsWithoutSideEffectsBeforeUse( 3950b57cec5SDimitry Andric MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, 3960b57cec5SDimitry Andric std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, 3970b57cec5SDimitry Andric MachineOperand &UseMO)> 3980b57cec5SDimitry Andric Inserter) { 3990b57cec5SDimitry Andric MachineInstr &UseMI = *UseMO.getParent(); 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric MachineBasicBlock *InsertBB = UseMI.getParent(); 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // If the use is a PHI then we want the predecessor block instead. 4040b57cec5SDimitry Andric if (UseMI.isPHI()) { 4050b57cec5SDimitry Andric MachineOperand *PredBB = std::next(&UseMO); 4060b57cec5SDimitry Andric InsertBB = PredBB->getMBB(); 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric // If the block is the same block as the def then we want to insert just after 4100b57cec5SDimitry Andric // the def instead of at the start of the block. 4110b57cec5SDimitry Andric if (InsertBB == DefMI.getParent()) { 4120b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPt = &DefMI; 4130b57cec5SDimitry Andric Inserter(InsertBB, std::next(InsertPt), UseMO); 4140b57cec5SDimitry Andric return; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric // Otherwise we want the start of the BB 4180b57cec5SDimitry Andric Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric } // end anonymous namespace 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { 4230b57cec5SDimitry Andric PreferredTuple Preferred; 4240b57cec5SDimitry Andric if (matchCombineExtendingLoads(MI, Preferred)) { 4250b57cec5SDimitry Andric applyCombineExtendingLoads(MI, Preferred); 4260b57cec5SDimitry Andric return true; 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric return false; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, 4320b57cec5SDimitry Andric PreferredTuple &Preferred) { 4330b57cec5SDimitry Andric // We match the loads and follow the uses to the extend instead of matching 4340b57cec5SDimitry Andric // the extends and following the def to the load. This is because the load 4350b57cec5SDimitry Andric // must remain in the same position for correctness (unless we also add code 4360b57cec5SDimitry Andric // to find a safe place to sink it) whereas the extend is freely movable. 4370b57cec5SDimitry Andric // It also prevents us from duplicating the load for the volatile case or just 4380b57cec5SDimitry Andric // for performance. 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_LOAD && 4410b57cec5SDimitry Andric MI.getOpcode() != TargetOpcode::G_SEXTLOAD && 4420b57cec5SDimitry Andric MI.getOpcode() != TargetOpcode::G_ZEXTLOAD) 4430b57cec5SDimitry Andric return false; 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric auto &LoadValue = MI.getOperand(0); 4460b57cec5SDimitry Andric assert(LoadValue.isReg() && "Result wasn't a register?"); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric LLT LoadValueTy = MRI.getType(LoadValue.getReg()); 4490b57cec5SDimitry Andric if (!LoadValueTy.isScalar()) 4500b57cec5SDimitry Andric return false; 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric // Most architectures are going to legalize <s8 loads into at least a 1 byte 4530b57cec5SDimitry Andric // load, and the MMOs can only describe memory accesses in multiples of bytes. 4540b57cec5SDimitry Andric // If we try to perform extload combining on those, we can end up with 4550b57cec5SDimitry Andric // %a(s8) = extload %ptr (load 1 byte from %ptr) 4560b57cec5SDimitry Andric // ... which is an illegal extload instruction. 4570b57cec5SDimitry Andric if (LoadValueTy.getSizeInBits() < 8) 4580b57cec5SDimitry Andric return false; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric // For non power-of-2 types, they will very likely be legalized into multiple 4610b57cec5SDimitry Andric // loads. Don't bother trying to match them into extending loads. 4620b57cec5SDimitry Andric if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) 4630b57cec5SDimitry Andric return false; 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // Find the preferred type aside from the any-extends (unless it's the only 4660b57cec5SDimitry Andric // one) and non-extending ops. We'll emit an extending load to that type and 4670b57cec5SDimitry Andric // and emit a variant of (extend (trunc X)) for the others according to the 4680b57cec5SDimitry Andric // relative type sizes. At the same time, pick an extend to use based on the 4690b57cec5SDimitry Andric // extend involved in the chosen type. 4700b57cec5SDimitry Andric unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD 4710b57cec5SDimitry Andric ? TargetOpcode::G_ANYEXT 4720b57cec5SDimitry Andric : MI.getOpcode() == TargetOpcode::G_SEXTLOAD 4730b57cec5SDimitry Andric ? TargetOpcode::G_SEXT 4740b57cec5SDimitry Andric : TargetOpcode::G_ZEXT; 4750b57cec5SDimitry Andric Preferred = {LLT(), PreferredOpcode, nullptr}; 4765ffd83dbSDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) { 4770b57cec5SDimitry Andric if (UseMI.getOpcode() == TargetOpcode::G_SEXT || 4780b57cec5SDimitry Andric UseMI.getOpcode() == TargetOpcode::G_ZEXT || 4795ffd83dbSDimitry Andric (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { 4805ffd83dbSDimitry Andric // Check for legality. 4815ffd83dbSDimitry Andric if (LI) { 4825ffd83dbSDimitry Andric LegalityQuery::MemDesc MMDesc; 4835ffd83dbSDimitry Andric const auto &MMO = **MI.memoperands_begin(); 4845ffd83dbSDimitry Andric MMDesc.SizeInBits = MMO.getSizeInBits(); 4855ffd83dbSDimitry Andric MMDesc.AlignInBits = MMO.getAlign().value() * 8; 4865ffd83dbSDimitry Andric MMDesc.Ordering = MMO.getOrdering(); 4875ffd83dbSDimitry Andric LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); 4885ffd83dbSDimitry Andric LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 4895ffd83dbSDimitry Andric if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action != 4905ffd83dbSDimitry Andric LegalizeActions::Legal) 4915ffd83dbSDimitry Andric continue; 4925ffd83dbSDimitry Andric } 4930b57cec5SDimitry Andric Preferred = ChoosePreferredUse(Preferred, 4940b57cec5SDimitry Andric MRI.getType(UseMI.getOperand(0).getReg()), 4950b57cec5SDimitry Andric UseMI.getOpcode(), &UseMI); 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric // There were no extends 5000b57cec5SDimitry Andric if (!Preferred.MI) 5010b57cec5SDimitry Andric return false; 5020b57cec5SDimitry Andric // It should be impossible to chose an extend without selecting a different 5030b57cec5SDimitry Andric // type since by definition the result of an extend is larger. 5040b57cec5SDimitry Andric assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); 5070b57cec5SDimitry Andric return true; 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, 5110b57cec5SDimitry Andric PreferredTuple &Preferred) { 5120b57cec5SDimitry Andric // Rewrite the load to the chosen extending load. 5130b57cec5SDimitry Andric Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); 5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric // Inserter to insert a truncate back to the original type at a given point 5160b57cec5SDimitry Andric // with some basic CSE to limit truncate duplication to one per BB. 5170b57cec5SDimitry Andric DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns; 5180b57cec5SDimitry Andric auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, 5190b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore, 5200b57cec5SDimitry Andric MachineOperand &UseMO) { 5210b57cec5SDimitry Andric MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); 5220b57cec5SDimitry Andric if (PreviouslyEmitted) { 5230b57cec5SDimitry Andric Observer.changingInstr(*UseMO.getParent()); 5240b57cec5SDimitry Andric UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); 5250b57cec5SDimitry Andric Observer.changedInstr(*UseMO.getParent()); 5260b57cec5SDimitry Andric return; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric Builder.setInsertPt(*InsertIntoBB, InsertBefore); 5300b57cec5SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); 5310b57cec5SDimitry Andric MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); 5320b57cec5SDimitry Andric EmittedInsns[InsertIntoBB] = NewMI; 5330b57cec5SDimitry Andric replaceRegOpWith(MRI, UseMO, NewDstReg); 5340b57cec5SDimitry Andric }; 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric Observer.changingInstr(MI); 5370b57cec5SDimitry Andric MI.setDesc( 5380b57cec5SDimitry Andric Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT 5390b57cec5SDimitry Andric ? TargetOpcode::G_SEXTLOAD 5400b57cec5SDimitry Andric : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT 5410b57cec5SDimitry Andric ? TargetOpcode::G_ZEXTLOAD 5420b57cec5SDimitry Andric : TargetOpcode::G_LOAD)); 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric // Rewrite all the uses to fix up the types. 5450b57cec5SDimitry Andric auto &LoadValue = MI.getOperand(0); 5460b57cec5SDimitry Andric SmallVector<MachineOperand *, 4> Uses; 5470b57cec5SDimitry Andric for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) 5480b57cec5SDimitry Andric Uses.push_back(&UseMO); 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric for (auto *UseMO : Uses) { 5510b57cec5SDimitry Andric MachineInstr *UseMI = UseMO->getParent(); 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric // If the extend is compatible with the preferred extend then we should fix 5540b57cec5SDimitry Andric // up the type and extend so that it uses the preferred use. 5550b57cec5SDimitry Andric if (UseMI->getOpcode() == Preferred.ExtendOpcode || 5560b57cec5SDimitry Andric UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { 5578bcb0991SDimitry Andric Register UseDstReg = UseMI->getOperand(0).getReg(); 5580b57cec5SDimitry Andric MachineOperand &UseSrcMO = UseMI->getOperand(1); 5595ffd83dbSDimitry Andric const LLT UseDstTy = MRI.getType(UseDstReg); 5600b57cec5SDimitry Andric if (UseDstReg != ChosenDstReg) { 5610b57cec5SDimitry Andric if (Preferred.Ty == UseDstTy) { 5620b57cec5SDimitry Andric // If the use has the same type as the preferred use, then merge 5630b57cec5SDimitry Andric // the vregs and erase the extend. For example: 5640b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5650b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 5660b57cec5SDimitry Andric // %3:_(s32) = G_ANYEXT %1(s8) 5670b57cec5SDimitry Andric // ... = ... %3(s32) 5680b57cec5SDimitry Andric // rewrites to: 5690b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 5700b57cec5SDimitry Andric // ... = ... %2(s32) 5710b57cec5SDimitry Andric replaceRegWith(MRI, UseDstReg, ChosenDstReg); 5720b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 5730b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 5740b57cec5SDimitry Andric } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { 5750b57cec5SDimitry Andric // If the preferred size is smaller, then keep the extend but extend 5760b57cec5SDimitry Andric // from the result of the extending load. For example: 5770b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5780b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 5790b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %1(s8) 5800b57cec5SDimitry Andric // ... = ... %3(s64) 5810b57cec5SDimitry Andric /// rewrites to: 5820b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 5830b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %2:_(s32) 5840b57cec5SDimitry Andric // ... = ... %3(s64) 5850b57cec5SDimitry Andric replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); 5860b57cec5SDimitry Andric } else { 5870b57cec5SDimitry Andric // If the preferred size is large, then insert a truncate. For 5880b57cec5SDimitry Andric // example: 5890b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5900b57cec5SDimitry Andric // %2:_(s64) = G_SEXT %1(s8) 5910b57cec5SDimitry Andric // %3:_(s32) = G_ZEXT %1(s8) 5920b57cec5SDimitry Andric // ... = ... %3(s32) 5930b57cec5SDimitry Andric /// rewrites to: 5940b57cec5SDimitry Andric // %2:_(s64) = G_SEXTLOAD ... 5950b57cec5SDimitry Andric // %4:_(s8) = G_TRUNC %2:_(s32) 5960b57cec5SDimitry Andric // %3:_(s64) = G_ZEXT %2:_(s8) 5970b57cec5SDimitry Andric // ... = ... %3(s64) 5980b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, 5990b57cec5SDimitry Andric InsertTruncAt); 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric continue; 6020b57cec5SDimitry Andric } 6030b57cec5SDimitry Andric // The use is (one of) the uses of the preferred use we chose earlier. 6040b57cec5SDimitry Andric // We're going to update the load to def this value later so just erase 6050b57cec5SDimitry Andric // the old extend. 6060b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 6070b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 6080b57cec5SDimitry Andric continue; 6090b57cec5SDimitry Andric } 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric // The use isn't an extend. Truncate back to the type we originally loaded. 6120b57cec5SDimitry Andric // This is free on many targets. 6130b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric MI.getOperand(0).setReg(ChosenDstReg); 6170b57cec5SDimitry Andric Observer.changedInstr(MI); 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric 6205ffd83dbSDimitry Andric bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, 6215ffd83dbSDimitry Andric const MachineInstr &UseMI) { 6225ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 6235ffd83dbSDimitry Andric "shouldn't consider debug uses"); 6248bcb0991SDimitry Andric assert(DefMI.getParent() == UseMI.getParent()); 6258bcb0991SDimitry Andric if (&DefMI == &UseMI) 6268bcb0991SDimitry Andric return false; 627*e8d8bef9SDimitry Andric const MachineBasicBlock &MBB = *DefMI.getParent(); 628*e8d8bef9SDimitry Andric auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { 629*e8d8bef9SDimitry Andric return &MI == &DefMI || &MI == &UseMI; 630*e8d8bef9SDimitry Andric }); 631*e8d8bef9SDimitry Andric if (DefOrUse == MBB.end()) 632*e8d8bef9SDimitry Andric llvm_unreachable("Block must contain both DefMI and UseMI!"); 633*e8d8bef9SDimitry Andric return &*DefOrUse == &DefMI; 6348bcb0991SDimitry Andric } 6358bcb0991SDimitry Andric 6365ffd83dbSDimitry Andric bool CombinerHelper::dominates(const MachineInstr &DefMI, 6375ffd83dbSDimitry Andric const MachineInstr &UseMI) { 6385ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 6395ffd83dbSDimitry Andric "shouldn't consider debug uses"); 6408bcb0991SDimitry Andric if (MDT) 6418bcb0991SDimitry Andric return MDT->dominates(&DefMI, &UseMI); 6428bcb0991SDimitry Andric else if (DefMI.getParent() != UseMI.getParent()) 6438bcb0991SDimitry Andric return false; 6448bcb0991SDimitry Andric 6458bcb0991SDimitry Andric return isPredecessor(DefMI, UseMI); 6468bcb0991SDimitry Andric } 6478bcb0991SDimitry Andric 648*e8d8bef9SDimitry Andric bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { 6495ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 6505ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 651*e8d8bef9SDimitry Andric Register LoadUser = SrcReg; 652*e8d8bef9SDimitry Andric 653*e8d8bef9SDimitry Andric if (MRI.getType(SrcReg).isVector()) 654*e8d8bef9SDimitry Andric return false; 655*e8d8bef9SDimitry Andric 656*e8d8bef9SDimitry Andric Register TruncSrc; 657*e8d8bef9SDimitry Andric if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) 658*e8d8bef9SDimitry Andric LoadUser = TruncSrc; 659*e8d8bef9SDimitry Andric 660*e8d8bef9SDimitry Andric uint64_t SizeInBits = MI.getOperand(2).getImm(); 661*e8d8bef9SDimitry Andric // If the source is a G_SEXTLOAD from the same bit width, then we don't 662*e8d8bef9SDimitry Andric // need any extend at all, just a truncate. 663*e8d8bef9SDimitry Andric if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) { 664*e8d8bef9SDimitry Andric const auto &MMO = **LoadMI->memoperands_begin(); 665*e8d8bef9SDimitry Andric // If truncating more than the original extended value, abort. 666*e8d8bef9SDimitry Andric if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits()) 667*e8d8bef9SDimitry Andric return false; 668*e8d8bef9SDimitry Andric if (MMO.getSizeInBits() == SizeInBits) 669*e8d8bef9SDimitry Andric return true; 670*e8d8bef9SDimitry Andric } 671*e8d8bef9SDimitry Andric return false; 6725ffd83dbSDimitry Andric } 6735ffd83dbSDimitry Andric 674*e8d8bef9SDimitry Andric bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { 6755ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 676*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 677*e8d8bef9SDimitry Andric Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 678*e8d8bef9SDimitry Andric MI.eraseFromParent(); 679*e8d8bef9SDimitry Andric return true; 680*e8d8bef9SDimitry Andric } 681*e8d8bef9SDimitry Andric 682*e8d8bef9SDimitry Andric bool CombinerHelper::matchSextInRegOfLoad( 683*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 684*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 685*e8d8bef9SDimitry Andric 686*e8d8bef9SDimitry Andric // Only supports scalars for now. 687*e8d8bef9SDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 688*e8d8bef9SDimitry Andric return false; 689*e8d8bef9SDimitry Andric 690*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 691*e8d8bef9SDimitry Andric MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 692*e8d8bef9SDimitry Andric if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) 693*e8d8bef9SDimitry Andric return false; 694*e8d8bef9SDimitry Andric 695*e8d8bef9SDimitry Andric // If the sign extend extends from a narrower width than the load's width, 696*e8d8bef9SDimitry Andric // then we can narrow the load width when we combine to a G_SEXTLOAD. 697*e8d8bef9SDimitry Andric auto &MMO = **LoadDef->memoperands_begin(); 698*e8d8bef9SDimitry Andric // Don't do this for non-simple loads. 699*e8d8bef9SDimitry Andric if (MMO.isAtomic() || MMO.isVolatile()) 700*e8d8bef9SDimitry Andric return false; 701*e8d8bef9SDimitry Andric 702*e8d8bef9SDimitry Andric // Avoid widening the load at all. 703*e8d8bef9SDimitry Andric unsigned NewSizeBits = 704*e8d8bef9SDimitry Andric std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); 705*e8d8bef9SDimitry Andric 706*e8d8bef9SDimitry Andric // Don't generate G_SEXTLOADs with a < 1 byte width. 707*e8d8bef9SDimitry Andric if (NewSizeBits < 8) 708*e8d8bef9SDimitry Andric return false; 709*e8d8bef9SDimitry Andric // Don't bother creating a non-power-2 sextload, it will likely be broken up 710*e8d8bef9SDimitry Andric // anyway for most targets. 711*e8d8bef9SDimitry Andric if (!isPowerOf2_32(NewSizeBits)) 712*e8d8bef9SDimitry Andric return false; 713*e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits); 714*e8d8bef9SDimitry Andric return true; 715*e8d8bef9SDimitry Andric } 716*e8d8bef9SDimitry Andric 717*e8d8bef9SDimitry Andric bool CombinerHelper::applySextInRegOfLoad( 718*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 719*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 720*e8d8bef9SDimitry Andric Register LoadReg; 721*e8d8bef9SDimitry Andric unsigned ScalarSizeBits; 722*e8d8bef9SDimitry Andric std::tie(LoadReg, ScalarSizeBits) = MatchInfo; 723*e8d8bef9SDimitry Andric auto *LoadDef = MRI.getVRegDef(LoadReg); 724*e8d8bef9SDimitry Andric assert(LoadDef && "Expected a load reg"); 725*e8d8bef9SDimitry Andric 726*e8d8bef9SDimitry Andric // If we have the following: 727*e8d8bef9SDimitry Andric // %ld = G_LOAD %ptr, (load 2) 728*e8d8bef9SDimitry Andric // %ext = G_SEXT_INREG %ld, 8 729*e8d8bef9SDimitry Andric // ==> 730*e8d8bef9SDimitry Andric // %ld = G_SEXTLOAD %ptr (load 1) 731*e8d8bef9SDimitry Andric 732*e8d8bef9SDimitry Andric auto &MMO = **LoadDef->memoperands_begin(); 733*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 734*e8d8bef9SDimitry Andric auto &MF = Builder.getMF(); 735*e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 736*e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); 737*e8d8bef9SDimitry Andric Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), 738*e8d8bef9SDimitry Andric LoadDef->getOperand(1).getReg(), *NewMMO); 7395ffd83dbSDimitry Andric MI.eraseFromParent(); 7405ffd83dbSDimitry Andric return true; 7415ffd83dbSDimitry Andric } 7425ffd83dbSDimitry Andric 7438bcb0991SDimitry Andric bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, 7448bcb0991SDimitry Andric Register &Base, Register &Offset) { 7458bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 7468bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 7478bcb0991SDimitry Andric 7488bcb0991SDimitry Andric #ifndef NDEBUG 7498bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 7508bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 7518bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 7528bcb0991SDimitry Andric #endif 7538bcb0991SDimitry Andric 7548bcb0991SDimitry Andric Base = MI.getOperand(1).getReg(); 7558bcb0991SDimitry Andric MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); 7568bcb0991SDimitry Andric if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 7578bcb0991SDimitry Andric return false; 7588bcb0991SDimitry Andric 7598bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); 760*e8d8bef9SDimitry Andric // FIXME: The following use traversal needs a bail out for patholigical cases. 7615ffd83dbSDimitry Andric for (auto &Use : MRI.use_nodbg_instructions(Base)) { 762480093f4SDimitry Andric if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) 7638bcb0991SDimitry Andric continue; 7648bcb0991SDimitry Andric 7658bcb0991SDimitry Andric Offset = Use.getOperand(2).getReg(); 7668bcb0991SDimitry Andric if (!ForceLegalIndexing && 7678bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { 7688bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " 7698bcb0991SDimitry Andric << Use); 7708bcb0991SDimitry Andric continue; 7718bcb0991SDimitry Andric } 7728bcb0991SDimitry Andric 7738bcb0991SDimitry Andric // Make sure the offset calculation is before the potentially indexed op. 7748bcb0991SDimitry Andric // FIXME: we really care about dependency here. The offset calculation might 7758bcb0991SDimitry Andric // be movable. 7768bcb0991SDimitry Andric MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); 7778bcb0991SDimitry Andric if (!OffsetDef || !dominates(*OffsetDef, MI)) { 7788bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " 7798bcb0991SDimitry Andric << Use); 7808bcb0991SDimitry Andric continue; 7818bcb0991SDimitry Andric } 7828bcb0991SDimitry Andric 7838bcb0991SDimitry Andric // FIXME: check whether all uses of Base are load/store with foldable 7848bcb0991SDimitry Andric // addressing modes. If so, using the normal addr-modes is better than 7858bcb0991SDimitry Andric // forming an indexed one. 7868bcb0991SDimitry Andric 7878bcb0991SDimitry Andric bool MemOpDominatesAddrUses = true; 7885ffd83dbSDimitry Andric for (auto &PtrAddUse : 7895ffd83dbSDimitry Andric MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) { 790480093f4SDimitry Andric if (!dominates(MI, PtrAddUse)) { 7918bcb0991SDimitry Andric MemOpDominatesAddrUses = false; 7928bcb0991SDimitry Andric break; 7938bcb0991SDimitry Andric } 7948bcb0991SDimitry Andric } 7958bcb0991SDimitry Andric 7968bcb0991SDimitry Andric if (!MemOpDominatesAddrUses) { 7978bcb0991SDimitry Andric LLVM_DEBUG( 7988bcb0991SDimitry Andric dbgs() << " Ignoring candidate as memop does not dominate uses: " 7998bcb0991SDimitry Andric << Use); 8008bcb0991SDimitry Andric continue; 8018bcb0991SDimitry Andric } 8028bcb0991SDimitry Andric 8038bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Found match: " << Use); 8048bcb0991SDimitry Andric Addr = Use.getOperand(0).getReg(); 8058bcb0991SDimitry Andric return true; 8068bcb0991SDimitry Andric } 8078bcb0991SDimitry Andric 8088bcb0991SDimitry Andric return false; 8098bcb0991SDimitry Andric } 8108bcb0991SDimitry Andric 8118bcb0991SDimitry Andric bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, 8128bcb0991SDimitry Andric Register &Base, Register &Offset) { 8138bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 8148bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 8158bcb0991SDimitry Andric 8168bcb0991SDimitry Andric #ifndef NDEBUG 8178bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 8188bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 8198bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 8208bcb0991SDimitry Andric #endif 8218bcb0991SDimitry Andric 8228bcb0991SDimitry Andric Addr = MI.getOperand(1).getReg(); 823480093f4SDimitry Andric MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); 8245ffd83dbSDimitry Andric if (!AddrDef || MRI.hasOneNonDBGUse(Addr)) 8258bcb0991SDimitry Andric return false; 8268bcb0991SDimitry Andric 8278bcb0991SDimitry Andric Base = AddrDef->getOperand(1).getReg(); 8288bcb0991SDimitry Andric Offset = AddrDef->getOperand(2).getReg(); 8298bcb0991SDimitry Andric 8308bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); 8318bcb0991SDimitry Andric 8328bcb0991SDimitry Andric if (!ForceLegalIndexing && 8338bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { 8348bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); 8358bcb0991SDimitry Andric return false; 8368bcb0991SDimitry Andric } 8378bcb0991SDimitry Andric 8388bcb0991SDimitry Andric MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); 8398bcb0991SDimitry Andric if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 8408bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); 8418bcb0991SDimitry Andric return false; 8428bcb0991SDimitry Andric } 8438bcb0991SDimitry Andric 8448bcb0991SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_STORE) { 8458bcb0991SDimitry Andric // Would require a copy. 8468bcb0991SDimitry Andric if (Base == MI.getOperand(0).getReg()) { 8478bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); 8488bcb0991SDimitry Andric return false; 8498bcb0991SDimitry Andric } 8508bcb0991SDimitry Andric 8518bcb0991SDimitry Andric // We're expecting one use of Addr in MI, but it could also be the 8528bcb0991SDimitry Andric // value stored, which isn't actually dominated by the instruction. 8538bcb0991SDimitry Andric if (MI.getOperand(0).getReg() == Addr) { 8548bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); 8558bcb0991SDimitry Andric return false; 8568bcb0991SDimitry Andric } 8578bcb0991SDimitry Andric } 8588bcb0991SDimitry Andric 859480093f4SDimitry Andric // FIXME: check whether all uses of the base pointer are constant PtrAdds. 860480093f4SDimitry Andric // That might allow us to end base's liveness here by adjusting the constant. 8618bcb0991SDimitry Andric 8625ffd83dbSDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) { 8638bcb0991SDimitry Andric if (!dominates(MI, UseMI)) { 8648bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); 8658bcb0991SDimitry Andric return false; 8668bcb0991SDimitry Andric } 8678bcb0991SDimitry Andric } 8688bcb0991SDimitry Andric 8698bcb0991SDimitry Andric return true; 8708bcb0991SDimitry Andric } 8718bcb0991SDimitry Andric 8728bcb0991SDimitry Andric bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { 873480093f4SDimitry Andric IndexedLoadStoreMatchInfo MatchInfo; 874480093f4SDimitry Andric if (matchCombineIndexedLoadStore(MI, MatchInfo)) { 875480093f4SDimitry Andric applyCombineIndexedLoadStore(MI, MatchInfo); 876480093f4SDimitry Andric return true; 877480093f4SDimitry Andric } 878480093f4SDimitry Andric return false; 879480093f4SDimitry Andric } 880480093f4SDimitry Andric 881480093f4SDimitry Andric bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 8828bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 8838bcb0991SDimitry Andric if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && 8848bcb0991SDimitry Andric Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) 8858bcb0991SDimitry Andric return false; 8868bcb0991SDimitry Andric 887*e8d8bef9SDimitry Andric // For now, no targets actually support these opcodes so don't waste time 888*e8d8bef9SDimitry Andric // running these unless we're forced to for testing. 889*e8d8bef9SDimitry Andric if (!ForceLegalIndexing) 890*e8d8bef9SDimitry Andric return false; 891*e8d8bef9SDimitry Andric 892480093f4SDimitry Andric MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 893480093f4SDimitry Andric MatchInfo.Offset); 894480093f4SDimitry Andric if (!MatchInfo.IsPre && 895480093f4SDimitry Andric !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 896480093f4SDimitry Andric MatchInfo.Offset)) 8978bcb0991SDimitry Andric return false; 8988bcb0991SDimitry Andric 899480093f4SDimitry Andric return true; 900480093f4SDimitry Andric } 9018bcb0991SDimitry Andric 902480093f4SDimitry Andric void CombinerHelper::applyCombineIndexedLoadStore( 903480093f4SDimitry Andric MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 904480093f4SDimitry Andric MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); 905480093f4SDimitry Andric MachineIRBuilder MIRBuilder(MI); 906480093f4SDimitry Andric unsigned Opcode = MI.getOpcode(); 907480093f4SDimitry Andric bool IsStore = Opcode == TargetOpcode::G_STORE; 9088bcb0991SDimitry Andric unsigned NewOpcode; 9098bcb0991SDimitry Andric switch (Opcode) { 9108bcb0991SDimitry Andric case TargetOpcode::G_LOAD: 9118bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_LOAD; 9128bcb0991SDimitry Andric break; 9138bcb0991SDimitry Andric case TargetOpcode::G_SEXTLOAD: 9148bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; 9158bcb0991SDimitry Andric break; 9168bcb0991SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 9178bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; 9188bcb0991SDimitry Andric break; 9198bcb0991SDimitry Andric case TargetOpcode::G_STORE: 9208bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_STORE; 9218bcb0991SDimitry Andric break; 9228bcb0991SDimitry Andric default: 9238bcb0991SDimitry Andric llvm_unreachable("Unknown load/store opcode"); 9248bcb0991SDimitry Andric } 9258bcb0991SDimitry Andric 9268bcb0991SDimitry Andric auto MIB = MIRBuilder.buildInstr(NewOpcode); 9278bcb0991SDimitry Andric if (IsStore) { 928480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 9298bcb0991SDimitry Andric MIB.addUse(MI.getOperand(0).getReg()); 9308bcb0991SDimitry Andric } else { 9318bcb0991SDimitry Andric MIB.addDef(MI.getOperand(0).getReg()); 932480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 9338bcb0991SDimitry Andric } 9348bcb0991SDimitry Andric 935480093f4SDimitry Andric MIB.addUse(MatchInfo.Base); 936480093f4SDimitry Andric MIB.addUse(MatchInfo.Offset); 937480093f4SDimitry Andric MIB.addImm(MatchInfo.IsPre); 9388bcb0991SDimitry Andric MI.eraseFromParent(); 9398bcb0991SDimitry Andric AddrDef.eraseFromParent(); 9408bcb0991SDimitry Andric 9418bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); 9428bcb0991SDimitry Andric } 9438bcb0991SDimitry Andric 944*e8d8bef9SDimitry Andric bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { 9458bcb0991SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_BR) 9468bcb0991SDimitry Andric return false; 9478bcb0991SDimitry Andric 9480b57cec5SDimitry Andric // Try to match the following: 9490b57cec5SDimitry Andric // bb1: 9500b57cec5SDimitry Andric // G_BRCOND %c1, %bb2 9510b57cec5SDimitry Andric // G_BR %bb3 9520b57cec5SDimitry Andric // bb2: 9530b57cec5SDimitry Andric // ... 9540b57cec5SDimitry Andric // bb3: 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric // The above pattern does not have a fall through to the successor bb2, always 9570b57cec5SDimitry Andric // resulting in a branch no matter which path is taken. Here we try to find 9580b57cec5SDimitry Andric // and replace that pattern with conditional branch to bb3 and otherwise 959*e8d8bef9SDimitry Andric // fallthrough to bb2. This is generally better for branch predictors. 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 9620b57cec5SDimitry Andric MachineBasicBlock::iterator BrIt(MI); 9630b57cec5SDimitry Andric if (BrIt == MBB->begin()) 9640b57cec5SDimitry Andric return false; 9650b57cec5SDimitry Andric assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator"); 9660b57cec5SDimitry Andric 9670b57cec5SDimitry Andric MachineInstr *BrCond = &*std::prev(BrIt); 9680b57cec5SDimitry Andric if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) 9690b57cec5SDimitry Andric return false; 9700b57cec5SDimitry Andric 9710b57cec5SDimitry Andric // Check that the next block is the conditional branch target. 9720b57cec5SDimitry Andric if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) 9730b57cec5SDimitry Andric return false; 9740b57cec5SDimitry Andric return true; 9750b57cec5SDimitry Andric } 9760b57cec5SDimitry Andric 977*e8d8bef9SDimitry Andric void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { 9780b57cec5SDimitry Andric MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); 9790b57cec5SDimitry Andric MachineBasicBlock::iterator BrIt(MI); 9800b57cec5SDimitry Andric MachineInstr *BrCond = &*std::prev(BrIt); 9810b57cec5SDimitry Andric 982*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(*BrCond); 983*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); 984*e8d8bef9SDimitry Andric // FIXME: Does int/fp matter for this? If so, we might need to restrict 985*e8d8bef9SDimitry Andric // this to i1 only since we might not know for sure what kind of 986*e8d8bef9SDimitry Andric // compare generated the condition value. 987*e8d8bef9SDimitry Andric auto True = Builder.buildConstant( 988*e8d8bef9SDimitry Andric Ty, getICmpTrueVal(getTargetLowering(), false, false)); 989*e8d8bef9SDimitry Andric auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); 9900b57cec5SDimitry Andric 991*e8d8bef9SDimitry Andric auto *FallthroughBB = BrCond->getOperand(1).getMBB(); 992*e8d8bef9SDimitry Andric Observer.changingInstr(MI); 993*e8d8bef9SDimitry Andric MI.getOperand(0).setMBB(FallthroughBB); 994*e8d8bef9SDimitry Andric Observer.changedInstr(MI); 9950b57cec5SDimitry Andric 996*e8d8bef9SDimitry Andric // Change the conditional branch to use the inverted condition and 997*e8d8bef9SDimitry Andric // new target block. 9980b57cec5SDimitry Andric Observer.changingInstr(*BrCond); 999*e8d8bef9SDimitry Andric BrCond->getOperand(0).setReg(Xor.getReg(0)); 10000b57cec5SDimitry Andric BrCond->getOperand(1).setMBB(BrTarget); 10010b57cec5SDimitry Andric Observer.changedInstr(*BrCond); 10028bcb0991SDimitry Andric } 10038bcb0991SDimitry Andric 10048bcb0991SDimitry Andric static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { 10058bcb0991SDimitry Andric // On Darwin, -Os means optimize for size without hurting performance, so 10068bcb0991SDimitry Andric // only really optimize for size when -Oz (MinSize) is used. 10078bcb0991SDimitry Andric if (MF.getTarget().getTargetTriple().isOSDarwin()) 10088bcb0991SDimitry Andric return MF.getFunction().hasMinSize(); 10098bcb0991SDimitry Andric return MF.getFunction().hasOptSize(); 10108bcb0991SDimitry Andric } 10118bcb0991SDimitry Andric 10128bcb0991SDimitry Andric // Returns a list of types to use for memory op lowering in MemOps. A partial 10138bcb0991SDimitry Andric // port of findOptimalMemOpLowering in TargetLowering. 10145ffd83dbSDimitry Andric static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, 10155ffd83dbSDimitry Andric unsigned Limit, const MemOp &Op, 10165ffd83dbSDimitry Andric unsigned DstAS, unsigned SrcAS, 10175ffd83dbSDimitry Andric const AttributeList &FuncAttributes, 10185ffd83dbSDimitry Andric const TargetLowering &TLI) { 10195ffd83dbSDimitry Andric if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) 10208bcb0991SDimitry Andric return false; 10218bcb0991SDimitry Andric 10225ffd83dbSDimitry Andric LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); 10238bcb0991SDimitry Andric 10248bcb0991SDimitry Andric if (Ty == LLT()) { 10258bcb0991SDimitry Andric // Use the largest scalar type whose alignment constraints are satisfied. 10268bcb0991SDimitry Andric // We only need to check DstAlign here as SrcAlign is always greater or 10278bcb0991SDimitry Andric // equal to DstAlign (or zero). 10288bcb0991SDimitry Andric Ty = LLT::scalar(64); 10295ffd83dbSDimitry Andric if (Op.isFixedDstAlign()) 10305ffd83dbSDimitry Andric while (Op.getDstAlign() < Ty.getSizeInBytes() && 10315ffd83dbSDimitry Andric !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) 10328bcb0991SDimitry Andric Ty = LLT::scalar(Ty.getSizeInBytes()); 10338bcb0991SDimitry Andric assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); 10348bcb0991SDimitry Andric // FIXME: check for the largest legal type we can load/store to. 10358bcb0991SDimitry Andric } 10368bcb0991SDimitry Andric 10378bcb0991SDimitry Andric unsigned NumMemOps = 0; 10385ffd83dbSDimitry Andric uint64_t Size = Op.size(); 10395ffd83dbSDimitry Andric while (Size) { 10408bcb0991SDimitry Andric unsigned TySize = Ty.getSizeInBytes(); 10418bcb0991SDimitry Andric while (TySize > Size) { 10428bcb0991SDimitry Andric // For now, only use non-vector load / store's for the left-over pieces. 10438bcb0991SDimitry Andric LLT NewTy = Ty; 10448bcb0991SDimitry Andric // FIXME: check for mem op safety and legality of the types. Not all of 10458bcb0991SDimitry Andric // SDAGisms map cleanly to GISel concepts. 10468bcb0991SDimitry Andric if (NewTy.isVector()) 10478bcb0991SDimitry Andric NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); 10488bcb0991SDimitry Andric NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); 10498bcb0991SDimitry Andric unsigned NewTySize = NewTy.getSizeInBytes(); 10508bcb0991SDimitry Andric assert(NewTySize > 0 && "Could not find appropriate type"); 10518bcb0991SDimitry Andric 10528bcb0991SDimitry Andric // If the new LLT cannot cover all of the remaining bits, then consider 10538bcb0991SDimitry Andric // issuing a (or a pair of) unaligned and overlapping load / store. 10548bcb0991SDimitry Andric bool Fast; 10558bcb0991SDimitry Andric // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). 10568bcb0991SDimitry Andric MVT VT = getMVTForLLT(Ty); 10575ffd83dbSDimitry Andric if (NumMemOps && Op.allowOverlap() && NewTySize < Size && 10588bcb0991SDimitry Andric TLI.allowsMisalignedMemoryAccesses( 10595ffd83dbSDimitry Andric VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, 10605ffd83dbSDimitry Andric MachineMemOperand::MONone, &Fast) && 10618bcb0991SDimitry Andric Fast) 10628bcb0991SDimitry Andric TySize = Size; 10638bcb0991SDimitry Andric else { 10648bcb0991SDimitry Andric Ty = NewTy; 10658bcb0991SDimitry Andric TySize = NewTySize; 10668bcb0991SDimitry Andric } 10678bcb0991SDimitry Andric } 10688bcb0991SDimitry Andric 10698bcb0991SDimitry Andric if (++NumMemOps > Limit) 10708bcb0991SDimitry Andric return false; 10718bcb0991SDimitry Andric 10728bcb0991SDimitry Andric MemOps.push_back(Ty); 10738bcb0991SDimitry Andric Size -= TySize; 10748bcb0991SDimitry Andric } 10758bcb0991SDimitry Andric 10760b57cec5SDimitry Andric return true; 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 10798bcb0991SDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { 10808bcb0991SDimitry Andric if (Ty.isVector()) 10815ffd83dbSDimitry Andric return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), 10828bcb0991SDimitry Andric Ty.getNumElements()); 10838bcb0991SDimitry Andric return IntegerType::get(C, Ty.getSizeInBits()); 10848bcb0991SDimitry Andric } 10858bcb0991SDimitry Andric 10868bcb0991SDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition. 10878bcb0991SDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { 10888bcb0991SDimitry Andric MachineRegisterInfo &MRI = *MIB.getMRI(); 10898bcb0991SDimitry Andric unsigned NumBits = Ty.getScalarSizeInBits(); 10908bcb0991SDimitry Andric auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); 10918bcb0991SDimitry Andric if (!Ty.isVector() && ValVRegAndVal) { 1092*e8d8bef9SDimitry Andric APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); 10938bcb0991SDimitry Andric APInt SplatVal = APInt::getSplat(NumBits, Scalar); 10948bcb0991SDimitry Andric return MIB.buildConstant(Ty, SplatVal).getReg(0); 10958bcb0991SDimitry Andric } 10968bcb0991SDimitry Andric 10978bcb0991SDimitry Andric // Extend the byte value to the larger type, and then multiply by a magic 10988bcb0991SDimitry Andric // value 0x010101... in order to replicate it across every byte. 10995ffd83dbSDimitry Andric // Unless it's zero, in which case just emit a larger G_CONSTANT 0. 11005ffd83dbSDimitry Andric if (ValVRegAndVal && ValVRegAndVal->Value == 0) { 11015ffd83dbSDimitry Andric return MIB.buildConstant(Ty, 0).getReg(0); 11025ffd83dbSDimitry Andric } 11035ffd83dbSDimitry Andric 11048bcb0991SDimitry Andric LLT ExtType = Ty.getScalarType(); 11058bcb0991SDimitry Andric auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); 11068bcb0991SDimitry Andric if (NumBits > 8) { 11078bcb0991SDimitry Andric APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); 11088bcb0991SDimitry Andric auto MagicMI = MIB.buildConstant(ExtType, Magic); 11098bcb0991SDimitry Andric Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); 11108bcb0991SDimitry Andric } 11118bcb0991SDimitry Andric 11125ffd83dbSDimitry Andric // For vector types create a G_BUILD_VECTOR. 11135ffd83dbSDimitry Andric if (Ty.isVector()) 11145ffd83dbSDimitry Andric Val = MIB.buildSplatVector(Ty, Val).getReg(0); 11155ffd83dbSDimitry Andric 11168bcb0991SDimitry Andric return Val; 11178bcb0991SDimitry Andric } 11188bcb0991SDimitry Andric 11195ffd83dbSDimitry Andric bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, 11205ffd83dbSDimitry Andric Register Val, unsigned KnownLen, 11215ffd83dbSDimitry Andric Align Alignment, bool IsVolatile) { 11228bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 11238bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 11248bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 11258bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 11268bcb0991SDimitry Andric 11278bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memset length!"); 11288bcb0991SDimitry Andric 11298bcb0991SDimitry Andric bool DstAlignCanChange = false; 11308bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 11318bcb0991SDimitry Andric bool OptSize = shouldLowerMemFuncForSize(MF); 11328bcb0991SDimitry Andric 11338bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 11348bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 11358bcb0991SDimitry Andric DstAlignCanChange = true; 11368bcb0991SDimitry Andric 11378bcb0991SDimitry Andric unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); 11388bcb0991SDimitry Andric std::vector<LLT> MemOps; 11398bcb0991SDimitry Andric 11408bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 11418bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 11428bcb0991SDimitry Andric 11438bcb0991SDimitry Andric auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); 11448bcb0991SDimitry Andric bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; 11458bcb0991SDimitry Andric 11465ffd83dbSDimitry Andric if (!findGISelOptimalMemOpLowering(MemOps, Limit, 11475ffd83dbSDimitry Andric MemOp::Set(KnownLen, DstAlignCanChange, 11485ffd83dbSDimitry Andric Alignment, 11495ffd83dbSDimitry Andric /*IsZeroMemset=*/IsZeroVal, 11505ffd83dbSDimitry Andric /*IsVolatile=*/IsVolatile), 11515ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), ~0u, 11528bcb0991SDimitry Andric MF.getFunction().getAttributes(), TLI)) 11538bcb0991SDimitry Andric return false; 11548bcb0991SDimitry Andric 11558bcb0991SDimitry Andric if (DstAlignCanChange) { 11568bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 11578bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 11585ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 11595ffd83dbSDimitry Andric if (NewAlign > Alignment) { 11605ffd83dbSDimitry Andric Alignment = NewAlign; 11618bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 11628bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 11635ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 11645ffd83dbSDimitry Andric MFI.setObjectAlignment(FI, Alignment); 11658bcb0991SDimitry Andric } 11668bcb0991SDimitry Andric } 11678bcb0991SDimitry Andric 11688bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 11698bcb0991SDimitry Andric // Find the largest store and generate the bit pattern for it. 11708bcb0991SDimitry Andric LLT LargestTy = MemOps[0]; 11718bcb0991SDimitry Andric for (unsigned i = 1; i < MemOps.size(); i++) 11728bcb0991SDimitry Andric if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) 11738bcb0991SDimitry Andric LargestTy = MemOps[i]; 11748bcb0991SDimitry Andric 11758bcb0991SDimitry Andric // The memset stored value is always defined as an s8, so in order to make it 11768bcb0991SDimitry Andric // work with larger store types we need to repeat the bit pattern across the 11778bcb0991SDimitry Andric // wider type. 11788bcb0991SDimitry Andric Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); 11798bcb0991SDimitry Andric 11808bcb0991SDimitry Andric if (!MemSetValue) 11818bcb0991SDimitry Andric return false; 11828bcb0991SDimitry Andric 11838bcb0991SDimitry Andric // Generate the stores. For each store type in the list, we generate the 11848bcb0991SDimitry Andric // matching store of that type to the destination address. 11858bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Dst); 11868bcb0991SDimitry Andric unsigned DstOff = 0; 11878bcb0991SDimitry Andric unsigned Size = KnownLen; 11888bcb0991SDimitry Andric for (unsigned I = 0; I < MemOps.size(); I++) { 11898bcb0991SDimitry Andric LLT Ty = MemOps[I]; 11908bcb0991SDimitry Andric unsigned TySize = Ty.getSizeInBytes(); 11918bcb0991SDimitry Andric if (TySize > Size) { 11928bcb0991SDimitry Andric // Issuing an unaligned load / store pair that overlaps with the previous 11938bcb0991SDimitry Andric // pair. Adjust the offset accordingly. 11948bcb0991SDimitry Andric assert(I == MemOps.size() - 1 && I != 0); 11958bcb0991SDimitry Andric DstOff -= TySize - Size; 11968bcb0991SDimitry Andric } 11978bcb0991SDimitry Andric 11988bcb0991SDimitry Andric // If this store is smaller than the largest store see whether we can get 11998bcb0991SDimitry Andric // the smaller value for free with a truncate. 12008bcb0991SDimitry Andric Register Value = MemSetValue; 12018bcb0991SDimitry Andric if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { 12028bcb0991SDimitry Andric MVT VT = getMVTForLLT(Ty); 12038bcb0991SDimitry Andric MVT LargestVT = getMVTForLLT(LargestTy); 12048bcb0991SDimitry Andric if (!LargestTy.isVector() && !Ty.isVector() && 12058bcb0991SDimitry Andric TLI.isTruncateFree(LargestVT, VT)) 12068bcb0991SDimitry Andric Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); 12078bcb0991SDimitry Andric else 12088bcb0991SDimitry Andric Value = getMemsetValue(Val, Ty, MIB); 12098bcb0991SDimitry Andric if (!Value) 12108bcb0991SDimitry Andric return false; 12118bcb0991SDimitry Andric } 12128bcb0991SDimitry Andric 12138bcb0991SDimitry Andric auto *StoreMMO = 12148bcb0991SDimitry Andric MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); 12158bcb0991SDimitry Andric 12168bcb0991SDimitry Andric Register Ptr = Dst; 12178bcb0991SDimitry Andric if (DstOff != 0) { 12188bcb0991SDimitry Andric auto Offset = 12198bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); 1220480093f4SDimitry Andric Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 12218bcb0991SDimitry Andric } 12228bcb0991SDimitry Andric 12238bcb0991SDimitry Andric MIB.buildStore(Value, Ptr, *StoreMMO); 12248bcb0991SDimitry Andric DstOff += Ty.getSizeInBytes(); 12258bcb0991SDimitry Andric Size -= TySize; 12268bcb0991SDimitry Andric } 12278bcb0991SDimitry Andric 12288bcb0991SDimitry Andric MI.eraseFromParent(); 12298bcb0991SDimitry Andric return true; 12308bcb0991SDimitry Andric } 12318bcb0991SDimitry Andric 12328bcb0991SDimitry Andric bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, 12338bcb0991SDimitry Andric Register Src, unsigned KnownLen, 12345ffd83dbSDimitry Andric Align DstAlign, Align SrcAlign, 12358bcb0991SDimitry Andric bool IsVolatile) { 12368bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 12378bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 12388bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 12398bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 12408bcb0991SDimitry Andric 12418bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memcpy length!"); 12428bcb0991SDimitry Andric 12438bcb0991SDimitry Andric bool DstAlignCanChange = false; 12448bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 12458bcb0991SDimitry Andric bool OptSize = shouldLowerMemFuncForSize(MF); 12465ffd83dbSDimitry Andric Align Alignment = commonAlignment(DstAlign, SrcAlign); 12478bcb0991SDimitry Andric 12488bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 12498bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 12508bcb0991SDimitry Andric DstAlignCanChange = true; 12518bcb0991SDimitry Andric 12528bcb0991SDimitry Andric // FIXME: infer better src pointer alignment like SelectionDAG does here. 12538bcb0991SDimitry Andric // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining 12548bcb0991SDimitry Andric // if the memcpy is in a tail call position. 12558bcb0991SDimitry Andric 12568bcb0991SDimitry Andric unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); 12578bcb0991SDimitry Andric std::vector<LLT> MemOps; 12588bcb0991SDimitry Andric 12598bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 12608bcb0991SDimitry Andric const auto &SrcMMO = **std::next(MI.memoperands_begin()); 12618bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 12628bcb0991SDimitry Andric MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); 12638bcb0991SDimitry Andric 12648bcb0991SDimitry Andric if (!findGISelOptimalMemOpLowering( 12655ffd83dbSDimitry Andric MemOps, Limit, 12665ffd83dbSDimitry Andric MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, 12675ffd83dbSDimitry Andric IsVolatile), 12685ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 12695ffd83dbSDimitry Andric MF.getFunction().getAttributes(), TLI)) 12708bcb0991SDimitry Andric return false; 12718bcb0991SDimitry Andric 12728bcb0991SDimitry Andric if (DstAlignCanChange) { 12738bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 12748bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 12755ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 12768bcb0991SDimitry Andric 12778bcb0991SDimitry Andric // Don't promote to an alignment that would require dynamic stack 12788bcb0991SDimitry Andric // realignment. 12798bcb0991SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 12808bcb0991SDimitry Andric if (!TRI->needsStackRealignment(MF)) 12815ffd83dbSDimitry Andric while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) 12825ffd83dbSDimitry Andric NewAlign = NewAlign / 2; 12838bcb0991SDimitry Andric 12848bcb0991SDimitry Andric if (NewAlign > Alignment) { 12858bcb0991SDimitry Andric Alignment = NewAlign; 12868bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 12878bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 12885ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 12898bcb0991SDimitry Andric MFI.setObjectAlignment(FI, Alignment); 12908bcb0991SDimitry Andric } 12918bcb0991SDimitry Andric } 12928bcb0991SDimitry Andric 12938bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); 12948bcb0991SDimitry Andric 12958bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 12968bcb0991SDimitry Andric // Now we need to emit a pair of load and stores for each of the types we've 12978bcb0991SDimitry Andric // collected. I.e. for each type, generate a load from the source pointer of 12988bcb0991SDimitry Andric // that type width, and then generate a corresponding store to the dest buffer 12998bcb0991SDimitry Andric // of that value loaded. This can result in a sequence of loads and stores 13008bcb0991SDimitry Andric // mixed types, depending on what the target specifies as good types to use. 13018bcb0991SDimitry Andric unsigned CurrOffset = 0; 13028bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Src); 13038bcb0991SDimitry Andric unsigned Size = KnownLen; 13048bcb0991SDimitry Andric for (auto CopyTy : MemOps) { 13058bcb0991SDimitry Andric // Issuing an unaligned load / store pair that overlaps with the previous 13068bcb0991SDimitry Andric // pair. Adjust the offset accordingly. 13078bcb0991SDimitry Andric if (CopyTy.getSizeInBytes() > Size) 13088bcb0991SDimitry Andric CurrOffset -= CopyTy.getSizeInBytes() - Size; 13098bcb0991SDimitry Andric 13108bcb0991SDimitry Andric // Construct MMOs for the accesses. 13118bcb0991SDimitry Andric auto *LoadMMO = 13128bcb0991SDimitry Andric MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); 13138bcb0991SDimitry Andric auto *StoreMMO = 13148bcb0991SDimitry Andric MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); 13158bcb0991SDimitry Andric 13168bcb0991SDimitry Andric // Create the load. 13178bcb0991SDimitry Andric Register LoadPtr = Src; 13188bcb0991SDimitry Andric Register Offset; 13198bcb0991SDimitry Andric if (CurrOffset != 0) { 13208bcb0991SDimitry Andric Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) 13218bcb0991SDimitry Andric .getReg(0); 1322480093f4SDimitry Andric LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); 13238bcb0991SDimitry Andric } 13248bcb0991SDimitry Andric auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); 13258bcb0991SDimitry Andric 13268bcb0991SDimitry Andric // Create the store. 13278bcb0991SDimitry Andric Register StorePtr = 1328480093f4SDimitry Andric CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 13298bcb0991SDimitry Andric MIB.buildStore(LdVal, StorePtr, *StoreMMO); 13308bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 13318bcb0991SDimitry Andric Size -= CopyTy.getSizeInBytes(); 13328bcb0991SDimitry Andric } 13338bcb0991SDimitry Andric 13348bcb0991SDimitry Andric MI.eraseFromParent(); 13358bcb0991SDimitry Andric return true; 13368bcb0991SDimitry Andric } 13378bcb0991SDimitry Andric 13388bcb0991SDimitry Andric bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, 13398bcb0991SDimitry Andric Register Src, unsigned KnownLen, 13405ffd83dbSDimitry Andric Align DstAlign, Align SrcAlign, 13418bcb0991SDimitry Andric bool IsVolatile) { 13428bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 13438bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 13448bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 13458bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 13468bcb0991SDimitry Andric 13478bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memmove length!"); 13488bcb0991SDimitry Andric 13498bcb0991SDimitry Andric bool DstAlignCanChange = false; 13508bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 13518bcb0991SDimitry Andric bool OptSize = shouldLowerMemFuncForSize(MF); 13525ffd83dbSDimitry Andric Align Alignment = commonAlignment(DstAlign, SrcAlign); 13538bcb0991SDimitry Andric 13548bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 13558bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 13568bcb0991SDimitry Andric DstAlignCanChange = true; 13578bcb0991SDimitry Andric 13588bcb0991SDimitry Andric unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); 13598bcb0991SDimitry Andric std::vector<LLT> MemOps; 13608bcb0991SDimitry Andric 13618bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 13628bcb0991SDimitry Andric const auto &SrcMMO = **std::next(MI.memoperands_begin()); 13638bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 13648bcb0991SDimitry Andric MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); 13658bcb0991SDimitry Andric 13668bcb0991SDimitry Andric // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due 13678bcb0991SDimitry Andric // to a bug in it's findOptimalMemOpLowering implementation. For now do the 13688bcb0991SDimitry Andric // same thing here. 13698bcb0991SDimitry Andric if (!findGISelOptimalMemOpLowering( 13705ffd83dbSDimitry Andric MemOps, Limit, 13715ffd83dbSDimitry Andric MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, 13725ffd83dbSDimitry Andric /*IsVolatile*/ true), 13735ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 13745ffd83dbSDimitry Andric MF.getFunction().getAttributes(), TLI)) 13758bcb0991SDimitry Andric return false; 13768bcb0991SDimitry Andric 13778bcb0991SDimitry Andric if (DstAlignCanChange) { 13788bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 13798bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 13805ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 13818bcb0991SDimitry Andric 13828bcb0991SDimitry Andric // Don't promote to an alignment that would require dynamic stack 13838bcb0991SDimitry Andric // realignment. 13848bcb0991SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 13858bcb0991SDimitry Andric if (!TRI->needsStackRealignment(MF)) 13865ffd83dbSDimitry Andric while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) 13875ffd83dbSDimitry Andric NewAlign = NewAlign / 2; 13888bcb0991SDimitry Andric 13898bcb0991SDimitry Andric if (NewAlign > Alignment) { 13908bcb0991SDimitry Andric Alignment = NewAlign; 13918bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 13928bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 13935ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 13948bcb0991SDimitry Andric MFI.setObjectAlignment(FI, Alignment); 13958bcb0991SDimitry Andric } 13968bcb0991SDimitry Andric } 13978bcb0991SDimitry Andric 13988bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); 13998bcb0991SDimitry Andric 14008bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 14018bcb0991SDimitry Andric // Memmove requires that we perform the loads first before issuing the stores. 14028bcb0991SDimitry Andric // Apart from that, this loop is pretty much doing the same thing as the 14038bcb0991SDimitry Andric // memcpy codegen function. 14048bcb0991SDimitry Andric unsigned CurrOffset = 0; 14058bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Src); 14068bcb0991SDimitry Andric SmallVector<Register, 16> LoadVals; 14078bcb0991SDimitry Andric for (auto CopyTy : MemOps) { 14088bcb0991SDimitry Andric // Construct MMO for the load. 14098bcb0991SDimitry Andric auto *LoadMMO = 14108bcb0991SDimitry Andric MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); 14118bcb0991SDimitry Andric 14128bcb0991SDimitry Andric // Create the load. 14138bcb0991SDimitry Andric Register LoadPtr = Src; 14148bcb0991SDimitry Andric if (CurrOffset != 0) { 14158bcb0991SDimitry Andric auto Offset = 14168bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); 1417480093f4SDimitry Andric LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); 14188bcb0991SDimitry Andric } 14198bcb0991SDimitry Andric LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); 14208bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 14218bcb0991SDimitry Andric } 14228bcb0991SDimitry Andric 14238bcb0991SDimitry Andric CurrOffset = 0; 14248bcb0991SDimitry Andric for (unsigned I = 0; I < MemOps.size(); ++I) { 14258bcb0991SDimitry Andric LLT CopyTy = MemOps[I]; 14268bcb0991SDimitry Andric // Now store the values loaded. 14278bcb0991SDimitry Andric auto *StoreMMO = 14288bcb0991SDimitry Andric MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); 14298bcb0991SDimitry Andric 14308bcb0991SDimitry Andric Register StorePtr = Dst; 14318bcb0991SDimitry Andric if (CurrOffset != 0) { 14328bcb0991SDimitry Andric auto Offset = 14338bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); 1434480093f4SDimitry Andric StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 14358bcb0991SDimitry Andric } 14368bcb0991SDimitry Andric MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); 14378bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 14388bcb0991SDimitry Andric } 14398bcb0991SDimitry Andric MI.eraseFromParent(); 14408bcb0991SDimitry Andric return true; 14418bcb0991SDimitry Andric } 14428bcb0991SDimitry Andric 14438bcb0991SDimitry Andric bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { 1444*e8d8bef9SDimitry Andric const unsigned Opc = MI.getOpcode(); 14458bcb0991SDimitry Andric // This combine is fairly complex so it's not written with a separate 14468bcb0991SDimitry Andric // matcher function. 1447*e8d8bef9SDimitry Andric assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || 1448*e8d8bef9SDimitry Andric Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); 14498bcb0991SDimitry Andric 14508bcb0991SDimitry Andric auto MMOIt = MI.memoperands_begin(); 14518bcb0991SDimitry Andric const MachineMemOperand *MemOp = *MMOIt; 14528bcb0991SDimitry Andric bool IsVolatile = MemOp->isVolatile(); 14538bcb0991SDimitry Andric // Don't try to optimize volatile. 14548bcb0991SDimitry Andric if (IsVolatile) 14558bcb0991SDimitry Andric return false; 14568bcb0991SDimitry Andric 14575ffd83dbSDimitry Andric Align DstAlign = MemOp->getBaseAlign(); 14585ffd83dbSDimitry Andric Align SrcAlign; 1459*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 1460*e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 1461*e8d8bef9SDimitry Andric Register Len = MI.getOperand(2).getReg(); 14628bcb0991SDimitry Andric 1463*e8d8bef9SDimitry Andric if (Opc != TargetOpcode::G_MEMSET) { 14648bcb0991SDimitry Andric assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); 14658bcb0991SDimitry Andric MemOp = *(++MMOIt); 14665ffd83dbSDimitry Andric SrcAlign = MemOp->getBaseAlign(); 14678bcb0991SDimitry Andric } 14688bcb0991SDimitry Andric 14698bcb0991SDimitry Andric // See if this is a constant length copy 14708bcb0991SDimitry Andric auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); 14718bcb0991SDimitry Andric if (!LenVRegAndVal) 14728bcb0991SDimitry Andric return false; // Leave it to the legalizer to lower it to a libcall. 1473*e8d8bef9SDimitry Andric unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); 14748bcb0991SDimitry Andric 14758bcb0991SDimitry Andric if (KnownLen == 0) { 14768bcb0991SDimitry Andric MI.eraseFromParent(); 14778bcb0991SDimitry Andric return true; 14788bcb0991SDimitry Andric } 14798bcb0991SDimitry Andric 14808bcb0991SDimitry Andric if (MaxLen && KnownLen > MaxLen) 14818bcb0991SDimitry Andric return false; 14828bcb0991SDimitry Andric 1483*e8d8bef9SDimitry Andric if (Opc == TargetOpcode::G_MEMCPY) 14848bcb0991SDimitry Andric return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); 1485*e8d8bef9SDimitry Andric if (Opc == TargetOpcode::G_MEMMOVE) 14868bcb0991SDimitry Andric return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); 1487*e8d8bef9SDimitry Andric if (Opc == TargetOpcode::G_MEMSET) 14888bcb0991SDimitry Andric return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); 14898bcb0991SDimitry Andric return false; 14908bcb0991SDimitry Andric } 14918bcb0991SDimitry Andric 1492*e8d8bef9SDimitry Andric static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, 1493*e8d8bef9SDimitry Andric const Register Op, 1494*e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 1495*e8d8bef9SDimitry Andric const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); 1496*e8d8bef9SDimitry Andric if (!MaybeCst) 1497*e8d8bef9SDimitry Andric return None; 1498*e8d8bef9SDimitry Andric 1499*e8d8bef9SDimitry Andric APFloat V = MaybeCst->getValueAPF(); 1500*e8d8bef9SDimitry Andric switch (Opcode) { 1501*e8d8bef9SDimitry Andric default: 1502*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode!"); 1503*e8d8bef9SDimitry Andric case TargetOpcode::G_FNEG: { 1504*e8d8bef9SDimitry Andric V.changeSign(); 1505*e8d8bef9SDimitry Andric return V; 1506*e8d8bef9SDimitry Andric } 1507*e8d8bef9SDimitry Andric case TargetOpcode::G_FABS: { 1508*e8d8bef9SDimitry Andric V.clearSign(); 1509*e8d8bef9SDimitry Andric return V; 1510*e8d8bef9SDimitry Andric } 1511*e8d8bef9SDimitry Andric case TargetOpcode::G_FPTRUNC: 1512*e8d8bef9SDimitry Andric break; 1513*e8d8bef9SDimitry Andric case TargetOpcode::G_FSQRT: { 1514*e8d8bef9SDimitry Andric bool Unused; 1515*e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1516*e8d8bef9SDimitry Andric V = APFloat(sqrt(V.convertToDouble())); 1517*e8d8bef9SDimitry Andric break; 1518*e8d8bef9SDimitry Andric } 1519*e8d8bef9SDimitry Andric case TargetOpcode::G_FLOG2: { 1520*e8d8bef9SDimitry Andric bool Unused; 1521*e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1522*e8d8bef9SDimitry Andric V = APFloat(log2(V.convertToDouble())); 1523*e8d8bef9SDimitry Andric break; 1524*e8d8bef9SDimitry Andric } 1525*e8d8bef9SDimitry Andric } 1526*e8d8bef9SDimitry Andric // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, 1527*e8d8bef9SDimitry Andric // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, 1528*e8d8bef9SDimitry Andric // and `G_FLOG2` reach here. 1529*e8d8bef9SDimitry Andric bool Unused; 1530*e8d8bef9SDimitry Andric V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); 1531*e8d8bef9SDimitry Andric return V; 1532*e8d8bef9SDimitry Andric } 1533*e8d8bef9SDimitry Andric 1534*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, 1535*e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1536*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1537*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1538*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1539*e8d8bef9SDimitry Andric Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); 1540*e8d8bef9SDimitry Andric return Cst.hasValue(); 1541*e8d8bef9SDimitry Andric } 1542*e8d8bef9SDimitry Andric 1543*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, 1544*e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1545*e8d8bef9SDimitry Andric assert(Cst.hasValue() && "Optional is unexpectedly empty!"); 1546*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1547*e8d8bef9SDimitry Andric MachineFunction &MF = Builder.getMF(); 1548*e8d8bef9SDimitry Andric auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); 1549*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1550*e8d8bef9SDimitry Andric Builder.buildFConstant(DstReg, *FPVal); 1551*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1552*e8d8bef9SDimitry Andric return true; 1553*e8d8bef9SDimitry Andric } 1554*e8d8bef9SDimitry Andric 1555480093f4SDimitry Andric bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, 1556480093f4SDimitry Andric PtrAddChain &MatchInfo) { 1557480093f4SDimitry Andric // We're trying to match the following pattern: 1558480093f4SDimitry Andric // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 1559480093f4SDimitry Andric // %root = G_PTR_ADD %t1, G_CONSTANT imm2 1560480093f4SDimitry Andric // --> 1561480093f4SDimitry Andric // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2) 1562480093f4SDimitry Andric 1563480093f4SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) 1564480093f4SDimitry Andric return false; 1565480093f4SDimitry Andric 1566480093f4SDimitry Andric Register Add2 = MI.getOperand(1).getReg(); 1567480093f4SDimitry Andric Register Imm1 = MI.getOperand(2).getReg(); 1568480093f4SDimitry Andric auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); 1569480093f4SDimitry Andric if (!MaybeImmVal) 1570480093f4SDimitry Andric return false; 1571480093f4SDimitry Andric 1572480093f4SDimitry Andric MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); 1573480093f4SDimitry Andric if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) 1574480093f4SDimitry Andric return false; 1575480093f4SDimitry Andric 1576480093f4SDimitry Andric Register Base = Add2Def->getOperand(1).getReg(); 1577480093f4SDimitry Andric Register Imm2 = Add2Def->getOperand(2).getReg(); 1578480093f4SDimitry Andric auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); 1579480093f4SDimitry Andric if (!MaybeImm2Val) 1580480093f4SDimitry Andric return false; 1581480093f4SDimitry Andric 1582480093f4SDimitry Andric // Pass the combined immediate to the apply function. 1583*e8d8bef9SDimitry Andric MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue(); 1584480093f4SDimitry Andric MatchInfo.Base = Base; 1585480093f4SDimitry Andric return true; 1586480093f4SDimitry Andric } 1587480093f4SDimitry Andric 1588480093f4SDimitry Andric bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, 1589480093f4SDimitry Andric PtrAddChain &MatchInfo) { 1590480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 1591480093f4SDimitry Andric MachineIRBuilder MIB(MI); 1592480093f4SDimitry Andric LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); 1593480093f4SDimitry Andric auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); 1594480093f4SDimitry Andric Observer.changingInstr(MI); 1595480093f4SDimitry Andric MI.getOperand(1).setReg(MatchInfo.Base); 1596480093f4SDimitry Andric MI.getOperand(2).setReg(NewOffset.getReg(0)); 1597480093f4SDimitry Andric Observer.changedInstr(MI); 1598480093f4SDimitry Andric return true; 1599480093f4SDimitry Andric } 1600480093f4SDimitry Andric 1601*e8d8bef9SDimitry Andric bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, 1602*e8d8bef9SDimitry Andric RegisterImmPair &MatchInfo) { 1603*e8d8bef9SDimitry Andric // We're trying to match the following pattern with any of 1604*e8d8bef9SDimitry Andric // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: 1605*e8d8bef9SDimitry Andric // %t1 = SHIFT %base, G_CONSTANT imm1 1606*e8d8bef9SDimitry Andric // %root = SHIFT %t1, G_CONSTANT imm2 1607*e8d8bef9SDimitry Andric // --> 1608*e8d8bef9SDimitry Andric // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) 1609*e8d8bef9SDimitry Andric 1610*e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1611*e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1612*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || 1613*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_USHLSAT) && 1614*e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); 1615*e8d8bef9SDimitry Andric 1616*e8d8bef9SDimitry Andric Register Shl2 = MI.getOperand(1).getReg(); 1617*e8d8bef9SDimitry Andric Register Imm1 = MI.getOperand(2).getReg(); 1618*e8d8bef9SDimitry Andric auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); 1619*e8d8bef9SDimitry Andric if (!MaybeImmVal) 1620*e8d8bef9SDimitry Andric return false; 1621*e8d8bef9SDimitry Andric 1622*e8d8bef9SDimitry Andric MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); 1623*e8d8bef9SDimitry Andric if (Shl2Def->getOpcode() != Opcode) 1624*e8d8bef9SDimitry Andric return false; 1625*e8d8bef9SDimitry Andric 1626*e8d8bef9SDimitry Andric Register Base = Shl2Def->getOperand(1).getReg(); 1627*e8d8bef9SDimitry Andric Register Imm2 = Shl2Def->getOperand(2).getReg(); 1628*e8d8bef9SDimitry Andric auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); 1629*e8d8bef9SDimitry Andric if (!MaybeImm2Val) 1630*e8d8bef9SDimitry Andric return false; 1631*e8d8bef9SDimitry Andric 1632*e8d8bef9SDimitry Andric // Pass the combined immediate to the apply function. 1633*e8d8bef9SDimitry Andric MatchInfo.Imm = 1634*e8d8bef9SDimitry Andric (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); 1635*e8d8bef9SDimitry Andric MatchInfo.Reg = Base; 1636*e8d8bef9SDimitry Andric 1637*e8d8bef9SDimitry Andric // There is no simple replacement for a saturating unsigned left shift that 1638*e8d8bef9SDimitry Andric // exceeds the scalar size. 1639*e8d8bef9SDimitry Andric if (Opcode == TargetOpcode::G_USHLSAT && 1640*e8d8bef9SDimitry Andric MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) 1641*e8d8bef9SDimitry Andric return false; 1642*e8d8bef9SDimitry Andric 1643*e8d8bef9SDimitry Andric return true; 1644*e8d8bef9SDimitry Andric } 1645*e8d8bef9SDimitry Andric 1646*e8d8bef9SDimitry Andric bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI, 1647*e8d8bef9SDimitry Andric RegisterImmPair &MatchInfo) { 1648*e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1649*e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1650*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || 1651*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_USHLSAT) && 1652*e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); 1653*e8d8bef9SDimitry Andric 1654*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1655*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(1).getReg()); 1656*e8d8bef9SDimitry Andric unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); 1657*e8d8bef9SDimitry Andric auto Imm = MatchInfo.Imm; 1658*e8d8bef9SDimitry Andric 1659*e8d8bef9SDimitry Andric if (Imm >= ScalarSizeInBits) { 1660*e8d8bef9SDimitry Andric // Any logical shift that exceeds scalar size will produce zero. 1661*e8d8bef9SDimitry Andric if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { 1662*e8d8bef9SDimitry Andric Builder.buildConstant(MI.getOperand(0), 0); 1663*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1664*e8d8bef9SDimitry Andric return true; 1665*e8d8bef9SDimitry Andric } 1666*e8d8bef9SDimitry Andric // Arithmetic shift and saturating signed left shift have no effect beyond 1667*e8d8bef9SDimitry Andric // scalar size. 1668*e8d8bef9SDimitry Andric Imm = ScalarSizeInBits - 1; 1669*e8d8bef9SDimitry Andric } 1670*e8d8bef9SDimitry Andric 1671*e8d8bef9SDimitry Andric LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); 1672*e8d8bef9SDimitry Andric Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); 1673*e8d8bef9SDimitry Andric Observer.changingInstr(MI); 1674*e8d8bef9SDimitry Andric MI.getOperand(1).setReg(MatchInfo.Reg); 1675*e8d8bef9SDimitry Andric MI.getOperand(2).setReg(NewImm); 1676*e8d8bef9SDimitry Andric Observer.changedInstr(MI); 1677*e8d8bef9SDimitry Andric return true; 1678*e8d8bef9SDimitry Andric } 1679*e8d8bef9SDimitry Andric 1680*e8d8bef9SDimitry Andric bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, 1681*e8d8bef9SDimitry Andric ShiftOfShiftedLogic &MatchInfo) { 1682*e8d8bef9SDimitry Andric // We're trying to match the following pattern with any of 1683*e8d8bef9SDimitry Andric // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination 1684*e8d8bef9SDimitry Andric // with any of G_AND/G_OR/G_XOR logic instructions. 1685*e8d8bef9SDimitry Andric // %t1 = SHIFT %X, G_CONSTANT C0 1686*e8d8bef9SDimitry Andric // %t2 = LOGIC %t1, %Y 1687*e8d8bef9SDimitry Andric // %root = SHIFT %t2, G_CONSTANT C1 1688*e8d8bef9SDimitry Andric // --> 1689*e8d8bef9SDimitry Andric // %t3 = SHIFT %X, G_CONSTANT (C0+C1) 1690*e8d8bef9SDimitry Andric // %t4 = SHIFT %Y, G_CONSTANT C1 1691*e8d8bef9SDimitry Andric // %root = LOGIC %t3, %t4 1692*e8d8bef9SDimitry Andric unsigned ShiftOpcode = MI.getOpcode(); 1693*e8d8bef9SDimitry Andric assert((ShiftOpcode == TargetOpcode::G_SHL || 1694*e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_ASHR || 1695*e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_LSHR || 1696*e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_USHLSAT || 1697*e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_SSHLSAT) && 1698*e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); 1699*e8d8bef9SDimitry Andric 1700*e8d8bef9SDimitry Andric // Match a one-use bitwise logic op. 1701*e8d8bef9SDimitry Andric Register LogicDest = MI.getOperand(1).getReg(); 1702*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(LogicDest)) 1703*e8d8bef9SDimitry Andric return false; 1704*e8d8bef9SDimitry Andric 1705*e8d8bef9SDimitry Andric MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); 1706*e8d8bef9SDimitry Andric unsigned LogicOpcode = LogicMI->getOpcode(); 1707*e8d8bef9SDimitry Andric if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && 1708*e8d8bef9SDimitry Andric LogicOpcode != TargetOpcode::G_XOR) 1709*e8d8bef9SDimitry Andric return false; 1710*e8d8bef9SDimitry Andric 1711*e8d8bef9SDimitry Andric // Find a matching one-use shift by constant. 1712*e8d8bef9SDimitry Andric const Register C1 = MI.getOperand(2).getReg(); 1713*e8d8bef9SDimitry Andric auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI); 1714*e8d8bef9SDimitry Andric if (!MaybeImmVal) 1715*e8d8bef9SDimitry Andric return false; 1716*e8d8bef9SDimitry Andric 1717*e8d8bef9SDimitry Andric const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); 1718*e8d8bef9SDimitry Andric 1719*e8d8bef9SDimitry Andric auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { 1720*e8d8bef9SDimitry Andric // Shift should match previous one and should be a one-use. 1721*e8d8bef9SDimitry Andric if (MI->getOpcode() != ShiftOpcode || 1722*e8d8bef9SDimitry Andric !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1723*e8d8bef9SDimitry Andric return false; 1724*e8d8bef9SDimitry Andric 1725*e8d8bef9SDimitry Andric // Must be a constant. 1726*e8d8bef9SDimitry Andric auto MaybeImmVal = 1727*e8d8bef9SDimitry Andric getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1728*e8d8bef9SDimitry Andric if (!MaybeImmVal) 1729*e8d8bef9SDimitry Andric return false; 1730*e8d8bef9SDimitry Andric 1731*e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.getSExtValue(); 1732*e8d8bef9SDimitry Andric return true; 1733*e8d8bef9SDimitry Andric }; 1734*e8d8bef9SDimitry Andric 1735*e8d8bef9SDimitry Andric // Logic ops are commutative, so check each operand for a match. 1736*e8d8bef9SDimitry Andric Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); 1737*e8d8bef9SDimitry Andric MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); 1738*e8d8bef9SDimitry Andric Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); 1739*e8d8bef9SDimitry Andric MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); 1740*e8d8bef9SDimitry Andric uint64_t C0Val; 1741*e8d8bef9SDimitry Andric 1742*e8d8bef9SDimitry Andric if (matchFirstShift(LogicMIOp1, C0Val)) { 1743*e8d8bef9SDimitry Andric MatchInfo.LogicNonShiftReg = LogicMIReg2; 1744*e8d8bef9SDimitry Andric MatchInfo.Shift2 = LogicMIOp1; 1745*e8d8bef9SDimitry Andric } else if (matchFirstShift(LogicMIOp2, C0Val)) { 1746*e8d8bef9SDimitry Andric MatchInfo.LogicNonShiftReg = LogicMIReg1; 1747*e8d8bef9SDimitry Andric MatchInfo.Shift2 = LogicMIOp2; 1748*e8d8bef9SDimitry Andric } else 1749*e8d8bef9SDimitry Andric return false; 1750*e8d8bef9SDimitry Andric 1751*e8d8bef9SDimitry Andric MatchInfo.ValSum = C0Val + C1Val; 1752*e8d8bef9SDimitry Andric 1753*e8d8bef9SDimitry Andric // The fold is not valid if the sum of the shift values exceeds bitwidth. 1754*e8d8bef9SDimitry Andric if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) 1755*e8d8bef9SDimitry Andric return false; 1756*e8d8bef9SDimitry Andric 1757*e8d8bef9SDimitry Andric MatchInfo.Logic = LogicMI; 1758*e8d8bef9SDimitry Andric return true; 1759*e8d8bef9SDimitry Andric } 1760*e8d8bef9SDimitry Andric 1761*e8d8bef9SDimitry Andric bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, 1762*e8d8bef9SDimitry Andric ShiftOfShiftedLogic &MatchInfo) { 1763*e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1764*e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1765*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || 1766*e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_SSHLSAT) && 1767*e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); 1768*e8d8bef9SDimitry Andric 1769*e8d8bef9SDimitry Andric LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); 1770*e8d8bef9SDimitry Andric LLT DestType = MRI.getType(MI.getOperand(0).getReg()); 1771*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1772*e8d8bef9SDimitry Andric 1773*e8d8bef9SDimitry Andric Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); 1774*e8d8bef9SDimitry Andric 1775*e8d8bef9SDimitry Andric Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); 1776*e8d8bef9SDimitry Andric Register Shift1 = 1777*e8d8bef9SDimitry Andric Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); 1778*e8d8bef9SDimitry Andric 1779*e8d8bef9SDimitry Andric Register Shift2Const = MI.getOperand(2).getReg(); 1780*e8d8bef9SDimitry Andric Register Shift2 = Builder 1781*e8d8bef9SDimitry Andric .buildInstr(Opcode, {DestType}, 1782*e8d8bef9SDimitry Andric {MatchInfo.LogicNonShiftReg, Shift2Const}) 1783*e8d8bef9SDimitry Andric .getReg(0); 1784*e8d8bef9SDimitry Andric 1785*e8d8bef9SDimitry Andric Register Dest = MI.getOperand(0).getReg(); 1786*e8d8bef9SDimitry Andric Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); 1787*e8d8bef9SDimitry Andric 1788*e8d8bef9SDimitry Andric // These were one use so it's safe to remove them. 1789*e8d8bef9SDimitry Andric MatchInfo.Shift2->eraseFromParent(); 1790*e8d8bef9SDimitry Andric MatchInfo.Logic->eraseFromParent(); 1791*e8d8bef9SDimitry Andric 1792*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1793*e8d8bef9SDimitry Andric return true; 1794*e8d8bef9SDimitry Andric } 1795*e8d8bef9SDimitry Andric 17965ffd83dbSDimitry Andric bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, 17975ffd83dbSDimitry Andric unsigned &ShiftVal) { 17985ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 17995ffd83dbSDimitry Andric auto MaybeImmVal = 18005ffd83dbSDimitry Andric getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 1801*e8d8bef9SDimitry Andric if (!MaybeImmVal) 18025ffd83dbSDimitry Andric return false; 1803*e8d8bef9SDimitry Andric 1804*e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.exactLogBase2(); 1805*e8d8bef9SDimitry Andric return (static_cast<int32_t>(ShiftVal) != -1); 18065ffd83dbSDimitry Andric } 18075ffd83dbSDimitry Andric 18085ffd83dbSDimitry Andric bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI, 18095ffd83dbSDimitry Andric unsigned &ShiftVal) { 18105ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 18115ffd83dbSDimitry Andric MachineIRBuilder MIB(MI); 18125ffd83dbSDimitry Andric LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg()); 18135ffd83dbSDimitry Andric auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal); 18145ffd83dbSDimitry Andric Observer.changingInstr(MI); 18155ffd83dbSDimitry Andric MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL)); 18165ffd83dbSDimitry Andric MI.getOperand(2).setReg(ShiftCst.getReg(0)); 18175ffd83dbSDimitry Andric Observer.changedInstr(MI); 18185ffd83dbSDimitry Andric return true; 18195ffd83dbSDimitry Andric } 18205ffd83dbSDimitry Andric 1821*e8d8bef9SDimitry Andric // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source 1822*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, 1823*e8d8bef9SDimitry Andric RegisterImmPair &MatchData) { 1824*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); 1825*e8d8bef9SDimitry Andric 1826*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 1827*e8d8bef9SDimitry Andric 1828*e8d8bef9SDimitry Andric Register ExtSrc; 1829*e8d8bef9SDimitry Andric if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && 1830*e8d8bef9SDimitry Andric !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && 1831*e8d8bef9SDimitry Andric !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) 1832*e8d8bef9SDimitry Andric return false; 1833*e8d8bef9SDimitry Andric 1834*e8d8bef9SDimitry Andric // TODO: Should handle vector splat. 1835*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 1836*e8d8bef9SDimitry Andric auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI); 1837*e8d8bef9SDimitry Andric if (!MaybeShiftAmtVal) 1838*e8d8bef9SDimitry Andric return false; 1839*e8d8bef9SDimitry Andric 1840*e8d8bef9SDimitry Andric if (LI) { 1841*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(ExtSrc); 1842*e8d8bef9SDimitry Andric 1843*e8d8bef9SDimitry Andric // We only really care about the legality with the shifted value. We can 1844*e8d8bef9SDimitry Andric // pick any type the constant shift amount, so ask the target what to 1845*e8d8bef9SDimitry Andric // use. Otherwise we would have to guess and hope it is reported as legal. 1846*e8d8bef9SDimitry Andric LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); 1847*e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) 1848*e8d8bef9SDimitry Andric return false; 1849*e8d8bef9SDimitry Andric } 1850*e8d8bef9SDimitry Andric 1851*e8d8bef9SDimitry Andric int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); 1852*e8d8bef9SDimitry Andric MatchData.Reg = ExtSrc; 1853*e8d8bef9SDimitry Andric MatchData.Imm = ShiftAmt; 1854*e8d8bef9SDimitry Andric 1855*e8d8bef9SDimitry Andric unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); 1856*e8d8bef9SDimitry Andric return MinLeadingZeros >= ShiftAmt; 1857*e8d8bef9SDimitry Andric } 1858*e8d8bef9SDimitry Andric 1859*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, 1860*e8d8bef9SDimitry Andric const RegisterImmPair &MatchData) { 1861*e8d8bef9SDimitry Andric Register ExtSrcReg = MatchData.Reg; 1862*e8d8bef9SDimitry Andric int64_t ShiftAmtVal = MatchData.Imm; 1863*e8d8bef9SDimitry Andric 1864*e8d8bef9SDimitry Andric LLT ExtSrcTy = MRI.getType(ExtSrcReg); 1865*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1866*e8d8bef9SDimitry Andric auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); 1867*e8d8bef9SDimitry Andric auto NarrowShift = 1868*e8d8bef9SDimitry Andric Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); 1869*e8d8bef9SDimitry Andric Builder.buildZExt(MI.getOperand(0), NarrowShift); 1870*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1871*e8d8bef9SDimitry Andric return true; 1872*e8d8bef9SDimitry Andric } 1873*e8d8bef9SDimitry Andric 1874*e8d8bef9SDimitry Andric static Register peekThroughBitcast(Register Reg, 1875*e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 1876*e8d8bef9SDimitry Andric while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) 1877*e8d8bef9SDimitry Andric ; 1878*e8d8bef9SDimitry Andric 1879*e8d8bef9SDimitry Andric return Reg; 1880*e8d8bef9SDimitry Andric } 1881*e8d8bef9SDimitry Andric 1882*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( 1883*e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &Operands) { 1884*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1885*e8d8bef9SDimitry Andric "Expected an unmerge"); 1886*e8d8bef9SDimitry Andric Register SrcReg = 1887*e8d8bef9SDimitry Andric peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI); 1888*e8d8bef9SDimitry Andric 1889*e8d8bef9SDimitry Andric MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); 1890*e8d8bef9SDimitry Andric if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES && 1891*e8d8bef9SDimitry Andric SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR && 1892*e8d8bef9SDimitry Andric SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS) 1893*e8d8bef9SDimitry Andric return false; 1894*e8d8bef9SDimitry Andric 1895*e8d8bef9SDimitry Andric // Check the source type of the merge. 1896*e8d8bef9SDimitry Andric LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg()); 1897*e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); 1898*e8d8bef9SDimitry Andric bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); 1899*e8d8bef9SDimitry Andric if (SrcMergeTy != Dst0Ty && !SameSize) 1900*e8d8bef9SDimitry Andric return false; 1901*e8d8bef9SDimitry Andric // They are the same now (modulo a bitcast). 1902*e8d8bef9SDimitry Andric // We can collect all the src registers. 1903*e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx; 1904*e8d8bef9SDimitry Andric ++Idx) 1905*e8d8bef9SDimitry Andric Operands.push_back(SrcInstr->getOperand(Idx).getReg()); 1906*e8d8bef9SDimitry Andric return true; 1907*e8d8bef9SDimitry Andric } 1908*e8d8bef9SDimitry Andric 1909*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineUnmergeMergeToPlainValues( 1910*e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &Operands) { 1911*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1912*e8d8bef9SDimitry Andric "Expected an unmerge"); 1913*e8d8bef9SDimitry Andric assert((MI.getNumOperands() - 1 == Operands.size()) && 1914*e8d8bef9SDimitry Andric "Not enough operands to replace all defs"); 1915*e8d8bef9SDimitry Andric unsigned NumElems = MI.getNumOperands() - 1; 1916*e8d8bef9SDimitry Andric 1917*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(Operands[0]); 1918*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 1919*e8d8bef9SDimitry Andric bool CanReuseInputDirectly = DstTy == SrcTy; 1920*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1921*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) { 1922*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(Idx).getReg(); 1923*e8d8bef9SDimitry Andric Register SrcReg = Operands[Idx]; 1924*e8d8bef9SDimitry Andric if (CanReuseInputDirectly) 1925*e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1926*e8d8bef9SDimitry Andric else 1927*e8d8bef9SDimitry Andric Builder.buildCast(DstReg, SrcReg); 1928*e8d8bef9SDimitry Andric } 1929*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1930*e8d8bef9SDimitry Andric return true; 1931*e8d8bef9SDimitry Andric } 1932*e8d8bef9SDimitry Andric 1933*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, 1934*e8d8bef9SDimitry Andric SmallVectorImpl<APInt> &Csts) { 1935*e8d8bef9SDimitry Andric unsigned SrcIdx = MI.getNumOperands() - 1; 1936*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(SrcIdx).getReg(); 1937*e8d8bef9SDimitry Andric MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); 1938*e8d8bef9SDimitry Andric if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && 1939*e8d8bef9SDimitry Andric SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) 1940*e8d8bef9SDimitry Andric return false; 1941*e8d8bef9SDimitry Andric // Break down the big constant in smaller ones. 1942*e8d8bef9SDimitry Andric const MachineOperand &CstVal = SrcInstr->getOperand(1); 1943*e8d8bef9SDimitry Andric APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT 1944*e8d8bef9SDimitry Andric ? CstVal.getCImm()->getValue() 1945*e8d8bef9SDimitry Andric : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); 1946*e8d8bef9SDimitry Andric 1947*e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); 1948*e8d8bef9SDimitry Andric unsigned ShiftAmt = Dst0Ty.getSizeInBits(); 1949*e8d8bef9SDimitry Andric // Unmerge a constant. 1950*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { 1951*e8d8bef9SDimitry Andric Csts.emplace_back(Val.trunc(ShiftAmt)); 1952*e8d8bef9SDimitry Andric Val = Val.lshr(ShiftAmt); 1953*e8d8bef9SDimitry Andric } 1954*e8d8bef9SDimitry Andric 1955*e8d8bef9SDimitry Andric return true; 1956*e8d8bef9SDimitry Andric } 1957*e8d8bef9SDimitry Andric 1958*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, 1959*e8d8bef9SDimitry Andric SmallVectorImpl<APInt> &Csts) { 1960*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1961*e8d8bef9SDimitry Andric "Expected an unmerge"); 1962*e8d8bef9SDimitry Andric assert((MI.getNumOperands() - 1 == Csts.size()) && 1963*e8d8bef9SDimitry Andric "Not enough operands to replace all defs"); 1964*e8d8bef9SDimitry Andric unsigned NumElems = MI.getNumOperands() - 1; 1965*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1966*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) { 1967*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(Idx).getReg(); 1968*e8d8bef9SDimitry Andric Builder.buildConstant(DstReg, Csts[Idx]); 1969*e8d8bef9SDimitry Andric } 1970*e8d8bef9SDimitry Andric 1971*e8d8bef9SDimitry Andric MI.eraseFromParent(); 1972*e8d8bef9SDimitry Andric return true; 1973*e8d8bef9SDimitry Andric } 1974*e8d8bef9SDimitry Andric 1975*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 1976*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1977*e8d8bef9SDimitry Andric "Expected an unmerge"); 1978*e8d8bef9SDimitry Andric // Check that all the lanes are dead except the first one. 1979*e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { 1980*e8d8bef9SDimitry Andric if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) 1981*e8d8bef9SDimitry Andric return false; 1982*e8d8bef9SDimitry Andric } 1983*e8d8bef9SDimitry Andric return true; 1984*e8d8bef9SDimitry Andric } 1985*e8d8bef9SDimitry Andric 1986*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 1987*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1988*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); 1989*e8d8bef9SDimitry Andric // Truncating a vector is going to truncate every single lane, 1990*e8d8bef9SDimitry Andric // whereas we want the full lowbits. 1991*e8d8bef9SDimitry Andric // Do the operation on a scalar instead. 1992*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1993*e8d8bef9SDimitry Andric if (SrcTy.isVector()) 1994*e8d8bef9SDimitry Andric SrcReg = 1995*e8d8bef9SDimitry Andric Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); 1996*e8d8bef9SDimitry Andric 1997*e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 1998*e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 1999*e8d8bef9SDimitry Andric if (Dst0Ty.isVector()) { 2000*e8d8bef9SDimitry Andric auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); 2001*e8d8bef9SDimitry Andric Builder.buildCast(Dst0Reg, MIB); 2002*e8d8bef9SDimitry Andric } else 2003*e8d8bef9SDimitry Andric Builder.buildTrunc(Dst0Reg, SrcReg); 2004*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2005*e8d8bef9SDimitry Andric return true; 2006*e8d8bef9SDimitry Andric } 2007*e8d8bef9SDimitry Andric 2008*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { 2009*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 2010*e8d8bef9SDimitry Andric "Expected an unmerge"); 2011*e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 2012*e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 2013*e8d8bef9SDimitry Andric // G_ZEXT on vector applies to each lane, so it will 2014*e8d8bef9SDimitry Andric // affect all destinations. Therefore we won't be able 2015*e8d8bef9SDimitry Andric // to simplify the unmerge to just the first definition. 2016*e8d8bef9SDimitry Andric if (Dst0Ty.isVector()) 2017*e8d8bef9SDimitry Andric return false; 2018*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); 2019*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2020*e8d8bef9SDimitry Andric if (SrcTy.isVector()) 2021*e8d8bef9SDimitry Andric return false; 2022*e8d8bef9SDimitry Andric 2023*e8d8bef9SDimitry Andric Register ZExtSrcReg; 2024*e8d8bef9SDimitry Andric if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) 2025*e8d8bef9SDimitry Andric return false; 2026*e8d8bef9SDimitry Andric 2027*e8d8bef9SDimitry Andric // Finally we can replace the first definition with 2028*e8d8bef9SDimitry Andric // a zext of the source if the definition is big enough to hold 2029*e8d8bef9SDimitry Andric // all of ZExtSrc bits. 2030*e8d8bef9SDimitry Andric LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); 2031*e8d8bef9SDimitry Andric return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); 2032*e8d8bef9SDimitry Andric } 2033*e8d8bef9SDimitry Andric 2034*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { 2035*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 2036*e8d8bef9SDimitry Andric "Expected an unmerge"); 2037*e8d8bef9SDimitry Andric 2038*e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 2039*e8d8bef9SDimitry Andric 2040*e8d8bef9SDimitry Andric MachineInstr *ZExtInstr = 2041*e8d8bef9SDimitry Andric MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); 2042*e8d8bef9SDimitry Andric assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && 2043*e8d8bef9SDimitry Andric "Expecting a G_ZEXT"); 2044*e8d8bef9SDimitry Andric 2045*e8d8bef9SDimitry Andric Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); 2046*e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 2047*e8d8bef9SDimitry Andric LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); 2048*e8d8bef9SDimitry Andric 2049*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2050*e8d8bef9SDimitry Andric 2051*e8d8bef9SDimitry Andric if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { 2052*e8d8bef9SDimitry Andric Builder.buildZExt(Dst0Reg, ZExtSrcReg); 2053*e8d8bef9SDimitry Andric } else { 2054*e8d8bef9SDimitry Andric assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && 2055*e8d8bef9SDimitry Andric "ZExt src doesn't fit in destination"); 2056*e8d8bef9SDimitry Andric replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); 2057*e8d8bef9SDimitry Andric } 2058*e8d8bef9SDimitry Andric 2059*e8d8bef9SDimitry Andric Register ZeroReg; 2060*e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { 2061*e8d8bef9SDimitry Andric if (!ZeroReg) 2062*e8d8bef9SDimitry Andric ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); 2063*e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); 2064*e8d8bef9SDimitry Andric } 2065*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2066*e8d8bef9SDimitry Andric return true; 2067*e8d8bef9SDimitry Andric } 2068*e8d8bef9SDimitry Andric 20695ffd83dbSDimitry Andric bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, 20705ffd83dbSDimitry Andric unsigned TargetShiftSize, 20715ffd83dbSDimitry Andric unsigned &ShiftVal) { 20725ffd83dbSDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_SHL || 20735ffd83dbSDimitry Andric MI.getOpcode() == TargetOpcode::G_LSHR || 20745ffd83dbSDimitry Andric MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift"); 20755ffd83dbSDimitry Andric 20765ffd83dbSDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 20775ffd83dbSDimitry Andric if (Ty.isVector()) // TODO: 20785ffd83dbSDimitry Andric return false; 20795ffd83dbSDimitry Andric 20805ffd83dbSDimitry Andric // Don't narrow further than the requested size. 20815ffd83dbSDimitry Andric unsigned Size = Ty.getSizeInBits(); 20825ffd83dbSDimitry Andric if (Size <= TargetShiftSize) 20835ffd83dbSDimitry Andric return false; 20845ffd83dbSDimitry Andric 20855ffd83dbSDimitry Andric auto MaybeImmVal = 20865ffd83dbSDimitry Andric getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 20875ffd83dbSDimitry Andric if (!MaybeImmVal) 20885ffd83dbSDimitry Andric return false; 20895ffd83dbSDimitry Andric 2090*e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.getSExtValue(); 20915ffd83dbSDimitry Andric return ShiftVal >= Size / 2 && ShiftVal < Size; 20925ffd83dbSDimitry Andric } 20935ffd83dbSDimitry Andric 20945ffd83dbSDimitry Andric bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, 20955ffd83dbSDimitry Andric const unsigned &ShiftVal) { 20965ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 20975ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 20985ffd83dbSDimitry Andric LLT Ty = MRI.getType(SrcReg); 20995ffd83dbSDimitry Andric unsigned Size = Ty.getSizeInBits(); 21005ffd83dbSDimitry Andric unsigned HalfSize = Size / 2; 21015ffd83dbSDimitry Andric assert(ShiftVal >= HalfSize); 21025ffd83dbSDimitry Andric 21035ffd83dbSDimitry Andric LLT HalfTy = LLT::scalar(HalfSize); 21045ffd83dbSDimitry Andric 21055ffd83dbSDimitry Andric Builder.setInstr(MI); 21065ffd83dbSDimitry Andric auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg); 21075ffd83dbSDimitry Andric unsigned NarrowShiftAmt = ShiftVal - HalfSize; 21085ffd83dbSDimitry Andric 21095ffd83dbSDimitry Andric if (MI.getOpcode() == TargetOpcode::G_LSHR) { 21105ffd83dbSDimitry Andric Register Narrowed = Unmerge.getReg(1); 21115ffd83dbSDimitry Andric 21125ffd83dbSDimitry Andric // dst = G_LSHR s64:x, C for C >= 32 21135ffd83dbSDimitry Andric // => 21145ffd83dbSDimitry Andric // lo, hi = G_UNMERGE_VALUES x 21155ffd83dbSDimitry Andric // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0 21165ffd83dbSDimitry Andric 21175ffd83dbSDimitry Andric if (NarrowShiftAmt != 0) { 21185ffd83dbSDimitry Andric Narrowed = Builder.buildLShr(HalfTy, Narrowed, 21195ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 21205ffd83dbSDimitry Andric } 21215ffd83dbSDimitry Andric 21225ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 21235ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Narrowed, Zero }); 21245ffd83dbSDimitry Andric } else if (MI.getOpcode() == TargetOpcode::G_SHL) { 21255ffd83dbSDimitry Andric Register Narrowed = Unmerge.getReg(0); 21265ffd83dbSDimitry Andric // dst = G_SHL s64:x, C for C >= 32 21275ffd83dbSDimitry Andric // => 21285ffd83dbSDimitry Andric // lo, hi = G_UNMERGE_VALUES x 21295ffd83dbSDimitry Andric // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32) 21305ffd83dbSDimitry Andric if (NarrowShiftAmt != 0) { 21315ffd83dbSDimitry Andric Narrowed = Builder.buildShl(HalfTy, Narrowed, 21325ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 21335ffd83dbSDimitry Andric } 21345ffd83dbSDimitry Andric 21355ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 21365ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Zero, Narrowed }); 21375ffd83dbSDimitry Andric } else { 21385ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 21395ffd83dbSDimitry Andric auto Hi = Builder.buildAShr( 21405ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 21415ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, HalfSize - 1)); 21425ffd83dbSDimitry Andric 21435ffd83dbSDimitry Andric if (ShiftVal == HalfSize) { 21445ffd83dbSDimitry Andric // (G_ASHR i64:x, 32) -> 21455ffd83dbSDimitry Andric // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31) 21465ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi }); 21475ffd83dbSDimitry Andric } else if (ShiftVal == Size - 1) { 21485ffd83dbSDimitry Andric // Don't need a second shift. 21495ffd83dbSDimitry Andric // (G_ASHR i64:x, 63) -> 21505ffd83dbSDimitry Andric // %narrowed = (G_ASHR hi_32(x), 31) 21515ffd83dbSDimitry Andric // G_MERGE_VALUES %narrowed, %narrowed 21525ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Hi, Hi }); 21535ffd83dbSDimitry Andric } else { 21545ffd83dbSDimitry Andric auto Lo = Builder.buildAShr( 21555ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 21565ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, ShiftVal - HalfSize)); 21575ffd83dbSDimitry Andric 21585ffd83dbSDimitry Andric // (G_ASHR i64:x, C) ->, for C >= 32 21595ffd83dbSDimitry Andric // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31) 21605ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Lo, Hi }); 21615ffd83dbSDimitry Andric } 21625ffd83dbSDimitry Andric } 21635ffd83dbSDimitry Andric 21645ffd83dbSDimitry Andric MI.eraseFromParent(); 21655ffd83dbSDimitry Andric return true; 21665ffd83dbSDimitry Andric } 21675ffd83dbSDimitry Andric 21685ffd83dbSDimitry Andric bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, 21695ffd83dbSDimitry Andric unsigned TargetShiftAmount) { 21705ffd83dbSDimitry Andric unsigned ShiftAmt; 21715ffd83dbSDimitry Andric if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) { 21725ffd83dbSDimitry Andric applyCombineShiftToUnmerge(MI, ShiftAmt); 21735ffd83dbSDimitry Andric return true; 21745ffd83dbSDimitry Andric } 21755ffd83dbSDimitry Andric 21765ffd83dbSDimitry Andric return false; 21775ffd83dbSDimitry Andric } 21785ffd83dbSDimitry Andric 2179*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { 2180*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); 2181*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2182*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2183*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2184*e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, 2185*e8d8bef9SDimitry Andric m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); 2186*e8d8bef9SDimitry Andric } 2187*e8d8bef9SDimitry Andric 2188*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { 2189*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); 2190*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2191*e8d8bef9SDimitry Andric Builder.setInstr(MI); 2192*e8d8bef9SDimitry Andric Builder.buildCopy(DstReg, Reg); 2193*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2194*e8d8bef9SDimitry Andric return true; 2195*e8d8bef9SDimitry Andric } 2196*e8d8bef9SDimitry Andric 2197*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { 2198*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); 2199*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2200*e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); 2201*e8d8bef9SDimitry Andric } 2202*e8d8bef9SDimitry Andric 2203*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { 2204*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); 2205*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2206*e8d8bef9SDimitry Andric Builder.setInstr(MI); 2207*e8d8bef9SDimitry Andric Builder.buildZExtOrTrunc(DstReg, Reg); 2208*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2209*e8d8bef9SDimitry Andric return true; 2210*e8d8bef9SDimitry Andric } 2211*e8d8bef9SDimitry Andric 2212*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineAddP2IToPtrAdd( 2213*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, bool> &PtrReg) { 2214*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ADD); 2215*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2216*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2217*e8d8bef9SDimitry Andric LLT IntTy = MRI.getType(LHS); 2218*e8d8bef9SDimitry Andric 2219*e8d8bef9SDimitry Andric // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the 2220*e8d8bef9SDimitry Andric // instruction. 2221*e8d8bef9SDimitry Andric PtrReg.second = false; 2222*e8d8bef9SDimitry Andric for (Register SrcReg : {LHS, RHS}) { 2223*e8d8bef9SDimitry Andric if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { 2224*e8d8bef9SDimitry Andric // Don't handle cases where the integer is implicitly converted to the 2225*e8d8bef9SDimitry Andric // pointer width. 2226*e8d8bef9SDimitry Andric LLT PtrTy = MRI.getType(PtrReg.first); 2227*e8d8bef9SDimitry Andric if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) 2228*e8d8bef9SDimitry Andric return true; 2229*e8d8bef9SDimitry Andric } 2230*e8d8bef9SDimitry Andric 2231*e8d8bef9SDimitry Andric PtrReg.second = true; 2232*e8d8bef9SDimitry Andric } 2233*e8d8bef9SDimitry Andric 2234*e8d8bef9SDimitry Andric return false; 2235*e8d8bef9SDimitry Andric } 2236*e8d8bef9SDimitry Andric 2237*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineAddP2IToPtrAdd( 2238*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, bool> &PtrReg) { 2239*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2240*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2241*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2242*e8d8bef9SDimitry Andric 2243*e8d8bef9SDimitry Andric const bool DoCommute = PtrReg.second; 2244*e8d8bef9SDimitry Andric if (DoCommute) 2245*e8d8bef9SDimitry Andric std::swap(LHS, RHS); 2246*e8d8bef9SDimitry Andric LHS = PtrReg.first; 2247*e8d8bef9SDimitry Andric 2248*e8d8bef9SDimitry Andric LLT PtrTy = MRI.getType(LHS); 2249*e8d8bef9SDimitry Andric 2250*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2251*e8d8bef9SDimitry Andric auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); 2252*e8d8bef9SDimitry Andric Builder.buildPtrToInt(Dst, PtrAdd); 2253*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2254*e8d8bef9SDimitry Andric return true; 2255*e8d8bef9SDimitry Andric } 2256*e8d8bef9SDimitry Andric 2257*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, 2258*e8d8bef9SDimitry Andric int64_t &NewCst) { 2259*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); 2260*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2261*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2262*e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); 2263*e8d8bef9SDimitry Andric 2264*e8d8bef9SDimitry Andric if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) { 2265*e8d8bef9SDimitry Andric int64_t Cst; 2266*e8d8bef9SDimitry Andric if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { 2267*e8d8bef9SDimitry Andric NewCst = Cst + *RHSCst; 2268*e8d8bef9SDimitry Andric return true; 2269*e8d8bef9SDimitry Andric } 2270*e8d8bef9SDimitry Andric } 2271*e8d8bef9SDimitry Andric 2272*e8d8bef9SDimitry Andric return false; 2273*e8d8bef9SDimitry Andric } 2274*e8d8bef9SDimitry Andric 2275*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, 2276*e8d8bef9SDimitry Andric int64_t &NewCst) { 2277*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD"); 2278*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2279*e8d8bef9SDimitry Andric 2280*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2281*e8d8bef9SDimitry Andric Builder.buildConstant(Dst, NewCst); 2282*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2283*e8d8bef9SDimitry Andric return true; 2284*e8d8bef9SDimitry Andric } 2285*e8d8bef9SDimitry Andric 2286*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { 2287*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); 2288*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2289*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2290*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2291*e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, 2292*e8d8bef9SDimitry Andric m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); 2293*e8d8bef9SDimitry Andric } 2294*e8d8bef9SDimitry Andric 2295*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { 2296*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); 2297*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2298*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2299*e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, Reg); 2300*e8d8bef9SDimitry Andric return true; 2301*e8d8bef9SDimitry Andric } 2302*e8d8bef9SDimitry Andric 2303*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineExtOfExt( 2304*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 2305*e8d8bef9SDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || 2306*e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_SEXT || 2307*e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_ZEXT) && 2308*e8d8bef9SDimitry Andric "Expected a G_[ASZ]EXT"); 2309*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2310*e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2311*e8d8bef9SDimitry Andric // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 2312*e8d8bef9SDimitry Andric unsigned Opc = MI.getOpcode(); 2313*e8d8bef9SDimitry Andric unsigned SrcOpc = SrcMI->getOpcode(); 2314*e8d8bef9SDimitry Andric if (Opc == SrcOpc || 2315*e8d8bef9SDimitry Andric (Opc == TargetOpcode::G_ANYEXT && 2316*e8d8bef9SDimitry Andric (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || 2317*e8d8bef9SDimitry Andric (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { 2318*e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); 2319*e8d8bef9SDimitry Andric return true; 2320*e8d8bef9SDimitry Andric } 2321*e8d8bef9SDimitry Andric return false; 2322*e8d8bef9SDimitry Andric } 2323*e8d8bef9SDimitry Andric 2324*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineExtOfExt( 2325*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 2326*e8d8bef9SDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || 2327*e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_SEXT || 2328*e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_ZEXT) && 2329*e8d8bef9SDimitry Andric "Expected a G_[ASZ]EXT"); 2330*e8d8bef9SDimitry Andric 2331*e8d8bef9SDimitry Andric Register Reg = std::get<0>(MatchInfo); 2332*e8d8bef9SDimitry Andric unsigned SrcExtOp = std::get<1>(MatchInfo); 2333*e8d8bef9SDimitry Andric 2334*e8d8bef9SDimitry Andric // Combine exts with the same opcode. 2335*e8d8bef9SDimitry Andric if (MI.getOpcode() == SrcExtOp) { 2336*e8d8bef9SDimitry Andric Observer.changingInstr(MI); 2337*e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Reg); 2338*e8d8bef9SDimitry Andric Observer.changedInstr(MI); 2339*e8d8bef9SDimitry Andric return true; 2340*e8d8bef9SDimitry Andric } 2341*e8d8bef9SDimitry Andric 2342*e8d8bef9SDimitry Andric // Combine: 2343*e8d8bef9SDimitry Andric // - anyext([sz]ext x) to [sz]ext x 2344*e8d8bef9SDimitry Andric // - sext(zext x) to zext x 2345*e8d8bef9SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_ANYEXT || 2346*e8d8bef9SDimitry Andric (MI.getOpcode() == TargetOpcode::G_SEXT && 2347*e8d8bef9SDimitry Andric SrcExtOp == TargetOpcode::G_ZEXT)) { 2348*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2349*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2350*e8d8bef9SDimitry Andric Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); 2351*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2352*e8d8bef9SDimitry Andric return true; 2353*e8d8bef9SDimitry Andric } 2354*e8d8bef9SDimitry Andric 2355*e8d8bef9SDimitry Andric return false; 2356*e8d8bef9SDimitry Andric } 2357*e8d8bef9SDimitry Andric 2358*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { 2359*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 2360*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2361*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2362*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2363*e8d8bef9SDimitry Andric 2364*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2365*e8d8bef9SDimitry Andric Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, 2366*e8d8bef9SDimitry Andric MI.getFlags()); 2367*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2368*e8d8bef9SDimitry Andric return true; 2369*e8d8bef9SDimitry Andric } 2370*e8d8bef9SDimitry Andric 2371*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { 2372*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); 2373*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2374*e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); 2375*e8d8bef9SDimitry Andric } 2376*e8d8bef9SDimitry Andric 2377*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { 2378*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); 2379*e8d8bef9SDimitry Andric Src = MI.getOperand(1).getReg(); 2380*e8d8bef9SDimitry Andric Register AbsSrc; 2381*e8d8bef9SDimitry Andric return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); 2382*e8d8bef9SDimitry Andric } 2383*e8d8bef9SDimitry Andric 2384*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { 2385*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); 2386*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2387*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2388*e8d8bef9SDimitry Andric replaceRegWith(MRI, Dst, Src); 2389*e8d8bef9SDimitry Andric return true; 2390*e8d8bef9SDimitry Andric } 2391*e8d8bef9SDimitry Andric 2392*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfExt( 2393*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2394*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2395*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2396*e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2397*e8d8bef9SDimitry Andric unsigned SrcOpc = SrcMI->getOpcode(); 2398*e8d8bef9SDimitry Andric if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || 2399*e8d8bef9SDimitry Andric SrcOpc == TargetOpcode::G_ZEXT) { 2400*e8d8bef9SDimitry Andric MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); 2401*e8d8bef9SDimitry Andric return true; 2402*e8d8bef9SDimitry Andric } 2403*e8d8bef9SDimitry Andric return false; 2404*e8d8bef9SDimitry Andric } 2405*e8d8bef9SDimitry Andric 2406*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineTruncOfExt( 2407*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2408*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2409*e8d8bef9SDimitry Andric Register SrcReg = MatchInfo.first; 2410*e8d8bef9SDimitry Andric unsigned SrcExtOp = MatchInfo.second; 2411*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2412*e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2413*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2414*e8d8bef9SDimitry Andric if (SrcTy == DstTy) { 2415*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2416*e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 2417*e8d8bef9SDimitry Andric return true; 2418*e8d8bef9SDimitry Andric } 2419*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2420*e8d8bef9SDimitry Andric if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) 2421*e8d8bef9SDimitry Andric Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); 2422*e8d8bef9SDimitry Andric else 2423*e8d8bef9SDimitry Andric Builder.buildTrunc(DstReg, SrcReg); 2424*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2425*e8d8bef9SDimitry Andric return true; 2426*e8d8bef9SDimitry Andric } 2427*e8d8bef9SDimitry Andric 2428*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfShl( 2429*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2430*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2431*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2432*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2433*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2434*e8d8bef9SDimitry Andric Register ShiftSrc; 2435*e8d8bef9SDimitry Andric Register ShiftAmt; 2436*e8d8bef9SDimitry Andric 2437*e8d8bef9SDimitry Andric if (MRI.hasOneNonDBGUse(SrcReg) && 2438*e8d8bef9SDimitry Andric mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && 2439*e8d8bef9SDimitry Andric isLegalOrBeforeLegalizer( 2440*e8d8bef9SDimitry Andric {TargetOpcode::G_SHL, 2441*e8d8bef9SDimitry Andric {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { 2442*e8d8bef9SDimitry Andric KnownBits Known = KB->getKnownBits(ShiftAmt); 2443*e8d8bef9SDimitry Andric unsigned Size = DstTy.getSizeInBits(); 2444*e8d8bef9SDimitry Andric if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { 2445*e8d8bef9SDimitry Andric MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); 2446*e8d8bef9SDimitry Andric return true; 2447*e8d8bef9SDimitry Andric } 2448*e8d8bef9SDimitry Andric } 2449*e8d8bef9SDimitry Andric return false; 2450*e8d8bef9SDimitry Andric } 2451*e8d8bef9SDimitry Andric 2452*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineTruncOfShl( 2453*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2454*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2455*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2456*e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2457*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2458*e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2459*e8d8bef9SDimitry Andric 2460*e8d8bef9SDimitry Andric Register ShiftSrc = MatchInfo.first; 2461*e8d8bef9SDimitry Andric Register ShiftAmt = MatchInfo.second; 2462*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2463*e8d8bef9SDimitry Andric auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); 2464*e8d8bef9SDimitry Andric Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); 2465*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2466*e8d8bef9SDimitry Andric return true; 2467*e8d8bef9SDimitry Andric } 2468*e8d8bef9SDimitry Andric 24695ffd83dbSDimitry Andric bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { 24705ffd83dbSDimitry Andric return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 24715ffd83dbSDimitry Andric return MO.isReg() && 24725ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 24735ffd83dbSDimitry Andric }); 24745ffd83dbSDimitry Andric } 24755ffd83dbSDimitry Andric 24765ffd83dbSDimitry Andric bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) { 24775ffd83dbSDimitry Andric return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 24785ffd83dbSDimitry Andric return !MO.isReg() || 24795ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 24805ffd83dbSDimitry Andric }); 24815ffd83dbSDimitry Andric } 24825ffd83dbSDimitry Andric 24835ffd83dbSDimitry Andric bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) { 24845ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); 24855ffd83dbSDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 24865ffd83dbSDimitry Andric return all_of(Mask, [](int Elt) { return Elt < 0; }); 24875ffd83dbSDimitry Andric } 24885ffd83dbSDimitry Andric 24895ffd83dbSDimitry Andric bool CombinerHelper::matchUndefStore(MachineInstr &MI) { 24905ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_STORE); 24915ffd83dbSDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), 24925ffd83dbSDimitry Andric MRI); 24935ffd83dbSDimitry Andric } 24945ffd83dbSDimitry Andric 2495*e8d8bef9SDimitry Andric bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { 2496*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 2497*e8d8bef9SDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), 2498*e8d8bef9SDimitry Andric MRI); 2499*e8d8bef9SDimitry Andric } 2500*e8d8bef9SDimitry Andric 2501*e8d8bef9SDimitry Andric bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { 2502*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 2503*e8d8bef9SDimitry Andric if (auto MaybeCstCmp = 2504*e8d8bef9SDimitry Andric getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { 2505*e8d8bef9SDimitry Andric OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2; 2506*e8d8bef9SDimitry Andric return true; 2507*e8d8bef9SDimitry Andric } 2508*e8d8bef9SDimitry Andric return false; 2509*e8d8bef9SDimitry Andric } 2510*e8d8bef9SDimitry Andric 25115ffd83dbSDimitry Andric bool CombinerHelper::eraseInst(MachineInstr &MI) { 25125ffd83dbSDimitry Andric MI.eraseFromParent(); 25135ffd83dbSDimitry Andric return true; 25145ffd83dbSDimitry Andric } 25155ffd83dbSDimitry Andric 25165ffd83dbSDimitry Andric bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, 25175ffd83dbSDimitry Andric const MachineOperand &MOP2) { 25185ffd83dbSDimitry Andric if (!MOP1.isReg() || !MOP2.isReg()) 25195ffd83dbSDimitry Andric return false; 25205ffd83dbSDimitry Andric MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI); 25215ffd83dbSDimitry Andric if (!I1) 25225ffd83dbSDimitry Andric return false; 25235ffd83dbSDimitry Andric MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI); 25245ffd83dbSDimitry Andric if (!I2) 25255ffd83dbSDimitry Andric return false; 25265ffd83dbSDimitry Andric 25275ffd83dbSDimitry Andric // Handle a case like this: 25285ffd83dbSDimitry Andric // 25295ffd83dbSDimitry Andric // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>) 25305ffd83dbSDimitry Andric // 25315ffd83dbSDimitry Andric // Even though %0 and %1 are produced by the same instruction they are not 25325ffd83dbSDimitry Andric // the same values. 25335ffd83dbSDimitry Andric if (I1 == I2) 25345ffd83dbSDimitry Andric return MOP1.getReg() == MOP2.getReg(); 25355ffd83dbSDimitry Andric 25365ffd83dbSDimitry Andric // If we have an instruction which loads or stores, we can't guarantee that 25375ffd83dbSDimitry Andric // it is identical. 25385ffd83dbSDimitry Andric // 25395ffd83dbSDimitry Andric // For example, we may have 25405ffd83dbSDimitry Andric // 25415ffd83dbSDimitry Andric // %x1 = G_LOAD %addr (load N from @somewhere) 25425ffd83dbSDimitry Andric // ... 25435ffd83dbSDimitry Andric // call @foo 25445ffd83dbSDimitry Andric // ... 25455ffd83dbSDimitry Andric // %x2 = G_LOAD %addr (load N from @somewhere) 25465ffd83dbSDimitry Andric // ... 25475ffd83dbSDimitry Andric // %or = G_OR %x1, %x2 25485ffd83dbSDimitry Andric // 25495ffd83dbSDimitry Andric // It's possible that @foo will modify whatever lives at the address we're 25505ffd83dbSDimitry Andric // loading from. To be safe, let's just assume that all loads and stores 25515ffd83dbSDimitry Andric // are different (unless we have something which is guaranteed to not 25525ffd83dbSDimitry Andric // change.) 25535ffd83dbSDimitry Andric if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) 25545ffd83dbSDimitry Andric return false; 25555ffd83dbSDimitry Andric 25565ffd83dbSDimitry Andric // Check for physical registers on the instructions first to avoid cases 25575ffd83dbSDimitry Andric // like this: 25585ffd83dbSDimitry Andric // 25595ffd83dbSDimitry Andric // %a = COPY $physreg 25605ffd83dbSDimitry Andric // ... 25615ffd83dbSDimitry Andric // SOMETHING implicit-def $physreg 25625ffd83dbSDimitry Andric // ... 25635ffd83dbSDimitry Andric // %b = COPY $physreg 25645ffd83dbSDimitry Andric // 25655ffd83dbSDimitry Andric // These copies are not equivalent. 25665ffd83dbSDimitry Andric if (any_of(I1->uses(), [](const MachineOperand &MO) { 25675ffd83dbSDimitry Andric return MO.isReg() && MO.getReg().isPhysical(); 25685ffd83dbSDimitry Andric })) { 25695ffd83dbSDimitry Andric // Check if we have a case like this: 25705ffd83dbSDimitry Andric // 25715ffd83dbSDimitry Andric // %a = COPY $physreg 25725ffd83dbSDimitry Andric // %b = COPY %a 25735ffd83dbSDimitry Andric // 25745ffd83dbSDimitry Andric // In this case, I1 and I2 will both be equal to %a = COPY $physreg. 25755ffd83dbSDimitry Andric // From that, we know that they must have the same value, since they must 25765ffd83dbSDimitry Andric // have come from the same COPY. 25775ffd83dbSDimitry Andric return I1->isIdenticalTo(*I2); 25785ffd83dbSDimitry Andric } 25795ffd83dbSDimitry Andric 25805ffd83dbSDimitry Andric // We don't have any physical registers, so we don't necessarily need the 25815ffd83dbSDimitry Andric // same vreg defs. 25825ffd83dbSDimitry Andric // 25835ffd83dbSDimitry Andric // On the off-chance that there's some target instruction feeding into the 25845ffd83dbSDimitry Andric // instruction, let's use produceSameValue instead of isIdenticalTo. 25855ffd83dbSDimitry Andric return Builder.getTII().produceSameValue(*I1, *I2, &MRI); 25865ffd83dbSDimitry Andric } 25875ffd83dbSDimitry Andric 25885ffd83dbSDimitry Andric bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { 25895ffd83dbSDimitry Andric if (!MOP.isReg()) 25905ffd83dbSDimitry Andric return false; 25915ffd83dbSDimitry Andric // MIPatternMatch doesn't let us look through G_ZEXT etc. 25925ffd83dbSDimitry Andric auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI); 25935ffd83dbSDimitry Andric return ValAndVReg && ValAndVReg->Value == C; 25945ffd83dbSDimitry Andric } 25955ffd83dbSDimitry Andric 25965ffd83dbSDimitry Andric bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, 25975ffd83dbSDimitry Andric unsigned OpIdx) { 25985ffd83dbSDimitry Andric assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); 25995ffd83dbSDimitry Andric Register OldReg = MI.getOperand(0).getReg(); 26005ffd83dbSDimitry Andric Register Replacement = MI.getOperand(OpIdx).getReg(); 26015ffd83dbSDimitry Andric assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); 26025ffd83dbSDimitry Andric MI.eraseFromParent(); 26035ffd83dbSDimitry Andric replaceRegWith(MRI, OldReg, Replacement); 26045ffd83dbSDimitry Andric return true; 26055ffd83dbSDimitry Andric } 26065ffd83dbSDimitry Andric 2607*e8d8bef9SDimitry Andric bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, 2608*e8d8bef9SDimitry Andric Register Replacement) { 2609*e8d8bef9SDimitry Andric assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); 2610*e8d8bef9SDimitry Andric Register OldReg = MI.getOperand(0).getReg(); 2611*e8d8bef9SDimitry Andric assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); 2612*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2613*e8d8bef9SDimitry Andric replaceRegWith(MRI, OldReg, Replacement); 2614*e8d8bef9SDimitry Andric return true; 2615*e8d8bef9SDimitry Andric } 2616*e8d8bef9SDimitry Andric 26175ffd83dbSDimitry Andric bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { 26185ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 26195ffd83dbSDimitry Andric // Match (cond ? x : x) 26205ffd83dbSDimitry Andric return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) && 26215ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(), 26225ffd83dbSDimitry Andric MRI); 26235ffd83dbSDimitry Andric } 26245ffd83dbSDimitry Andric 26255ffd83dbSDimitry Andric bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) { 26265ffd83dbSDimitry Andric return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) && 26275ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), 26285ffd83dbSDimitry Andric MRI); 26295ffd83dbSDimitry Andric } 26305ffd83dbSDimitry Andric 26315ffd83dbSDimitry Andric bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { 26325ffd83dbSDimitry Andric return matchConstantOp(MI.getOperand(OpIdx), 0) && 26335ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(), 26345ffd83dbSDimitry Andric MRI); 26355ffd83dbSDimitry Andric } 26365ffd83dbSDimitry Andric 2637*e8d8bef9SDimitry Andric bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { 2638*e8d8bef9SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 2639*e8d8bef9SDimitry Andric return MO.isReg() && 2640*e8d8bef9SDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 2641*e8d8bef9SDimitry Andric } 2642*e8d8bef9SDimitry Andric 2643*e8d8bef9SDimitry Andric bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, 2644*e8d8bef9SDimitry Andric unsigned OpIdx) { 2645*e8d8bef9SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 2646*e8d8bef9SDimitry Andric return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); 2647*e8d8bef9SDimitry Andric } 2648*e8d8bef9SDimitry Andric 26495ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { 26505ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 26515ffd83dbSDimitry Andric Builder.setInstr(MI); 26525ffd83dbSDimitry Andric Builder.buildFConstant(MI.getOperand(0), C); 26535ffd83dbSDimitry Andric MI.eraseFromParent(); 26545ffd83dbSDimitry Andric return true; 26555ffd83dbSDimitry Andric } 26565ffd83dbSDimitry Andric 26575ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { 26585ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 26595ffd83dbSDimitry Andric Builder.setInstr(MI); 26605ffd83dbSDimitry Andric Builder.buildConstant(MI.getOperand(0), C); 26615ffd83dbSDimitry Andric MI.eraseFromParent(); 26625ffd83dbSDimitry Andric return true; 26635ffd83dbSDimitry Andric } 26645ffd83dbSDimitry Andric 26655ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { 26665ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 26675ffd83dbSDimitry Andric Builder.setInstr(MI); 26685ffd83dbSDimitry Andric Builder.buildUndef(MI.getOperand(0)); 26695ffd83dbSDimitry Andric MI.eraseFromParent(); 26705ffd83dbSDimitry Andric return true; 26715ffd83dbSDimitry Andric } 26725ffd83dbSDimitry Andric 26735ffd83dbSDimitry Andric bool CombinerHelper::matchSimplifyAddToSub( 26745ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 26755ffd83dbSDimitry Andric Register LHS = MI.getOperand(1).getReg(); 26765ffd83dbSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 26775ffd83dbSDimitry Andric Register &NewLHS = std::get<0>(MatchInfo); 26785ffd83dbSDimitry Andric Register &NewRHS = std::get<1>(MatchInfo); 26795ffd83dbSDimitry Andric 26805ffd83dbSDimitry Andric // Helper lambda to check for opportunities for 26815ffd83dbSDimitry Andric // ((0-A) + B) -> B - A 26825ffd83dbSDimitry Andric // (A + (0-B)) -> A - B 26835ffd83dbSDimitry Andric auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { 2684*e8d8bef9SDimitry Andric if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) 26855ffd83dbSDimitry Andric return false; 26865ffd83dbSDimitry Andric NewLHS = MaybeNewLHS; 26875ffd83dbSDimitry Andric return true; 26885ffd83dbSDimitry Andric }; 26895ffd83dbSDimitry Andric 26905ffd83dbSDimitry Andric return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); 26915ffd83dbSDimitry Andric } 26925ffd83dbSDimitry Andric 2693*e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineInsertVecElts( 2694*e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2695*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && 2696*e8d8bef9SDimitry Andric "Invalid opcode"); 2697*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2698*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2699*e8d8bef9SDimitry Andric assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); 2700*e8d8bef9SDimitry Andric unsigned NumElts = DstTy.getNumElements(); 2701*e8d8bef9SDimitry Andric // If this MI is part of a sequence of insert_vec_elts, then 2702*e8d8bef9SDimitry Andric // don't do the combine in the middle of the sequence. 2703*e8d8bef9SDimitry Andric if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == 2704*e8d8bef9SDimitry Andric TargetOpcode::G_INSERT_VECTOR_ELT) 2705*e8d8bef9SDimitry Andric return false; 2706*e8d8bef9SDimitry Andric MachineInstr *CurrInst = &MI; 2707*e8d8bef9SDimitry Andric MachineInstr *TmpInst; 2708*e8d8bef9SDimitry Andric int64_t IntImm; 2709*e8d8bef9SDimitry Andric Register TmpReg; 2710*e8d8bef9SDimitry Andric MatchInfo.resize(NumElts); 2711*e8d8bef9SDimitry Andric while (mi_match( 2712*e8d8bef9SDimitry Andric CurrInst->getOperand(0).getReg(), MRI, 2713*e8d8bef9SDimitry Andric m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { 2714*e8d8bef9SDimitry Andric if (IntImm >= NumElts) 2715*e8d8bef9SDimitry Andric return false; 2716*e8d8bef9SDimitry Andric if (!MatchInfo[IntImm]) 2717*e8d8bef9SDimitry Andric MatchInfo[IntImm] = TmpReg; 2718*e8d8bef9SDimitry Andric CurrInst = TmpInst; 2719*e8d8bef9SDimitry Andric } 2720*e8d8bef9SDimitry Andric // Variable index. 2721*e8d8bef9SDimitry Andric if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) 2722*e8d8bef9SDimitry Andric return false; 2723*e8d8bef9SDimitry Andric if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { 2724*e8d8bef9SDimitry Andric for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { 2725*e8d8bef9SDimitry Andric if (!MatchInfo[I - 1].isValid()) 2726*e8d8bef9SDimitry Andric MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); 2727*e8d8bef9SDimitry Andric } 2728*e8d8bef9SDimitry Andric return true; 2729*e8d8bef9SDimitry Andric } 2730*e8d8bef9SDimitry Andric // If we didn't end in a G_IMPLICIT_DEF, bail out. 2731*e8d8bef9SDimitry Andric return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; 2732*e8d8bef9SDimitry Andric } 2733*e8d8bef9SDimitry Andric 2734*e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineInsertVecElts( 2735*e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2736*e8d8bef9SDimitry Andric Builder.setInstr(MI); 2737*e8d8bef9SDimitry Andric Register UndefReg; 2738*e8d8bef9SDimitry Andric auto GetUndef = [&]() { 2739*e8d8bef9SDimitry Andric if (UndefReg) 2740*e8d8bef9SDimitry Andric return UndefReg; 2741*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 2742*e8d8bef9SDimitry Andric UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); 2743*e8d8bef9SDimitry Andric return UndefReg; 2744*e8d8bef9SDimitry Andric }; 2745*e8d8bef9SDimitry Andric for (unsigned I = 0; I < MatchInfo.size(); ++I) { 2746*e8d8bef9SDimitry Andric if (!MatchInfo[I]) 2747*e8d8bef9SDimitry Andric MatchInfo[I] = GetUndef(); 2748*e8d8bef9SDimitry Andric } 2749*e8d8bef9SDimitry Andric Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); 2750*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2751*e8d8bef9SDimitry Andric return true; 2752*e8d8bef9SDimitry Andric } 2753*e8d8bef9SDimitry Andric 27545ffd83dbSDimitry Andric bool CombinerHelper::applySimplifyAddToSub( 27555ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 27565ffd83dbSDimitry Andric Builder.setInstr(MI); 27575ffd83dbSDimitry Andric Register SubLHS, SubRHS; 27585ffd83dbSDimitry Andric std::tie(SubLHS, SubRHS) = MatchInfo; 27595ffd83dbSDimitry Andric Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); 27605ffd83dbSDimitry Andric MI.eraseFromParent(); 27615ffd83dbSDimitry Andric return true; 27625ffd83dbSDimitry Andric } 27635ffd83dbSDimitry Andric 2764*e8d8bef9SDimitry Andric bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( 2765*e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2766*e8d8bef9SDimitry Andric // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... 2767*e8d8bef9SDimitry Andric // 2768*e8d8bef9SDimitry Andric // Creates the new hand + logic instruction (but does not insert them.) 2769*e8d8bef9SDimitry Andric // 2770*e8d8bef9SDimitry Andric // On success, MatchInfo is populated with the new instructions. These are 2771*e8d8bef9SDimitry Andric // inserted in applyHoistLogicOpWithSameOpcodeHands. 2772*e8d8bef9SDimitry Andric unsigned LogicOpcode = MI.getOpcode(); 2773*e8d8bef9SDimitry Andric assert(LogicOpcode == TargetOpcode::G_AND || 2774*e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_OR || 2775*e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_XOR); 2776*e8d8bef9SDimitry Andric MachineIRBuilder MIB(MI); 2777*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2778*e8d8bef9SDimitry Andric Register LHSReg = MI.getOperand(1).getReg(); 2779*e8d8bef9SDimitry Andric Register RHSReg = MI.getOperand(2).getReg(); 2780*e8d8bef9SDimitry Andric 2781*e8d8bef9SDimitry Andric // Don't recompute anything. 2782*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) 2783*e8d8bef9SDimitry Andric return false; 2784*e8d8bef9SDimitry Andric 2785*e8d8bef9SDimitry Andric // Make sure we have (hand x, ...), (hand y, ...) 2786*e8d8bef9SDimitry Andric MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); 2787*e8d8bef9SDimitry Andric MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); 2788*e8d8bef9SDimitry Andric if (!LeftHandInst || !RightHandInst) 2789*e8d8bef9SDimitry Andric return false; 2790*e8d8bef9SDimitry Andric unsigned HandOpcode = LeftHandInst->getOpcode(); 2791*e8d8bef9SDimitry Andric if (HandOpcode != RightHandInst->getOpcode()) 2792*e8d8bef9SDimitry Andric return false; 2793*e8d8bef9SDimitry Andric if (!LeftHandInst->getOperand(1).isReg() || 2794*e8d8bef9SDimitry Andric !RightHandInst->getOperand(1).isReg()) 2795*e8d8bef9SDimitry Andric return false; 2796*e8d8bef9SDimitry Andric 2797*e8d8bef9SDimitry Andric // Make sure the types match up, and if we're doing this post-legalization, 2798*e8d8bef9SDimitry Andric // we end up with legal types. 2799*e8d8bef9SDimitry Andric Register X = LeftHandInst->getOperand(1).getReg(); 2800*e8d8bef9SDimitry Andric Register Y = RightHandInst->getOperand(1).getReg(); 2801*e8d8bef9SDimitry Andric LLT XTy = MRI.getType(X); 2802*e8d8bef9SDimitry Andric LLT YTy = MRI.getType(Y); 2803*e8d8bef9SDimitry Andric if (XTy != YTy) 2804*e8d8bef9SDimitry Andric return false; 2805*e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) 2806*e8d8bef9SDimitry Andric return false; 2807*e8d8bef9SDimitry Andric 2808*e8d8bef9SDimitry Andric // Optional extra source register. 2809*e8d8bef9SDimitry Andric Register ExtraHandOpSrcReg; 2810*e8d8bef9SDimitry Andric switch (HandOpcode) { 2811*e8d8bef9SDimitry Andric default: 2812*e8d8bef9SDimitry Andric return false; 2813*e8d8bef9SDimitry Andric case TargetOpcode::G_ANYEXT: 2814*e8d8bef9SDimitry Andric case TargetOpcode::G_SEXT: 2815*e8d8bef9SDimitry Andric case TargetOpcode::G_ZEXT: { 2816*e8d8bef9SDimitry Andric // Match: logic (ext X), (ext Y) --> ext (logic X, Y) 2817*e8d8bef9SDimitry Andric break; 2818*e8d8bef9SDimitry Andric } 2819*e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 2820*e8d8bef9SDimitry Andric case TargetOpcode::G_ASHR: 2821*e8d8bef9SDimitry Andric case TargetOpcode::G_LSHR: 2822*e8d8bef9SDimitry Andric case TargetOpcode::G_SHL: { 2823*e8d8bef9SDimitry Andric // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z 2824*e8d8bef9SDimitry Andric MachineOperand &ZOp = LeftHandInst->getOperand(2); 2825*e8d8bef9SDimitry Andric if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) 2826*e8d8bef9SDimitry Andric return false; 2827*e8d8bef9SDimitry Andric ExtraHandOpSrcReg = ZOp.getReg(); 2828*e8d8bef9SDimitry Andric break; 2829*e8d8bef9SDimitry Andric } 2830*e8d8bef9SDimitry Andric } 2831*e8d8bef9SDimitry Andric 2832*e8d8bef9SDimitry Andric // Record the steps to build the new instructions. 2833*e8d8bef9SDimitry Andric // 2834*e8d8bef9SDimitry Andric // Steps to build (logic x, y) 2835*e8d8bef9SDimitry Andric auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); 2836*e8d8bef9SDimitry Andric OperandBuildSteps LogicBuildSteps = { 2837*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, 2838*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, 2839*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; 2840*e8d8bef9SDimitry Andric InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); 2841*e8d8bef9SDimitry Andric 2842*e8d8bef9SDimitry Andric // Steps to build hand (logic x, y), ...z 2843*e8d8bef9SDimitry Andric OperandBuildSteps HandBuildSteps = { 2844*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, 2845*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; 2846*e8d8bef9SDimitry Andric if (ExtraHandOpSrcReg.isValid()) 2847*e8d8bef9SDimitry Andric HandBuildSteps.push_back( 2848*e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); 2849*e8d8bef9SDimitry Andric InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); 2850*e8d8bef9SDimitry Andric 2851*e8d8bef9SDimitry Andric MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); 2852*e8d8bef9SDimitry Andric return true; 2853*e8d8bef9SDimitry Andric } 2854*e8d8bef9SDimitry Andric 2855*e8d8bef9SDimitry Andric bool CombinerHelper::applyBuildInstructionSteps( 2856*e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2857*e8d8bef9SDimitry Andric assert(MatchInfo.InstrsToBuild.size() && 2858*e8d8bef9SDimitry Andric "Expected at least one instr to build?"); 2859*e8d8bef9SDimitry Andric Builder.setInstr(MI); 2860*e8d8bef9SDimitry Andric for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { 2861*e8d8bef9SDimitry Andric assert(InstrToBuild.Opcode && "Expected a valid opcode?"); 2862*e8d8bef9SDimitry Andric assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); 2863*e8d8bef9SDimitry Andric MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); 2864*e8d8bef9SDimitry Andric for (auto &OperandFn : InstrToBuild.OperandFns) 2865*e8d8bef9SDimitry Andric OperandFn(Instr); 2866*e8d8bef9SDimitry Andric } 2867*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2868*e8d8bef9SDimitry Andric return true; 2869*e8d8bef9SDimitry Andric } 2870*e8d8bef9SDimitry Andric 2871*e8d8bef9SDimitry Andric bool CombinerHelper::matchAshrShlToSextInreg( 2872*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2873*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2874*e8d8bef9SDimitry Andric int64_t ShlCst, AshrCst; 2875*e8d8bef9SDimitry Andric Register Src; 2876*e8d8bef9SDimitry Andric // FIXME: detect splat constant vectors. 2877*e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 2878*e8d8bef9SDimitry Andric m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) 2879*e8d8bef9SDimitry Andric return false; 2880*e8d8bef9SDimitry Andric if (ShlCst != AshrCst) 2881*e8d8bef9SDimitry Andric return false; 2882*e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 2883*e8d8bef9SDimitry Andric {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) 2884*e8d8bef9SDimitry Andric return false; 2885*e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(Src, ShlCst); 2886*e8d8bef9SDimitry Andric return true; 2887*e8d8bef9SDimitry Andric } 2888*e8d8bef9SDimitry Andric bool CombinerHelper::applyAshShlToSextInreg( 2889*e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2890*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2891*e8d8bef9SDimitry Andric Register Src; 2892*e8d8bef9SDimitry Andric int64_t ShiftAmt; 2893*e8d8bef9SDimitry Andric std::tie(Src, ShiftAmt) = MatchInfo; 2894*e8d8bef9SDimitry Andric unsigned Size = MRI.getType(Src).getScalarSizeInBits(); 2895*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2896*e8d8bef9SDimitry Andric Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); 2897*e8d8bef9SDimitry Andric MI.eraseFromParent(); 2898*e8d8bef9SDimitry Andric return true; 2899*e8d8bef9SDimitry Andric } 2900*e8d8bef9SDimitry Andric 2901*e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, 2902*e8d8bef9SDimitry Andric Register &Replacement) { 2903*e8d8bef9SDimitry Andric // Given 2904*e8d8bef9SDimitry Andric // 2905*e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2906*e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2907*e8d8bef9SDimitry Andric // %res:_(sN) = G_AND %x, %y 2908*e8d8bef9SDimitry Andric // 2909*e8d8bef9SDimitry Andric // Eliminate the G_AND when it is known that x & y == x or x & y == y. 2910*e8d8bef9SDimitry Andric // 2911*e8d8bef9SDimitry Andric // Patterns like this can appear as a result of legalization. E.g. 2912*e8d8bef9SDimitry Andric // 2913*e8d8bef9SDimitry Andric // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y 2914*e8d8bef9SDimitry Andric // %one:_(s32) = G_CONSTANT i32 1 2915*e8d8bef9SDimitry Andric // %and:_(s32) = G_AND %cmp, %one 2916*e8d8bef9SDimitry Andric // 2917*e8d8bef9SDimitry Andric // In this case, G_ICMP only produces a single bit, so x & 1 == x. 2918*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 2919*e8d8bef9SDimitry Andric if (!KB) 2920*e8d8bef9SDimitry Andric return false; 2921*e8d8bef9SDimitry Andric 2922*e8d8bef9SDimitry Andric Register AndDst = MI.getOperand(0).getReg(); 2923*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(AndDst); 2924*e8d8bef9SDimitry Andric 2925*e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 2926*e8d8bef9SDimitry Andric if (DstTy.isVector()) 2927*e8d8bef9SDimitry Andric return false; 2928*e8d8bef9SDimitry Andric 2929*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2930*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2931*e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2932*e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2933*e8d8bef9SDimitry Andric 2934*e8d8bef9SDimitry Andric // Check that x & Mask == x. 2935*e8d8bef9SDimitry Andric // x & 1 == x, always 2936*e8d8bef9SDimitry Andric // x & 0 == x, only if x is also 0 2937*e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either one in Mask or zero in x. 2938*e8d8bef9SDimitry Andric // 2939*e8d8bef9SDimitry Andric // Check if we can replace AndDst with the LHS of the G_AND 2940*e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, LHS, MRI) && 2941*e8d8bef9SDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { 2942*e8d8bef9SDimitry Andric Replacement = LHS; 2943*e8d8bef9SDimitry Andric return true; 2944*e8d8bef9SDimitry Andric } 2945*e8d8bef9SDimitry Andric 2946*e8d8bef9SDimitry Andric // Check if we can replace AndDst with the RHS of the G_AND 2947*e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, RHS, MRI) && 2948*e8d8bef9SDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { 2949*e8d8bef9SDimitry Andric Replacement = RHS; 2950*e8d8bef9SDimitry Andric return true; 2951*e8d8bef9SDimitry Andric } 2952*e8d8bef9SDimitry Andric 2953*e8d8bef9SDimitry Andric return false; 2954*e8d8bef9SDimitry Andric } 2955*e8d8bef9SDimitry Andric 2956*e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { 2957*e8d8bef9SDimitry Andric // Given 2958*e8d8bef9SDimitry Andric // 2959*e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2960*e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2961*e8d8bef9SDimitry Andric // %res:_(sN) = G_OR %x, %y 2962*e8d8bef9SDimitry Andric // 2963*e8d8bef9SDimitry Andric // Eliminate the G_OR when it is known that x | y == x or x | y == y. 2964*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 2965*e8d8bef9SDimitry Andric if (!KB) 2966*e8d8bef9SDimitry Andric return false; 2967*e8d8bef9SDimitry Andric 2968*e8d8bef9SDimitry Andric Register OrDst = MI.getOperand(0).getReg(); 2969*e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(OrDst); 2970*e8d8bef9SDimitry Andric 2971*e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 2972*e8d8bef9SDimitry Andric if (DstTy.isVector()) 2973*e8d8bef9SDimitry Andric return false; 2974*e8d8bef9SDimitry Andric 2975*e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2976*e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2977*e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2978*e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2979*e8d8bef9SDimitry Andric 2980*e8d8bef9SDimitry Andric // Check that x | Mask == x. 2981*e8d8bef9SDimitry Andric // x | 0 == x, always 2982*e8d8bef9SDimitry Andric // x | 1 == x, only if x is also 1 2983*e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either zero in Mask or one in x. 2984*e8d8bef9SDimitry Andric // 2985*e8d8bef9SDimitry Andric // Check if we can replace OrDst with the LHS of the G_OR 2986*e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, LHS, MRI) && 2987*e8d8bef9SDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { 2988*e8d8bef9SDimitry Andric Replacement = LHS; 2989*e8d8bef9SDimitry Andric return true; 2990*e8d8bef9SDimitry Andric } 2991*e8d8bef9SDimitry Andric 2992*e8d8bef9SDimitry Andric // Check if we can replace OrDst with the RHS of the G_OR 2993*e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, RHS, MRI) && 2994*e8d8bef9SDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { 2995*e8d8bef9SDimitry Andric Replacement = RHS; 2996*e8d8bef9SDimitry Andric return true; 2997*e8d8bef9SDimitry Andric } 2998*e8d8bef9SDimitry Andric 2999*e8d8bef9SDimitry Andric return false; 3000*e8d8bef9SDimitry Andric } 3001*e8d8bef9SDimitry Andric 3002*e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { 3003*e8d8bef9SDimitry Andric // If the input is already sign extended, just drop the extension. 3004*e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3005*e8d8bef9SDimitry Andric unsigned ExtBits = MI.getOperand(2).getImm(); 3006*e8d8bef9SDimitry Andric unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); 3007*e8d8bef9SDimitry Andric return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); 3008*e8d8bef9SDimitry Andric } 3009*e8d8bef9SDimitry Andric 3010*e8d8bef9SDimitry Andric static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, 3011*e8d8bef9SDimitry Andric int64_t Cst, bool IsVector, bool IsFP) { 3012*e8d8bef9SDimitry Andric // For i1, Cst will always be -1 regardless of boolean contents. 3013*e8d8bef9SDimitry Andric return (ScalarSizeBits == 1 && Cst == -1) || 3014*e8d8bef9SDimitry Andric isConstTrueVal(TLI, Cst, IsVector, IsFP); 3015*e8d8bef9SDimitry Andric } 3016*e8d8bef9SDimitry Andric 3017*e8d8bef9SDimitry Andric bool CombinerHelper::matchNotCmp(MachineInstr &MI, 3018*e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 3019*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3020*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 3021*e8d8bef9SDimitry Andric const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); 3022*e8d8bef9SDimitry Andric Register XorSrc; 3023*e8d8bef9SDimitry Andric Register CstReg; 3024*e8d8bef9SDimitry Andric // We match xor(src, true) here. 3025*e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 3026*e8d8bef9SDimitry Andric m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) 3027*e8d8bef9SDimitry Andric return false; 3028*e8d8bef9SDimitry Andric 3029*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(XorSrc)) 3030*e8d8bef9SDimitry Andric return false; 3031*e8d8bef9SDimitry Andric 3032*e8d8bef9SDimitry Andric // Check that XorSrc is the root of a tree of comparisons combined with ANDs 3033*e8d8bef9SDimitry Andric // and ORs. The suffix of RegsToNegate starting from index I is used a work 3034*e8d8bef9SDimitry Andric // list of tree nodes to visit. 3035*e8d8bef9SDimitry Andric RegsToNegate.push_back(XorSrc); 3036*e8d8bef9SDimitry Andric // Remember whether the comparisons are all integer or all floating point. 3037*e8d8bef9SDimitry Andric bool IsInt = false; 3038*e8d8bef9SDimitry Andric bool IsFP = false; 3039*e8d8bef9SDimitry Andric for (unsigned I = 0; I < RegsToNegate.size(); ++I) { 3040*e8d8bef9SDimitry Andric Register Reg = RegsToNegate[I]; 3041*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(Reg)) 3042*e8d8bef9SDimitry Andric return false; 3043*e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 3044*e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 3045*e8d8bef9SDimitry Andric default: 3046*e8d8bef9SDimitry Andric // Don't match if the tree contains anything other than ANDs, ORs and 3047*e8d8bef9SDimitry Andric // comparisons. 3048*e8d8bef9SDimitry Andric return false; 3049*e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 3050*e8d8bef9SDimitry Andric if (IsFP) 3051*e8d8bef9SDimitry Andric return false; 3052*e8d8bef9SDimitry Andric IsInt = true; 3053*e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 3054*e8d8bef9SDimitry Andric break; 3055*e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: 3056*e8d8bef9SDimitry Andric if (IsInt) 3057*e8d8bef9SDimitry Andric return false; 3058*e8d8bef9SDimitry Andric IsFP = true; 3059*e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 3060*e8d8bef9SDimitry Andric break; 3061*e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 3062*e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 3063*e8d8bef9SDimitry Andric // Implement De Morgan's laws: 3064*e8d8bef9SDimitry Andric // ~(x & y) -> ~x | ~y 3065*e8d8bef9SDimitry Andric // ~(x | y) -> ~x & ~y 3066*e8d8bef9SDimitry Andric // When we apply the combine we will change the opcode and recursively 3067*e8d8bef9SDimitry Andric // negate the operands. 3068*e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(1).getReg()); 3069*e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(2).getReg()); 3070*e8d8bef9SDimitry Andric break; 3071*e8d8bef9SDimitry Andric } 3072*e8d8bef9SDimitry Andric } 3073*e8d8bef9SDimitry Andric 3074*e8d8bef9SDimitry Andric // Now we know whether the comparisons are integer or floating point, check 3075*e8d8bef9SDimitry Andric // the constant in the xor. 3076*e8d8bef9SDimitry Andric int64_t Cst; 3077*e8d8bef9SDimitry Andric if (Ty.isVector()) { 3078*e8d8bef9SDimitry Andric MachineInstr *CstDef = MRI.getVRegDef(CstReg); 3079*e8d8bef9SDimitry Andric auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); 3080*e8d8bef9SDimitry Andric if (!MaybeCst) 3081*e8d8bef9SDimitry Andric return false; 3082*e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) 3083*e8d8bef9SDimitry Andric return false; 3084*e8d8bef9SDimitry Andric } else { 3085*e8d8bef9SDimitry Andric if (!mi_match(CstReg, MRI, m_ICst(Cst))) 3086*e8d8bef9SDimitry Andric return false; 3087*e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) 3088*e8d8bef9SDimitry Andric return false; 3089*e8d8bef9SDimitry Andric } 3090*e8d8bef9SDimitry Andric 3091*e8d8bef9SDimitry Andric return true; 3092*e8d8bef9SDimitry Andric } 3093*e8d8bef9SDimitry Andric 3094*e8d8bef9SDimitry Andric bool CombinerHelper::applyNotCmp(MachineInstr &MI, 3095*e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 3096*e8d8bef9SDimitry Andric for (Register Reg : RegsToNegate) { 3097*e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 3098*e8d8bef9SDimitry Andric Observer.changingInstr(*Def); 3099*e8d8bef9SDimitry Andric // For each comparison, invert the opcode. For each AND and OR, change the 3100*e8d8bef9SDimitry Andric // opcode. 3101*e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 3102*e8d8bef9SDimitry Andric default: 3103*e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode"); 3104*e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 3105*e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: { 3106*e8d8bef9SDimitry Andric MachineOperand &PredOp = Def->getOperand(1); 3107*e8d8bef9SDimitry Andric CmpInst::Predicate NewP = CmpInst::getInversePredicate( 3108*e8d8bef9SDimitry Andric (CmpInst::Predicate)PredOp.getPredicate()); 3109*e8d8bef9SDimitry Andric PredOp.setPredicate(NewP); 3110*e8d8bef9SDimitry Andric break; 3111*e8d8bef9SDimitry Andric } 3112*e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 3113*e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); 3114*e8d8bef9SDimitry Andric break; 3115*e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 3116*e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3117*e8d8bef9SDimitry Andric break; 3118*e8d8bef9SDimitry Andric } 3119*e8d8bef9SDimitry Andric Observer.changedInstr(*Def); 3120*e8d8bef9SDimitry Andric } 3121*e8d8bef9SDimitry Andric 3122*e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 3123*e8d8bef9SDimitry Andric MI.eraseFromParent(); 3124*e8d8bef9SDimitry Andric return true; 3125*e8d8bef9SDimitry Andric } 3126*e8d8bef9SDimitry Andric 3127*e8d8bef9SDimitry Andric bool CombinerHelper::matchXorOfAndWithSameReg( 3128*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3129*e8d8bef9SDimitry Andric // Match (xor (and x, y), y) (or any of its commuted cases) 3130*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3131*e8d8bef9SDimitry Andric Register &X = MatchInfo.first; 3132*e8d8bef9SDimitry Andric Register &Y = MatchInfo.second; 3133*e8d8bef9SDimitry Andric Register AndReg = MI.getOperand(1).getReg(); 3134*e8d8bef9SDimitry Andric Register SharedReg = MI.getOperand(2).getReg(); 3135*e8d8bef9SDimitry Andric 3136*e8d8bef9SDimitry Andric // Find a G_AND on either side of the G_XOR. 3137*e8d8bef9SDimitry Andric // Look for one of 3138*e8d8bef9SDimitry Andric // 3139*e8d8bef9SDimitry Andric // (xor (and x, y), SharedReg) 3140*e8d8bef9SDimitry Andric // (xor SharedReg, (and x, y)) 3141*e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { 3142*e8d8bef9SDimitry Andric std::swap(AndReg, SharedReg); 3143*e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) 3144*e8d8bef9SDimitry Andric return false; 3145*e8d8bef9SDimitry Andric } 3146*e8d8bef9SDimitry Andric 3147*e8d8bef9SDimitry Andric // Only do this if we'll eliminate the G_AND. 3148*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(AndReg)) 3149*e8d8bef9SDimitry Andric return false; 3150*e8d8bef9SDimitry Andric 3151*e8d8bef9SDimitry Andric // We can combine if SharedReg is the same as either the LHS or RHS of the 3152*e8d8bef9SDimitry Andric // G_AND. 3153*e8d8bef9SDimitry Andric if (Y != SharedReg) 3154*e8d8bef9SDimitry Andric std::swap(X, Y); 3155*e8d8bef9SDimitry Andric return Y == SharedReg; 3156*e8d8bef9SDimitry Andric } 3157*e8d8bef9SDimitry Andric 3158*e8d8bef9SDimitry Andric bool CombinerHelper::applyXorOfAndWithSameReg( 3159*e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3160*e8d8bef9SDimitry Andric // Fold (xor (and x, y), y) -> (and (not x), y) 3161*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3162*e8d8bef9SDimitry Andric Register X, Y; 3163*e8d8bef9SDimitry Andric std::tie(X, Y) = MatchInfo; 3164*e8d8bef9SDimitry Andric auto Not = Builder.buildNot(MRI.getType(X), X); 3165*e8d8bef9SDimitry Andric Observer.changingInstr(MI); 3166*e8d8bef9SDimitry Andric MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3167*e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Not->getOperand(0).getReg()); 3168*e8d8bef9SDimitry Andric MI.getOperand(2).setReg(Y); 3169*e8d8bef9SDimitry Andric Observer.changedInstr(MI); 3170*e8d8bef9SDimitry Andric return true; 3171*e8d8bef9SDimitry Andric } 3172*e8d8bef9SDimitry Andric 3173*e8d8bef9SDimitry Andric bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { 3174*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3175*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3176*e8d8bef9SDimitry Andric const DataLayout &DL = Builder.getMF().getDataLayout(); 3177*e8d8bef9SDimitry Andric 3178*e8d8bef9SDimitry Andric if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) 3179*e8d8bef9SDimitry Andric return false; 3180*e8d8bef9SDimitry Andric 3181*e8d8bef9SDimitry Andric if (Ty.isPointer()) { 3182*e8d8bef9SDimitry Andric auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); 3183*e8d8bef9SDimitry Andric return ConstVal && *ConstVal == 0; 3184*e8d8bef9SDimitry Andric } 3185*e8d8bef9SDimitry Andric 3186*e8d8bef9SDimitry Andric assert(Ty.isVector() && "Expecting a vector type"); 3187*e8d8bef9SDimitry Andric const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); 3188*e8d8bef9SDimitry Andric return isBuildVectorAllZeros(*VecMI, MRI); 3189*e8d8bef9SDimitry Andric } 3190*e8d8bef9SDimitry Andric 3191*e8d8bef9SDimitry Andric bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { 3192*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); 3193*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3194*e8d8bef9SDimitry Andric Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); 3195*e8d8bef9SDimitry Andric MI.eraseFromParent(); 3196*e8d8bef9SDimitry Andric return true; 3197*e8d8bef9SDimitry Andric } 3198*e8d8bef9SDimitry Andric 3199*e8d8bef9SDimitry Andric /// The second source operand is known to be a power of 2. 3200*e8d8bef9SDimitry Andric bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { 3201*e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3202*e8d8bef9SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3203*e8d8bef9SDimitry Andric Register Pow2Src1 = MI.getOperand(2).getReg(); 3204*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3205*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3206*e8d8bef9SDimitry Andric 3207*e8d8bef9SDimitry Andric // Fold (urem x, pow2) -> (and x, pow2-1) 3208*e8d8bef9SDimitry Andric auto NegOne = Builder.buildConstant(Ty, -1); 3209*e8d8bef9SDimitry Andric auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); 3210*e8d8bef9SDimitry Andric Builder.buildAnd(DstReg, Src0, Add); 3211*e8d8bef9SDimitry Andric MI.eraseFromParent(); 3212*e8d8bef9SDimitry Andric return true; 3213*e8d8bef9SDimitry Andric } 3214*e8d8bef9SDimitry Andric 3215*e8d8bef9SDimitry Andric Optional<SmallVector<Register, 8>> 3216*e8d8bef9SDimitry Andric CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { 3217*e8d8bef9SDimitry Andric assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); 3218*e8d8bef9SDimitry Andric // We want to detect if Root is part of a tree which represents a bunch 3219*e8d8bef9SDimitry Andric // of loads being merged into a larger load. We'll try to recognize patterns 3220*e8d8bef9SDimitry Andric // like, for example: 3221*e8d8bef9SDimitry Andric // 3222*e8d8bef9SDimitry Andric // Reg Reg 3223*e8d8bef9SDimitry Andric // \ / 3224*e8d8bef9SDimitry Andric // OR_1 Reg 3225*e8d8bef9SDimitry Andric // \ / 3226*e8d8bef9SDimitry Andric // OR_2 3227*e8d8bef9SDimitry Andric // \ Reg 3228*e8d8bef9SDimitry Andric // .. / 3229*e8d8bef9SDimitry Andric // Root 3230*e8d8bef9SDimitry Andric // 3231*e8d8bef9SDimitry Andric // Reg Reg Reg Reg 3232*e8d8bef9SDimitry Andric // \ / \ / 3233*e8d8bef9SDimitry Andric // OR_1 OR_2 3234*e8d8bef9SDimitry Andric // \ / 3235*e8d8bef9SDimitry Andric // \ / 3236*e8d8bef9SDimitry Andric // ... 3237*e8d8bef9SDimitry Andric // Root 3238*e8d8bef9SDimitry Andric // 3239*e8d8bef9SDimitry Andric // Each "Reg" may have been produced by a load + some arithmetic. This 3240*e8d8bef9SDimitry Andric // function will save each of them. 3241*e8d8bef9SDimitry Andric SmallVector<Register, 8> RegsToVisit; 3242*e8d8bef9SDimitry Andric SmallVector<const MachineInstr *, 7> Ors = {Root}; 3243*e8d8bef9SDimitry Andric 3244*e8d8bef9SDimitry Andric // In the "worst" case, we're dealing with a load for each byte. So, there 3245*e8d8bef9SDimitry Andric // are at most #bytes - 1 ORs. 3246*e8d8bef9SDimitry Andric const unsigned MaxIter = 3247*e8d8bef9SDimitry Andric MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; 3248*e8d8bef9SDimitry Andric for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { 3249*e8d8bef9SDimitry Andric if (Ors.empty()) 3250*e8d8bef9SDimitry Andric break; 3251*e8d8bef9SDimitry Andric const MachineInstr *Curr = Ors.pop_back_val(); 3252*e8d8bef9SDimitry Andric Register OrLHS = Curr->getOperand(1).getReg(); 3253*e8d8bef9SDimitry Andric Register OrRHS = Curr->getOperand(2).getReg(); 3254*e8d8bef9SDimitry Andric 3255*e8d8bef9SDimitry Andric // In the combine, we want to elimate the entire tree. 3256*e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) 3257*e8d8bef9SDimitry Andric return None; 3258*e8d8bef9SDimitry Andric 3259*e8d8bef9SDimitry Andric // If it's a G_OR, save it and continue to walk. If it's not, then it's 3260*e8d8bef9SDimitry Andric // something that may be a load + arithmetic. 3261*e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) 3262*e8d8bef9SDimitry Andric Ors.push_back(Or); 3263*e8d8bef9SDimitry Andric else 3264*e8d8bef9SDimitry Andric RegsToVisit.push_back(OrLHS); 3265*e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) 3266*e8d8bef9SDimitry Andric Ors.push_back(Or); 3267*e8d8bef9SDimitry Andric else 3268*e8d8bef9SDimitry Andric RegsToVisit.push_back(OrRHS); 3269*e8d8bef9SDimitry Andric } 3270*e8d8bef9SDimitry Andric 3271*e8d8bef9SDimitry Andric // We're going to try and merge each register into a wider power-of-2 type, 3272*e8d8bef9SDimitry Andric // so we ought to have an even number of registers. 3273*e8d8bef9SDimitry Andric if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) 3274*e8d8bef9SDimitry Andric return None; 3275*e8d8bef9SDimitry Andric return RegsToVisit; 3276*e8d8bef9SDimitry Andric } 3277*e8d8bef9SDimitry Andric 3278*e8d8bef9SDimitry Andric /// Helper function for findLoadOffsetsForLoadOrCombine. 3279*e8d8bef9SDimitry Andric /// 3280*e8d8bef9SDimitry Andric /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, 3281*e8d8bef9SDimitry Andric /// and then moving that value into a specific byte offset. 3282*e8d8bef9SDimitry Andric /// 3283*e8d8bef9SDimitry Andric /// e.g. x[i] << 24 3284*e8d8bef9SDimitry Andric /// 3285*e8d8bef9SDimitry Andric /// \returns The load instruction and the byte offset it is moved into. 3286*e8d8bef9SDimitry Andric static Optional<std::pair<MachineInstr *, int64_t>> 3287*e8d8bef9SDimitry Andric matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, 3288*e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 3289*e8d8bef9SDimitry Andric assert(MRI.hasOneNonDBGUse(Reg) && 3290*e8d8bef9SDimitry Andric "Expected Reg to only have one non-debug use?"); 3291*e8d8bef9SDimitry Andric Register MaybeLoad; 3292*e8d8bef9SDimitry Andric int64_t Shift; 3293*e8d8bef9SDimitry Andric if (!mi_match(Reg, MRI, 3294*e8d8bef9SDimitry Andric m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { 3295*e8d8bef9SDimitry Andric Shift = 0; 3296*e8d8bef9SDimitry Andric MaybeLoad = Reg; 3297*e8d8bef9SDimitry Andric } 3298*e8d8bef9SDimitry Andric 3299*e8d8bef9SDimitry Andric if (Shift % MemSizeInBits != 0) 3300*e8d8bef9SDimitry Andric return None; 3301*e8d8bef9SDimitry Andric 3302*e8d8bef9SDimitry Andric // TODO: Handle other types of loads. 3303*e8d8bef9SDimitry Andric auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI); 3304*e8d8bef9SDimitry Andric if (!Load) 3305*e8d8bef9SDimitry Andric return None; 3306*e8d8bef9SDimitry Andric 3307*e8d8bef9SDimitry Andric const auto &MMO = **Load->memoperands_begin(); 3308*e8d8bef9SDimitry Andric if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits) 3309*e8d8bef9SDimitry Andric return None; 3310*e8d8bef9SDimitry Andric 3311*e8d8bef9SDimitry Andric return std::make_pair(Load, Shift / MemSizeInBits); 3312*e8d8bef9SDimitry Andric } 3313*e8d8bef9SDimitry Andric 3314*e8d8bef9SDimitry Andric Optional<std::pair<MachineInstr *, int64_t>> 3315*e8d8bef9SDimitry Andric CombinerHelper::findLoadOffsetsForLoadOrCombine( 3316*e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 3317*e8d8bef9SDimitry Andric const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { 3318*e8d8bef9SDimitry Andric 3319*e8d8bef9SDimitry Andric // Each load found for the pattern. There should be one for each RegsToVisit. 3320*e8d8bef9SDimitry Andric SmallSetVector<const MachineInstr *, 8> Loads; 3321*e8d8bef9SDimitry Andric 3322*e8d8bef9SDimitry Andric // The lowest index used in any load. (The lowest "i" for each x[i].) 3323*e8d8bef9SDimitry Andric int64_t LowestIdx = INT64_MAX; 3324*e8d8bef9SDimitry Andric 3325*e8d8bef9SDimitry Andric // The load which uses the lowest index. 3326*e8d8bef9SDimitry Andric MachineInstr *LowestIdxLoad = nullptr; 3327*e8d8bef9SDimitry Andric 3328*e8d8bef9SDimitry Andric // Keeps track of the load indices we see. We shouldn't see any indices twice. 3329*e8d8bef9SDimitry Andric SmallSet<int64_t, 8> SeenIdx; 3330*e8d8bef9SDimitry Andric 3331*e8d8bef9SDimitry Andric // Ensure each load is in the same MBB. 3332*e8d8bef9SDimitry Andric // TODO: Support multiple MachineBasicBlocks. 3333*e8d8bef9SDimitry Andric MachineBasicBlock *MBB = nullptr; 3334*e8d8bef9SDimitry Andric const MachineMemOperand *MMO = nullptr; 3335*e8d8bef9SDimitry Andric 3336*e8d8bef9SDimitry Andric // Earliest instruction-order load in the pattern. 3337*e8d8bef9SDimitry Andric MachineInstr *EarliestLoad = nullptr; 3338*e8d8bef9SDimitry Andric 3339*e8d8bef9SDimitry Andric // Latest instruction-order load in the pattern. 3340*e8d8bef9SDimitry Andric MachineInstr *LatestLoad = nullptr; 3341*e8d8bef9SDimitry Andric 3342*e8d8bef9SDimitry Andric // Base pointer which every load should share. 3343*e8d8bef9SDimitry Andric Register BasePtr; 3344*e8d8bef9SDimitry Andric 3345*e8d8bef9SDimitry Andric // We want to find a load for each register. Each load should have some 3346*e8d8bef9SDimitry Andric // appropriate bit twiddling arithmetic. During this loop, we will also keep 3347*e8d8bef9SDimitry Andric // track of the load which uses the lowest index. Later, we will check if we 3348*e8d8bef9SDimitry Andric // can use its pointer in the final, combined load. 3349*e8d8bef9SDimitry Andric for (auto Reg : RegsToVisit) { 3350*e8d8bef9SDimitry Andric // Find the load, and find the position that it will end up in (e.g. a 3351*e8d8bef9SDimitry Andric // shifted) value. 3352*e8d8bef9SDimitry Andric auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); 3353*e8d8bef9SDimitry Andric if (!LoadAndPos) 3354*e8d8bef9SDimitry Andric return None; 3355*e8d8bef9SDimitry Andric MachineInstr *Load; 3356*e8d8bef9SDimitry Andric int64_t DstPos; 3357*e8d8bef9SDimitry Andric std::tie(Load, DstPos) = *LoadAndPos; 3358*e8d8bef9SDimitry Andric 3359*e8d8bef9SDimitry Andric // TODO: Handle multiple MachineBasicBlocks. Currently not handled because 3360*e8d8bef9SDimitry Andric // it is difficult to check for stores/calls/etc between loads. 3361*e8d8bef9SDimitry Andric MachineBasicBlock *LoadMBB = Load->getParent(); 3362*e8d8bef9SDimitry Andric if (!MBB) 3363*e8d8bef9SDimitry Andric MBB = LoadMBB; 3364*e8d8bef9SDimitry Andric if (LoadMBB != MBB) 3365*e8d8bef9SDimitry Andric return None; 3366*e8d8bef9SDimitry Andric 3367*e8d8bef9SDimitry Andric // Make sure that the MachineMemOperands of every seen load are compatible. 3368*e8d8bef9SDimitry Andric const MachineMemOperand *LoadMMO = *Load->memoperands_begin(); 3369*e8d8bef9SDimitry Andric if (!MMO) 3370*e8d8bef9SDimitry Andric MMO = LoadMMO; 3371*e8d8bef9SDimitry Andric if (MMO->getAddrSpace() != LoadMMO->getAddrSpace()) 3372*e8d8bef9SDimitry Andric return None; 3373*e8d8bef9SDimitry Andric 3374*e8d8bef9SDimitry Andric // Find out what the base pointer and index for the load is. 3375*e8d8bef9SDimitry Andric Register LoadPtr; 3376*e8d8bef9SDimitry Andric int64_t Idx; 3377*e8d8bef9SDimitry Andric if (!mi_match(Load->getOperand(1).getReg(), MRI, 3378*e8d8bef9SDimitry Andric m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { 3379*e8d8bef9SDimitry Andric LoadPtr = Load->getOperand(1).getReg(); 3380*e8d8bef9SDimitry Andric Idx = 0; 3381*e8d8bef9SDimitry Andric } 3382*e8d8bef9SDimitry Andric 3383*e8d8bef9SDimitry Andric // Don't combine things like a[i], a[i] -> a bigger load. 3384*e8d8bef9SDimitry Andric if (!SeenIdx.insert(Idx).second) 3385*e8d8bef9SDimitry Andric return None; 3386*e8d8bef9SDimitry Andric 3387*e8d8bef9SDimitry Andric // Every load must share the same base pointer; don't combine things like: 3388*e8d8bef9SDimitry Andric // 3389*e8d8bef9SDimitry Andric // a[i], b[i + 1] -> a bigger load. 3390*e8d8bef9SDimitry Andric if (!BasePtr.isValid()) 3391*e8d8bef9SDimitry Andric BasePtr = LoadPtr; 3392*e8d8bef9SDimitry Andric if (BasePtr != LoadPtr) 3393*e8d8bef9SDimitry Andric return None; 3394*e8d8bef9SDimitry Andric 3395*e8d8bef9SDimitry Andric if (Idx < LowestIdx) { 3396*e8d8bef9SDimitry Andric LowestIdx = Idx; 3397*e8d8bef9SDimitry Andric LowestIdxLoad = Load; 3398*e8d8bef9SDimitry Andric } 3399*e8d8bef9SDimitry Andric 3400*e8d8bef9SDimitry Andric // Keep track of the byte offset that this load ends up at. If we have seen 3401*e8d8bef9SDimitry Andric // the byte offset, then stop here. We do not want to combine: 3402*e8d8bef9SDimitry Andric // 3403*e8d8bef9SDimitry Andric // a[i] << 16, a[i + k] << 16 -> a bigger load. 3404*e8d8bef9SDimitry Andric if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) 3405*e8d8bef9SDimitry Andric return None; 3406*e8d8bef9SDimitry Andric Loads.insert(Load); 3407*e8d8bef9SDimitry Andric 3408*e8d8bef9SDimitry Andric // Keep track of the position of the earliest/latest loads in the pattern. 3409*e8d8bef9SDimitry Andric // We will check that there are no load fold barriers between them later 3410*e8d8bef9SDimitry Andric // on. 3411*e8d8bef9SDimitry Andric // 3412*e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3413*e8d8bef9SDimitry Andric if (!EarliestLoad || dominates(*Load, *EarliestLoad)) 3414*e8d8bef9SDimitry Andric EarliestLoad = Load; 3415*e8d8bef9SDimitry Andric if (!LatestLoad || dominates(*LatestLoad, *Load)) 3416*e8d8bef9SDimitry Andric LatestLoad = Load; 3417*e8d8bef9SDimitry Andric } 3418*e8d8bef9SDimitry Andric 3419*e8d8bef9SDimitry Andric // We found a load for each register. Let's check if each load satisfies the 3420*e8d8bef9SDimitry Andric // pattern. 3421*e8d8bef9SDimitry Andric assert(Loads.size() == RegsToVisit.size() && 3422*e8d8bef9SDimitry Andric "Expected to find a load for each register?"); 3423*e8d8bef9SDimitry Andric assert(EarliestLoad != LatestLoad && EarliestLoad && 3424*e8d8bef9SDimitry Andric LatestLoad && "Expected at least two loads?"); 3425*e8d8bef9SDimitry Andric 3426*e8d8bef9SDimitry Andric // Check if there are any stores, calls, etc. between any of the loads. If 3427*e8d8bef9SDimitry Andric // there are, then we can't safely perform the combine. 3428*e8d8bef9SDimitry Andric // 3429*e8d8bef9SDimitry Andric // MaxIter is chosen based off the (worst case) number of iterations it 3430*e8d8bef9SDimitry Andric // typically takes to succeed in the LLVM test suite plus some padding. 3431*e8d8bef9SDimitry Andric // 3432*e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3433*e8d8bef9SDimitry Andric const unsigned MaxIter = 20; 3434*e8d8bef9SDimitry Andric unsigned Iter = 0; 3435*e8d8bef9SDimitry Andric for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), 3436*e8d8bef9SDimitry Andric LatestLoad->getIterator())) { 3437*e8d8bef9SDimitry Andric if (Loads.count(&MI)) 3438*e8d8bef9SDimitry Andric continue; 3439*e8d8bef9SDimitry Andric if (MI.isLoadFoldBarrier()) 3440*e8d8bef9SDimitry Andric return None; 3441*e8d8bef9SDimitry Andric if (Iter++ == MaxIter) 3442*e8d8bef9SDimitry Andric return None; 3443*e8d8bef9SDimitry Andric } 3444*e8d8bef9SDimitry Andric 3445*e8d8bef9SDimitry Andric return std::make_pair(LowestIdxLoad, LowestIdx); 3446*e8d8bef9SDimitry Andric } 3447*e8d8bef9SDimitry Andric 3448*e8d8bef9SDimitry Andric bool CombinerHelper::matchLoadOrCombine( 3449*e8d8bef9SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3450*e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 3451*e8d8bef9SDimitry Andric MachineFunction &MF = *MI.getMF(); 3452*e8d8bef9SDimitry Andric // Assuming a little-endian target, transform: 3453*e8d8bef9SDimitry Andric // s8 *a = ... 3454*e8d8bef9SDimitry Andric // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) 3455*e8d8bef9SDimitry Andric // => 3456*e8d8bef9SDimitry Andric // s32 val = *((i32)a) 3457*e8d8bef9SDimitry Andric // 3458*e8d8bef9SDimitry Andric // s8 *a = ... 3459*e8d8bef9SDimitry Andric // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] 3460*e8d8bef9SDimitry Andric // => 3461*e8d8bef9SDimitry Andric // s32 val = BSWAP(*((s32)a)) 3462*e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3463*e8d8bef9SDimitry Andric LLT Ty = MRI.getType(Dst); 3464*e8d8bef9SDimitry Andric if (Ty.isVector()) 3465*e8d8bef9SDimitry Andric return false; 3466*e8d8bef9SDimitry Andric 3467*e8d8bef9SDimitry Andric // We need to combine at least two loads into this type. Since the smallest 3468*e8d8bef9SDimitry Andric // possible load is into a byte, we need at least a 16-bit wide type. 3469*e8d8bef9SDimitry Andric const unsigned WideMemSizeInBits = Ty.getSizeInBits(); 3470*e8d8bef9SDimitry Andric if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) 3471*e8d8bef9SDimitry Andric return false; 3472*e8d8bef9SDimitry Andric 3473*e8d8bef9SDimitry Andric // Match a collection of non-OR instructions in the pattern. 3474*e8d8bef9SDimitry Andric auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); 3475*e8d8bef9SDimitry Andric if (!RegsToVisit) 3476*e8d8bef9SDimitry Andric return false; 3477*e8d8bef9SDimitry Andric 3478*e8d8bef9SDimitry Andric // We have a collection of non-OR instructions. Figure out how wide each of 3479*e8d8bef9SDimitry Andric // the small loads should be based off of the number of potential loads we 3480*e8d8bef9SDimitry Andric // found. 3481*e8d8bef9SDimitry Andric const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); 3482*e8d8bef9SDimitry Andric if (NarrowMemSizeInBits % 8 != 0) 3483*e8d8bef9SDimitry Andric return false; 3484*e8d8bef9SDimitry Andric 3485*e8d8bef9SDimitry Andric // Check if each register feeding into each OR is a load from the same 3486*e8d8bef9SDimitry Andric // base pointer + some arithmetic. 3487*e8d8bef9SDimitry Andric // 3488*e8d8bef9SDimitry Andric // e.g. a[0], a[1] << 8, a[2] << 16, etc. 3489*e8d8bef9SDimitry Andric // 3490*e8d8bef9SDimitry Andric // Also verify that each of these ends up putting a[i] into the same memory 3491*e8d8bef9SDimitry Andric // offset as a load into a wide type would. 3492*e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; 3493*e8d8bef9SDimitry Andric MachineInstr *LowestIdxLoad; 3494*e8d8bef9SDimitry Andric int64_t LowestIdx; 3495*e8d8bef9SDimitry Andric auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( 3496*e8d8bef9SDimitry Andric MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); 3497*e8d8bef9SDimitry Andric if (!MaybeLoadInfo) 3498*e8d8bef9SDimitry Andric return false; 3499*e8d8bef9SDimitry Andric std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo; 3500*e8d8bef9SDimitry Andric 3501*e8d8bef9SDimitry Andric // We have a bunch of loads being OR'd together. Using the addresses + offsets 3502*e8d8bef9SDimitry Andric // we found before, check if this corresponds to a big or little endian byte 3503*e8d8bef9SDimitry Andric // pattern. If it does, then we can represent it using a load + possibly a 3504*e8d8bef9SDimitry Andric // BSWAP. 3505*e8d8bef9SDimitry Andric bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); 3506*e8d8bef9SDimitry Andric Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); 3507*e8d8bef9SDimitry Andric if (!IsBigEndian.hasValue()) 3508*e8d8bef9SDimitry Andric return false; 3509*e8d8bef9SDimitry Andric bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; 3510*e8d8bef9SDimitry Andric if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) 3511*e8d8bef9SDimitry Andric return false; 3512*e8d8bef9SDimitry Andric 3513*e8d8bef9SDimitry Andric // Make sure that the load from the lowest index produces offset 0 in the 3514*e8d8bef9SDimitry Andric // final value. 3515*e8d8bef9SDimitry Andric // 3516*e8d8bef9SDimitry Andric // This ensures that we won't combine something like this: 3517*e8d8bef9SDimitry Andric // 3518*e8d8bef9SDimitry Andric // load x[i] -> byte 2 3519*e8d8bef9SDimitry Andric // load x[i+1] -> byte 0 ---> wide_load x[i] 3520*e8d8bef9SDimitry Andric // load x[i+2] -> byte 1 3521*e8d8bef9SDimitry Andric const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; 3522*e8d8bef9SDimitry Andric const unsigned ZeroByteOffset = 3523*e8d8bef9SDimitry Andric *IsBigEndian 3524*e8d8bef9SDimitry Andric ? bigEndianByteAt(NumLoadsInTy, 0) 3525*e8d8bef9SDimitry Andric : littleEndianByteAt(NumLoadsInTy, 0); 3526*e8d8bef9SDimitry Andric auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); 3527*e8d8bef9SDimitry Andric if (ZeroOffsetIdx == MemOffset2Idx.end() || 3528*e8d8bef9SDimitry Andric ZeroOffsetIdx->second != LowestIdx) 3529*e8d8bef9SDimitry Andric return false; 3530*e8d8bef9SDimitry Andric 3531*e8d8bef9SDimitry Andric // We wil reuse the pointer from the load which ends up at byte offset 0. It 3532*e8d8bef9SDimitry Andric // may not use index 0. 3533*e8d8bef9SDimitry Andric Register Ptr = LowestIdxLoad->getOperand(1).getReg(); 3534*e8d8bef9SDimitry Andric const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin(); 3535*e8d8bef9SDimitry Andric LegalityQuery::MemDesc MMDesc; 3536*e8d8bef9SDimitry Andric MMDesc.SizeInBits = WideMemSizeInBits; 3537*e8d8bef9SDimitry Andric MMDesc.AlignInBits = MMO.getAlign().value() * 8; 3538*e8d8bef9SDimitry Andric MMDesc.Ordering = MMO.getOrdering(); 3539*e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 3540*e8d8bef9SDimitry Andric {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) 3541*e8d8bef9SDimitry Andric return false; 3542*e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 3543*e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); 3544*e8d8bef9SDimitry Andric 3545*e8d8bef9SDimitry Andric // Load must be allowed and fast on the target. 3546*e8d8bef9SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 3547*e8d8bef9SDimitry Andric auto &DL = MF.getDataLayout(); 3548*e8d8bef9SDimitry Andric bool Fast = false; 3549*e8d8bef9SDimitry Andric if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || 3550*e8d8bef9SDimitry Andric !Fast) 3551*e8d8bef9SDimitry Andric return false; 3552*e8d8bef9SDimitry Andric 3553*e8d8bef9SDimitry Andric MatchInfo = [=](MachineIRBuilder &MIB) { 3554*e8d8bef9SDimitry Andric Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst; 3555*e8d8bef9SDimitry Andric MIB.buildLoad(LoadDst, Ptr, *NewMMO); 3556*e8d8bef9SDimitry Andric if (NeedsBSwap) 3557*e8d8bef9SDimitry Andric MIB.buildBSwap(Dst, LoadDst); 3558*e8d8bef9SDimitry Andric }; 3559*e8d8bef9SDimitry Andric return true; 3560*e8d8bef9SDimitry Andric } 3561*e8d8bef9SDimitry Andric 3562*e8d8bef9SDimitry Andric bool CombinerHelper::applyLoadOrCombine( 3563*e8d8bef9SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3564*e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3565*e8d8bef9SDimitry Andric MatchInfo(Builder); 3566*e8d8bef9SDimitry Andric MI.eraseFromParent(); 3567*e8d8bef9SDimitry Andric return true; 3568*e8d8bef9SDimitry Andric } 3569*e8d8bef9SDimitry Andric 35700b57cec5SDimitry Andric bool CombinerHelper::tryCombine(MachineInstr &MI) { 35710b57cec5SDimitry Andric if (tryCombineCopy(MI)) 35720b57cec5SDimitry Andric return true; 35738bcb0991SDimitry Andric if (tryCombineExtendingLoads(MI)) 35748bcb0991SDimitry Andric return true; 35758bcb0991SDimitry Andric if (tryCombineIndexedLoadStore(MI)) 35768bcb0991SDimitry Andric return true; 35778bcb0991SDimitry Andric return false; 35780b57cec5SDimitry Andric } 3579