//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;
300b57cec5SDimitry Andric 318bcb0991SDimitry Andric // Option to allow testing of the combiner while no targets know about indexed 328bcb0991SDimitry Andric // addressing. 338bcb0991SDimitry Andric static cl::opt<bool> 348bcb0991SDimitry Andric ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), 358bcb0991SDimitry Andric cl::desc("Force all indexed operations to be " 368bcb0991SDimitry Andric "legal for the GlobalISel combiner")); 378bcb0991SDimitry Andric 380b57cec5SDimitry Andric CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, 398bcb0991SDimitry Andric MachineIRBuilder &B, GISelKnownBits *KB, 405ffd83dbSDimitry Andric MachineDominatorTree *MDT, 415ffd83dbSDimitry Andric const LegalizerInfo *LI) 428bcb0991SDimitry Andric : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), 435ffd83dbSDimitry Andric KB(KB), MDT(MDT), LI(LI) { 448bcb0991SDimitry Andric (void)this->KB; 458bcb0991SDimitry Andric } 460b57cec5SDimitry Andric 47e8d8bef9SDimitry Andric const TargetLowering &CombinerHelper::getTargetLowering() const { 48e8d8bef9SDimitry Andric return *Builder.getMF().getSubtarget().getTargetLowering(); 49e8d8bef9SDimitry Andric } 50e8d8bef9SDimitry Andric 51e8d8bef9SDimitry Andric /// \returns The little endian in-memory byte position of byte \p I in a 52e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 53e8d8bef9SDimitry Andric /// 54e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 0 55e8d8bef9SDimitry Andric static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { 56e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 57e8d8bef9SDimitry Andric return I; 58e8d8bef9SDimitry Andric } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric /// \returns The big endian in-memory byte position of byte \p I in a 61e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 62e8d8bef9SDimitry Andric /// 63e8d8bef9SDimitry Andric /// E.g. 
Given a 4-byte type x, x[0] -> byte 3 64e8d8bef9SDimitry Andric static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { 65e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 66e8d8bef9SDimitry Andric return ByteWidth - I - 1; 67e8d8bef9SDimitry Andric } 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric /// Given a map from byte offsets in memory to indices in a load/store, 70e8d8bef9SDimitry Andric /// determine if that map corresponds to a little or big endian byte pattern. 71e8d8bef9SDimitry Andric /// 72e8d8bef9SDimitry Andric /// \param MemOffset2Idx maps memory offsets to address offsets. 73e8d8bef9SDimitry Andric /// \param LowestIdx is the lowest index in \p MemOffset2Idx. 74e8d8bef9SDimitry Andric /// 75e8d8bef9SDimitry Andric /// \returns true if the map corresponds to a big endian byte pattern, false 76e8d8bef9SDimitry Andric /// if it corresponds to a little endian byte pattern, and None otherwise. 77e8d8bef9SDimitry Andric /// 78e8d8bef9SDimitry Andric /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns 79e8d8bef9SDimitry Andric /// are as follows: 80e8d8bef9SDimitry Andric /// 81e8d8bef9SDimitry Andric /// AddrOffset Little endian Big endian 82e8d8bef9SDimitry Andric /// 0 0 3 83e8d8bef9SDimitry Andric /// 1 1 2 84e8d8bef9SDimitry Andric /// 2 2 1 85e8d8bef9SDimitry Andric /// 3 3 0 86e8d8bef9SDimitry Andric static Optional<bool> 87e8d8bef9SDimitry Andric isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 88e8d8bef9SDimitry Andric int64_t LowestIdx) { 89e8d8bef9SDimitry Andric // Need at least two byte positions to decide on endianness. 
90e8d8bef9SDimitry Andric unsigned Width = MemOffset2Idx.size(); 91e8d8bef9SDimitry Andric if (Width < 2) 92e8d8bef9SDimitry Andric return None; 93e8d8bef9SDimitry Andric bool BigEndian = true, LittleEndian = true; 94e8d8bef9SDimitry Andric for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { 95e8d8bef9SDimitry Andric auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); 96e8d8bef9SDimitry Andric if (MemOffsetAndIdx == MemOffset2Idx.end()) 97e8d8bef9SDimitry Andric return None; 98e8d8bef9SDimitry Andric const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; 99e8d8bef9SDimitry Andric assert(Idx >= 0 && "Expected non-negative byte offset?"); 100e8d8bef9SDimitry Andric LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); 101e8d8bef9SDimitry Andric BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); 102e8d8bef9SDimitry Andric if (!BigEndian && !LittleEndian) 103e8d8bef9SDimitry Andric return None; 104e8d8bef9SDimitry Andric } 105e8d8bef9SDimitry Andric 106e8d8bef9SDimitry Andric assert((BigEndian != LittleEndian) && 107e8d8bef9SDimitry Andric "Pattern cannot be both big and little endian!"); 108e8d8bef9SDimitry Andric return BigEndian; 109e8d8bef9SDimitry Andric } 110e8d8bef9SDimitry Andric 111e8d8bef9SDimitry Andric bool CombinerHelper::isLegalOrBeforeLegalizer( 112e8d8bef9SDimitry Andric const LegalityQuery &Query) const { 113e8d8bef9SDimitry Andric return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; 114e8d8bef9SDimitry Andric } 115e8d8bef9SDimitry Andric 1160b57cec5SDimitry Andric void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, 1170b57cec5SDimitry Andric Register ToReg) const { 1180b57cec5SDimitry Andric Observer.changingAllUsesOfReg(MRI, FromReg); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric if (MRI.constrainRegAttrs(ToReg, FromReg)) 1210b57cec5SDimitry Andric MRI.replaceRegWith(FromReg, ToReg); 1220b57cec5SDimitry Andric else 1230b57cec5SDimitry Andric Builder.buildCopy(ToReg, 
FromReg); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric Observer.finishedChangingAllUsesOfReg(); 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, 1290b57cec5SDimitry Andric MachineOperand &FromRegOp, 1300b57cec5SDimitry Andric Register ToReg) const { 1310b57cec5SDimitry Andric assert(FromRegOp.getParent() && "Expected an operand in an MI"); 1320b57cec5SDimitry Andric Observer.changingInstr(*FromRegOp.getParent()); 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric FromRegOp.setReg(ToReg); 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric Observer.changedInstr(*FromRegOp.getParent()); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { 1400b57cec5SDimitry Andric if (matchCombineCopy(MI)) { 1410b57cec5SDimitry Andric applyCombineCopy(MI); 1420b57cec5SDimitry Andric return true; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric return false; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { 1470b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::COPY) 1480b57cec5SDimitry Andric return false; 1498bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1508bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1515ffd83dbSDimitry Andric return canReplaceReg(DstReg, SrcReg, MRI); 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric void CombinerHelper::applyCombineCopy(MachineInstr &MI) { 1548bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1558bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1560b57cec5SDimitry Andric MI.eraseFromParent(); 1570b57cec5SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1608bcb0991SDimitry Andric bool 
CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { 1618bcb0991SDimitry Andric bool IsUndef = false; 1628bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 1638bcb0991SDimitry Andric if (matchCombineConcatVectors(MI, IsUndef, Ops)) { 1648bcb0991SDimitry Andric applyCombineConcatVectors(MI, IsUndef, Ops); 1658bcb0991SDimitry Andric return true; 1668bcb0991SDimitry Andric } 1678bcb0991SDimitry Andric return false; 1688bcb0991SDimitry Andric } 1698bcb0991SDimitry Andric 1708bcb0991SDimitry Andric bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, 1718bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 1728bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 1738bcb0991SDimitry Andric "Invalid instruction"); 1748bcb0991SDimitry Andric IsUndef = true; 1758bcb0991SDimitry Andric MachineInstr *Undef = nullptr; 1768bcb0991SDimitry Andric 1778bcb0991SDimitry Andric // Walk over all the operands of concat vectors and check if they are 1788bcb0991SDimitry Andric // build_vector themselves or undef. 1798bcb0991SDimitry Andric // Then collect their operands in Ops. 180480093f4SDimitry Andric for (const MachineOperand &MO : MI.uses()) { 1818bcb0991SDimitry Andric Register Reg = MO.getReg(); 1828bcb0991SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 1838bcb0991SDimitry Andric assert(Def && "Operand not defined"); 1848bcb0991SDimitry Andric switch (Def->getOpcode()) { 1858bcb0991SDimitry Andric case TargetOpcode::G_BUILD_VECTOR: 1868bcb0991SDimitry Andric IsUndef = false; 1878bcb0991SDimitry Andric // Remember the operands of the build_vector to fold 1888bcb0991SDimitry Andric // them into the yet-to-build flattened concat vectors. 
189480093f4SDimitry Andric for (const MachineOperand &BuildVecMO : Def->uses()) 1908bcb0991SDimitry Andric Ops.push_back(BuildVecMO.getReg()); 1918bcb0991SDimitry Andric break; 1928bcb0991SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 1938bcb0991SDimitry Andric LLT OpType = MRI.getType(Reg); 1948bcb0991SDimitry Andric // Keep one undef value for all the undef operands. 1958bcb0991SDimitry Andric if (!Undef) { 1968bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 1978bcb0991SDimitry Andric Undef = Builder.buildUndef(OpType.getScalarType()); 1988bcb0991SDimitry Andric } 1998bcb0991SDimitry Andric assert(MRI.getType(Undef->getOperand(0).getReg()) == 2008bcb0991SDimitry Andric OpType.getScalarType() && 2018bcb0991SDimitry Andric "All undefs should have the same type"); 2028bcb0991SDimitry Andric // Break the undef vector in as many scalar elements as needed 2038bcb0991SDimitry Andric // for the flattening. 2048bcb0991SDimitry Andric for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); 2058bcb0991SDimitry Andric EltIdx != EltEnd; ++EltIdx) 2068bcb0991SDimitry Andric Ops.push_back(Undef->getOperand(0).getReg()); 2078bcb0991SDimitry Andric break; 2088bcb0991SDimitry Andric } 2098bcb0991SDimitry Andric default: 2108bcb0991SDimitry Andric return false; 2118bcb0991SDimitry Andric } 2128bcb0991SDimitry Andric } 2138bcb0991SDimitry Andric return true; 2148bcb0991SDimitry Andric } 2158bcb0991SDimitry Andric void CombinerHelper::applyCombineConcatVectors( 2168bcb0991SDimitry Andric MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { 2178bcb0991SDimitry Andric // We determined that the concat_vectors can be flatten. 2188bcb0991SDimitry Andric // Generate the flattened build_vector. 
2198bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2208bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2218bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 2228bcb0991SDimitry Andric 2238bcb0991SDimitry Andric // Note: IsUndef is sort of redundant. We could have determine it by 2248bcb0991SDimitry Andric // checking that at all Ops are undef. Alternatively, we could have 2258bcb0991SDimitry Andric // generate a build_vector of undefs and rely on another combine to 2268bcb0991SDimitry Andric // clean that up. For now, given we already gather this information 2278bcb0991SDimitry Andric // in tryCombineConcatVectors, just save compile time and issue the 2288bcb0991SDimitry Andric // right thing. 2298bcb0991SDimitry Andric if (IsUndef) 2308bcb0991SDimitry Andric Builder.buildUndef(NewDstReg); 2318bcb0991SDimitry Andric else 2328bcb0991SDimitry Andric Builder.buildBuildVector(NewDstReg, Ops); 2338bcb0991SDimitry Andric MI.eraseFromParent(); 2348bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 2358bcb0991SDimitry Andric } 2368bcb0991SDimitry Andric 2378bcb0991SDimitry Andric bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { 2388bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 2398bcb0991SDimitry Andric if (matchCombineShuffleVector(MI, Ops)) { 2408bcb0991SDimitry Andric applyCombineShuffleVector(MI, Ops); 2418bcb0991SDimitry Andric return true; 2428bcb0991SDimitry Andric } 2438bcb0991SDimitry Andric return false; 2448bcb0991SDimitry Andric } 2458bcb0991SDimitry Andric 2468bcb0991SDimitry Andric bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, 2478bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 2488bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && 2498bcb0991SDimitry Andric "Invalid instruction kind"); 2508bcb0991SDimitry Andric LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 2518bcb0991SDimitry Andric Register 
Src1 = MI.getOperand(1).getReg(); 2528bcb0991SDimitry Andric LLT SrcType = MRI.getType(Src1); 253480093f4SDimitry Andric // As bizarre as it may look, shuffle vector can actually produce 254480093f4SDimitry Andric // scalar! This is because at the IR level a <1 x ty> shuffle 255480093f4SDimitry Andric // vector is perfectly valid. 256480093f4SDimitry Andric unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; 257480093f4SDimitry Andric unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; 2588bcb0991SDimitry Andric 2598bcb0991SDimitry Andric // If the resulting vector is smaller than the size of the source 2608bcb0991SDimitry Andric // vectors being concatenated, we won't be able to replace the 2618bcb0991SDimitry Andric // shuffle vector into a concat_vectors. 2628bcb0991SDimitry Andric // 2638bcb0991SDimitry Andric // Note: We may still be able to produce a concat_vectors fed by 2648bcb0991SDimitry Andric // extract_vector_elt and so on. It is less clear that would 2658bcb0991SDimitry Andric // be better though, so don't bother for now. 266480093f4SDimitry Andric // 267480093f4SDimitry Andric // If the destination is a scalar, the size of the sources doesn't 268480093f4SDimitry Andric // matter. we will lower the shuffle to a plain copy. This will 269480093f4SDimitry Andric // work only if the source and destination have the same size. But 270480093f4SDimitry Andric // that's covered by the next condition. 271480093f4SDimitry Andric // 272480093f4SDimitry Andric // TODO: If the size between the source and destination don't match 273480093f4SDimitry Andric // we could still emit an extract vector element in that case. 274480093f4SDimitry Andric if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) 2758bcb0991SDimitry Andric return false; 2768bcb0991SDimitry Andric 2778bcb0991SDimitry Andric // Check that the shuffle mask can be broken evenly between the 2788bcb0991SDimitry Andric // different sources. 
2798bcb0991SDimitry Andric if (DstNumElts % SrcNumElts != 0) 2808bcb0991SDimitry Andric return false; 2818bcb0991SDimitry Andric 2828bcb0991SDimitry Andric // Mask length is a multiple of the source vector length. 2838bcb0991SDimitry Andric // Check if the shuffle is some kind of concatenation of the input 2848bcb0991SDimitry Andric // vectors. 2858bcb0991SDimitry Andric unsigned NumConcat = DstNumElts / SrcNumElts; 2868bcb0991SDimitry Andric SmallVector<int, 8> ConcatSrcs(NumConcat, -1); 287480093f4SDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 2888bcb0991SDimitry Andric for (unsigned i = 0; i != DstNumElts; ++i) { 2898bcb0991SDimitry Andric int Idx = Mask[i]; 2908bcb0991SDimitry Andric // Undef value. 2918bcb0991SDimitry Andric if (Idx < 0) 2928bcb0991SDimitry Andric continue; 2938bcb0991SDimitry Andric // Ensure the indices in each SrcType sized piece are sequential and that 2948bcb0991SDimitry Andric // the same source is used for the whole piece. 2958bcb0991SDimitry Andric if ((Idx % SrcNumElts != (i % SrcNumElts)) || 2968bcb0991SDimitry Andric (ConcatSrcs[i / SrcNumElts] >= 0 && 2978bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) 2988bcb0991SDimitry Andric return false; 2998bcb0991SDimitry Andric // Remember which source this index came from. 3008bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; 3018bcb0991SDimitry Andric } 3028bcb0991SDimitry Andric 3038bcb0991SDimitry Andric // The shuffle is concatenating multiple vectors together. 3048bcb0991SDimitry Andric // Collect the different operands for that. 
3058bcb0991SDimitry Andric Register UndefReg; 3068bcb0991SDimitry Andric Register Src2 = MI.getOperand(2).getReg(); 3078bcb0991SDimitry Andric for (auto Src : ConcatSrcs) { 3088bcb0991SDimitry Andric if (Src < 0) { 3098bcb0991SDimitry Andric if (!UndefReg) { 3108bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3118bcb0991SDimitry Andric UndefReg = Builder.buildUndef(SrcType).getReg(0); 3128bcb0991SDimitry Andric } 3138bcb0991SDimitry Andric Ops.push_back(UndefReg); 3148bcb0991SDimitry Andric } else if (Src == 0) 3158bcb0991SDimitry Andric Ops.push_back(Src1); 3168bcb0991SDimitry Andric else 3178bcb0991SDimitry Andric Ops.push_back(Src2); 3188bcb0991SDimitry Andric } 3198bcb0991SDimitry Andric return true; 3208bcb0991SDimitry Andric } 3218bcb0991SDimitry Andric 3228bcb0991SDimitry Andric void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, 3238bcb0991SDimitry Andric const ArrayRef<Register> Ops) { 3248bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3258bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3268bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 3278bcb0991SDimitry Andric 328480093f4SDimitry Andric if (Ops.size() == 1) 329480093f4SDimitry Andric Builder.buildCopy(NewDstReg, Ops[0]); 330480093f4SDimitry Andric else 331480093f4SDimitry Andric Builder.buildMerge(NewDstReg, Ops); 3328bcb0991SDimitry Andric 3338bcb0991SDimitry Andric MI.eraseFromParent(); 3348bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 3358bcb0991SDimitry Andric } 3368bcb0991SDimitry Andric 3370b57cec5SDimitry Andric namespace { 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric /// Select a preference between two uses. CurrentUse is the current preference 3400b57cec5SDimitry Andric /// while *ForCandidate is attributes of the candidate under consideration. 
3410b57cec5SDimitry Andric PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, 3425ffd83dbSDimitry Andric const LLT TyForCandidate, 3430b57cec5SDimitry Andric unsigned OpcodeForCandidate, 3440b57cec5SDimitry Andric MachineInstr *MIForCandidate) { 3450b57cec5SDimitry Andric if (!CurrentUse.Ty.isValid()) { 3460b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == OpcodeForCandidate || 3470b57cec5SDimitry Andric CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) 3480b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3490b57cec5SDimitry Andric return CurrentUse; 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric // We permit the extend to hoist through basic blocks but this is only 3530b57cec5SDimitry Andric // sensible if the target has extending loads. If you end up lowering back 3540b57cec5SDimitry Andric // into a load and extend during the legalizer then the end result is 3550b57cec5SDimitry Andric // hoisting the extend up to the load. 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric // Prefer defined extensions to undefined extensions as these are more 3580b57cec5SDimitry Andric // likely to reduce the number of instructions. 3590b57cec5SDimitry Andric if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && 3600b57cec5SDimitry Andric CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) 3610b57cec5SDimitry Andric return CurrentUse; 3620b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && 3630b57cec5SDimitry Andric OpcodeForCandidate != TargetOpcode::G_ANYEXT) 3640b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric // Prefer sign extensions to zero extensions as sign-extensions tend to be 3670b57cec5SDimitry Andric // more expensive. 
3680b57cec5SDimitry Andric if (CurrentUse.Ty == TyForCandidate) { 3690b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && 3700b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_ZEXT) 3710b57cec5SDimitry Andric return CurrentUse; 3720b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && 3730b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_SEXT) 3740b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric // This is potentially target specific. We've chosen the largest type 3780b57cec5SDimitry Andric // because G_TRUNC is usually free. One potential catch with this is that 3790b57cec5SDimitry Andric // some targets have a reduced number of larger registers than smaller 3800b57cec5SDimitry Andric // registers and this choice potentially increases the live-range for the 3810b57cec5SDimitry Andric // larger value. 3820b57cec5SDimitry Andric if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { 3830b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric return CurrentUse; 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric /// Find a suitable place to insert some instructions and insert them. This 3890b57cec5SDimitry Andric /// function accounts for special cases like inserting before a PHI node. 3900b57cec5SDimitry Andric /// The current strategy for inserting before PHI's is to duplicate the 3910b57cec5SDimitry Andric /// instructions for each predecessor. However, while that's ok for G_TRUNC 3920b57cec5SDimitry Andric /// on most targets since it generally requires no code, other targets/cases may 3930b57cec5SDimitry Andric /// want to try harder to find a dominating block. 
3940b57cec5SDimitry Andric static void InsertInsnsWithoutSideEffectsBeforeUse( 3950b57cec5SDimitry Andric MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, 3960b57cec5SDimitry Andric std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, 3970b57cec5SDimitry Andric MachineOperand &UseMO)> 3980b57cec5SDimitry Andric Inserter) { 3990b57cec5SDimitry Andric MachineInstr &UseMI = *UseMO.getParent(); 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric MachineBasicBlock *InsertBB = UseMI.getParent(); 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // If the use is a PHI then we want the predecessor block instead. 4040b57cec5SDimitry Andric if (UseMI.isPHI()) { 4050b57cec5SDimitry Andric MachineOperand *PredBB = std::next(&UseMO); 4060b57cec5SDimitry Andric InsertBB = PredBB->getMBB(); 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric // If the block is the same block as the def then we want to insert just after 4100b57cec5SDimitry Andric // the def instead of at the start of the block. 
4110b57cec5SDimitry Andric if (InsertBB == DefMI.getParent()) { 4120b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPt = &DefMI; 4130b57cec5SDimitry Andric Inserter(InsertBB, std::next(InsertPt), UseMO); 4140b57cec5SDimitry Andric return; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric // Otherwise we want the start of the BB 4180b57cec5SDimitry Andric Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric } // end anonymous namespace 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { 4230b57cec5SDimitry Andric PreferredTuple Preferred; 4240b57cec5SDimitry Andric if (matchCombineExtendingLoads(MI, Preferred)) { 4250b57cec5SDimitry Andric applyCombineExtendingLoads(MI, Preferred); 4260b57cec5SDimitry Andric return true; 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric return false; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, 4320b57cec5SDimitry Andric PreferredTuple &Preferred) { 4330b57cec5SDimitry Andric // We match the loads and follow the uses to the extend instead of matching 4340b57cec5SDimitry Andric // the extends and following the def to the load. This is because the load 4350b57cec5SDimitry Andric // must remain in the same position for correctness (unless we also add code 4360b57cec5SDimitry Andric // to find a safe place to sink it) whereas the extend is freely movable. 4370b57cec5SDimitry Andric // It also prevents us from duplicating the load for the volatile case or just 4380b57cec5SDimitry Andric // for performance. 
4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_LOAD && 4410b57cec5SDimitry Andric MI.getOpcode() != TargetOpcode::G_SEXTLOAD && 4420b57cec5SDimitry Andric MI.getOpcode() != TargetOpcode::G_ZEXTLOAD) 4430b57cec5SDimitry Andric return false; 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric auto &LoadValue = MI.getOperand(0); 4460b57cec5SDimitry Andric assert(LoadValue.isReg() && "Result wasn't a register?"); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric LLT LoadValueTy = MRI.getType(LoadValue.getReg()); 4490b57cec5SDimitry Andric if (!LoadValueTy.isScalar()) 4500b57cec5SDimitry Andric return false; 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric // Most architectures are going to legalize <s8 loads into at least a 1 byte 4530b57cec5SDimitry Andric // load, and the MMOs can only describe memory accesses in multiples of bytes. 4540b57cec5SDimitry Andric // If we try to perform extload combining on those, we can end up with 4550b57cec5SDimitry Andric // %a(s8) = extload %ptr (load 1 byte from %ptr) 4560b57cec5SDimitry Andric // ... which is an illegal extload instruction. 4570b57cec5SDimitry Andric if (LoadValueTy.getSizeInBits() < 8) 4580b57cec5SDimitry Andric return false; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric // For non power-of-2 types, they will very likely be legalized into multiple 4610b57cec5SDimitry Andric // loads. Don't bother trying to match them into extending loads. 4620b57cec5SDimitry Andric if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) 4630b57cec5SDimitry Andric return false; 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // Find the preferred type aside from the any-extends (unless it's the only 4660b57cec5SDimitry Andric // one) and non-extending ops. We'll emit an extending load to that type and 4670b57cec5SDimitry Andric // and emit a variant of (extend (trunc X)) for the others according to the 4680b57cec5SDimitry Andric // relative type sizes. 
  // At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  // Walk all (non-debug) users of the loaded value looking for extends.
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      // Check for legality: the candidate extending load (use type, source
      // type, and the load's memory description) must be Legal as-is, since
      // this combine runs after legalization.
      if (LI) {
        LegalityQuery::MemDesc MMDesc;
        const auto &MMO = **MI.memoperands_begin();
        MMDesc.SizeInBits = MMO.getSizeInBits();
        MMDesc.AlignInBits = MMO.getAlign().value() * 8;
        MMDesc.Ordering = MMO.getOrdering();
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
        if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
            LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends to choose from.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

/// Rewrite the load \p MI into the extending-load form selected by
/// matchCombineExtendingLoads (recorded in \p Preferred), then patch up every
/// user of the old loaded value: compatible extends are merged or re-sourced,
/// and any other use gets a truncate back to the originally-loaded type.
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    // Reuse a truncate previously emitted into this block, if any.
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  // Mutate the load's opcode in place to the matching extending load.
  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  // Snapshot the use list first: the loop below erases/rewrites uses, which
  // would invalidate a live use_operands() iteration.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is large, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  // Finally redirect the load itself to define the preferred use's vreg.
  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

/// \returns true if \p DefMI comes before \p UseMI within the same basic
/// block, determined by a linear scan for whichever of the two appears first.
/// Both instructions must be in the same block and must not be debug
/// instructions.
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  // Whichever of the two we hit first is the earlier instruction.
  return &*DefOrUse == &DefMI;
}

/// \returns true if \p DefMI dominates \p UseMI, using the dominator tree when
/// one is available and falling back to a same-block linear scan otherwise.
bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    // Without a dominator tree we can only reason within a single block.
    return false;

  return isPredecessor(DefMI, UseMI);
}

/// Match a G_SEXT_INREG whose (possibly truncated) source is a G_SEXTLOAD
/// that already sign-extended from exactly the same bit width; in that case
/// the G_SEXT_INREG is redundant and can be replaced by a plain copy.
bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  // Scalars only for now.
  if (MRI.getType(SrcReg).isVector())
    return false;

  // Look through a G_TRUNC between the sext_inreg and the load.
  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
    const auto &MMO = **LoadMI->memoperands_begin();
    // If truncating more than the original extended value, abort.
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
      return false;
    if (MMO.getSizeInBits() == SizeInBits)
      return true;
  }
  return false;
}

/// The matched G_SEXT_INREG is a no-op (see matchSextTruncSextLoad); replace
/// it with a copy of its input.
bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}

/// Match a G_SEXT_INREG fed by a single-use G_LOAD that can instead be folded
/// into a (possibly narrower) G_SEXTLOAD. On success \p MatchInfo holds the
/// load's result register and the new memory width in bits.
bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  // Only supports scalars for now.
  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
    return false;

  // The load must have no other (non-debug) users, since we mutate its form.
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
    return false;

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  auto &MMO = **LoadDef->memoperands_begin();
  // Don't do this for non-simple loads.
  if (MMO.isAtomic() || MMO.isVolatile())
    return false;

  // Avoid widening the load at all.
  unsigned NewSizeBits =
      std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;
  MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
  return true;
}

/// Rewrite the matched G_SEXT_INREG + G_LOAD pair into a single G_SEXTLOAD
/// with the narrowed memory width recorded in \p MatchInfo.
bool CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  auto *LoadDef = MRI.getVRegDef(LoadReg);
  assert(LoadDef && "Expected a load reg");

  // If we have the following:
  // %ld = G_LOAD %ptr, (load 2)
  // %ext = G_SEXT_INREG %ld, 8
  //    ==>
  // %ld = G_SEXTLOAD %ptr (load 1)
  // Build a new MMO narrowed to the new width, then emit the G_SEXTLOAD that
  // defines the original destination directly.
  auto &MMO = **LoadDef->memoperands_begin();
  Builder.setInstrAndDebugLoc(MI);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getOperand(1).getReg(), *NewMMO);
  MI.eraseFromParent();
  return true;
}

/// Look for a G_PTR_ADD user of \p MI's address that could be folded into a
/// post-indexed load/store form of \p MI. On success, \p Addr is the ptr_add's
/// result (the written-back address) and \p Base / \p Offset are its operands.
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Base = MI.getOperand(1).getReg();
  // Frame-index bases are skipped; NOTE(review): presumably because the
  // writeback register would need a copy anyway — confirm with target docs.
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.

    // Every user of the ptr_add result must come after the mem-op, since the
    // mem-op will be the instruction producing that value after the rewrite.
    bool MemOpDominatesAddrUses = true;
    for (auto &PtrAddUse :
         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}

/// Check whether \p MI's address is a G_PTR_ADD that could be folded into a
/// pre-indexed load/store form of \p MI. On success, \p Addr is the ptr_add's
/// result and \p Base / \p Offset are its operands.
bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  // Bail if the address isn't a ptr_add, or if the mem-op is its only
  // (non-debug) user — NOTE(review): with a single use there is presumably no
  // other consumer of the written-back address, so pre-indexing gains nothing;
  // confirm intent.
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.

  // All users of the address must come after the mem-op, which will define
  // the written-back address once rewritten.
  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}

/// Convenience wrapper: match and immediately apply the indexed load/store
/// combine on \p MI. \returns true if the rewrite happened.
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}

/// Match a load/store whose address arithmetic can be folded into a pre- or
/// post-indexed form. Pre-indexing is tried first; the chosen candidate's
/// registers and direction are recorded in \p MatchInfo.
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  // For now, no targets actually support these opcodes so don't waste time
  // running these unless we're forced to for testing.
  if (!ForceLegalIndexing)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;

  return true;
}

/// Rewrite the matched load/store into the corresponding G_INDEXED_* opcode,
/// erasing both the original mem-op and the folded G_PTR_ADD.
void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  // A local builder pinned at MI; NOTE(review): bypasses the member Builder —
  // presumably to avoid disturbing its insertion point; confirm.
  MachineIRBuilder MIRBuilder(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  // Indexed forms define the written-back address; loads additionally define
  // the loaded value, stores instead use the stored value.
  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
}

/// Match a G_BRCOND/G_BR pair whose conditional target is the layout
/// successor, so that inverting the condition lets the block fall through
/// instead of always branching.
bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;

  // Try to match the following:
  // bb1:
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with conditional branch to bb3 and otherwise
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  // The G_BR must be preceded by the G_BRCOND in the same block.
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make sure
  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}

/// Invert the matched G_BRCOND's condition (via XOR with the "true" value),
/// retarget it at the old G_BR destination, and make the G_BR jump to the old
/// conditional target, which is the layout successor and thus a fallthrough.
void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  MachineBasicBlock::iterator BrIt(MI);
  MachineInstr *BrCond = &*std::prev(BrIt);

  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  // Point the unconditional branch at the old conditional target...
  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}

/// \returns true if memory-function lowering should favor code size for \p MF.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; an invalid LLT() means it has
  // no preference and we fall back to the largest alignment-compatible scalar.
  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    // Shrink the current type until it fits the remaining size, or decide to
    // use an unaligned, overlapping access instead.
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      // Largest power of two strictly smaller than the current width.
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

/// Map an LLT to the equivalent IR type: a fixed vector of integers for
/// vector LLTs, otherwise a plain integer type of the same width.
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  // Scalar destination with a known constant byte: splat at compile time.
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  // NOTE(review): when NumBits == 8 the ZExt built above is left unused —
  // presumably harmless dead code; confirm it gets cleaned up downstream.
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}

/// Lower a G_MEMSET with compile-time-constant length \p KnownLen into a
/// sequence of stores, subject to the target's store-count limit. On success
/// the original instruction \p MI is erased and true is returned.
bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
                                    Register Val, unsigned KnownLen,
                                    Align Alignment, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  // The destination alignment may only be raised when it is a non-fixed
  // stack object (a G_FRAME_INDEX we are allowed to re-align).
  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return false;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        // Truncation isn't free; rebuild the pattern at the smaller width.
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return false;
    }

    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return true;
}

/// Lower a G_MEMCPY with compile-time-constant length \p KnownLen into a
/// sequence of load/store pairs. On success \p MI is erased and true is
/// returned.
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
                                    Register Src, unsigned KnownLen,
                                    Align DstAlign, Align SrcAlign,
                                    bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto
      &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  // Alignment may only be raised when the destination is a non-fixed stack
  // object we are allowed to re-align.
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
  std::vector<LLT> MemOps;

  // G_MEMCPY carries two memoperands: destination first, then source.
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores
  // mixed types, depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store, reusing the offset register computed for the load.
    Register StorePtr =
        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return true;
}

/// Lower a G_MEMMOVE with compile-time-constant length \p KnownLen. All loads
/// are emitted before any store so overlapping source/destination ranges stay
/// correct. On success \p MI is erased and true is returned.
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
                                     Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in it's findOptimalMemOpLowering implementation. For now do the
  // same thing here. (Passing /*IsVolatile*/ true is what disables overlap.)
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  // Second pass: emit the stores, one per collected type, in the same order.
  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return true;
}

/// Entry point for inlining the G_MEMCPY / G_MEMMOVE / G_MEMSET family when
/// the length operand is a compile-time constant. \p MaxLen, when non-zero,
/// caps the lengths this combine will attempt. Returns true (erasing \p MI)
/// on success.
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();
  // Don't try to optimize volatile.
  if (IsVolatile)
    return false;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(0).getReg();
  // For G_MEMSET operand 1 is the value to store rather than a source pointer.
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return false; // Leave it to the legalizer to lower it to a libcall.
  unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();

  if (KnownLen == 0) {
    // Zero-length operations are no-ops; just delete the instruction.
    MI.eraseFromParent();
    return true;
  }

  if (MaxLen && KnownLen > MaxLen)
    return false;

  if (Opc == TargetOpcode::G_MEMCPY)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMMOVE)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return false;
}

/// Constant-fold a unary floating-point opcode \p Opcode applied to \p Op.
/// \returns None when \p Op is not defined by an FP constant; otherwise the
/// folded value, already converted to \p DstTy's semantics.
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
                                             const Register Op,
                                             const MachineRegisterInfo &MRI) {
  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
  if (!MaybeCst)
    return None;

  APFloat V = MaybeCst->getValueAPF();
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_FNEG: {
    V.changeSign();
    return V;
  }
  case TargetOpcode::G_FABS: {
    V.clearSign();
    return V;
Andric } 1512e8d8bef9SDimitry Andric case TargetOpcode::G_FPTRUNC: 1513e8d8bef9SDimitry Andric break; 1514e8d8bef9SDimitry Andric case TargetOpcode::G_FSQRT: { 1515e8d8bef9SDimitry Andric bool Unused; 1516e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1517e8d8bef9SDimitry Andric V = APFloat(sqrt(V.convertToDouble())); 1518e8d8bef9SDimitry Andric break; 1519e8d8bef9SDimitry Andric } 1520e8d8bef9SDimitry Andric case TargetOpcode::G_FLOG2: { 1521e8d8bef9SDimitry Andric bool Unused; 1522e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1523e8d8bef9SDimitry Andric V = APFloat(log2(V.convertToDouble())); 1524e8d8bef9SDimitry Andric break; 1525e8d8bef9SDimitry Andric } 1526e8d8bef9SDimitry Andric } 1527e8d8bef9SDimitry Andric // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, 1528e8d8bef9SDimitry Andric // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, 1529e8d8bef9SDimitry Andric // and `G_FLOG2` reach here. 
1530e8d8bef9SDimitry Andric bool Unused; 1531e8d8bef9SDimitry Andric V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); 1532e8d8bef9SDimitry Andric return V; 1533e8d8bef9SDimitry Andric } 1534e8d8bef9SDimitry Andric 1535e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, 1536e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1537e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1538e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1539e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1540e8d8bef9SDimitry Andric Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); 1541e8d8bef9SDimitry Andric return Cst.hasValue(); 1542e8d8bef9SDimitry Andric } 1543e8d8bef9SDimitry Andric 1544e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, 1545e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1546e8d8bef9SDimitry Andric assert(Cst.hasValue() && "Optional is unexpectedly empty!"); 1547e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1548e8d8bef9SDimitry Andric MachineFunction &MF = Builder.getMF(); 1549e8d8bef9SDimitry Andric auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); 1550e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1551e8d8bef9SDimitry Andric Builder.buildFConstant(DstReg, *FPVal); 1552e8d8bef9SDimitry Andric MI.eraseFromParent(); 1553e8d8bef9SDimitry Andric return true; 1554e8d8bef9SDimitry Andric } 1555e8d8bef9SDimitry Andric 1556480093f4SDimitry Andric bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, 1557480093f4SDimitry Andric PtrAddChain &MatchInfo) { 1558480093f4SDimitry Andric // We're trying to match the following pattern: 1559480093f4SDimitry Andric // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 1560480093f4SDimitry Andric // %root = G_PTR_ADD %t1, G_CONSTANT imm2 1561480093f4SDimitry Andric // --> 1562480093f4SDimitry Andric // %root = 
G_PTR_ADD %base, G_CONSTANT (imm1 + imm2) 1563480093f4SDimitry Andric 1564480093f4SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) 1565480093f4SDimitry Andric return false; 1566480093f4SDimitry Andric 1567480093f4SDimitry Andric Register Add2 = MI.getOperand(1).getReg(); 1568480093f4SDimitry Andric Register Imm1 = MI.getOperand(2).getReg(); 1569480093f4SDimitry Andric auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); 1570480093f4SDimitry Andric if (!MaybeImmVal) 1571480093f4SDimitry Andric return false; 1572480093f4SDimitry Andric 1573480093f4SDimitry Andric MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); 1574480093f4SDimitry Andric if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) 1575480093f4SDimitry Andric return false; 1576480093f4SDimitry Andric 1577480093f4SDimitry Andric Register Base = Add2Def->getOperand(1).getReg(); 1578480093f4SDimitry Andric Register Imm2 = Add2Def->getOperand(2).getReg(); 1579480093f4SDimitry Andric auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); 1580480093f4SDimitry Andric if (!MaybeImm2Val) 1581480093f4SDimitry Andric return false; 1582480093f4SDimitry Andric 1583480093f4SDimitry Andric // Pass the combined immediate to the apply function. 
// NOTE(review): this opens mid-function — the signature and matching logic of
// matchPtrAddImmedChain are above this chunk. At this point both G_PTR_ADD
// offsets are known constants; fold them into a single immediate.
  MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Base = Base;
  return true;
}

// Apply half of the ptr-add chain combine: rewrite
//   %root = G_PTR_ADD (G_PTR_ADD %base, imm1), imm2
// in place as %root = G_PTR_ADD %base, (imm1 + imm2), using the base register
// and combined immediate recorded by the match function. Operand edits are
// bracketed by Observer callbacks so observers see the change.
bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  // Materialize the combined offset with the same type as the original
  // offset operand.
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

// Match two chained shifts of the same opcode by constant amounts; records
// the innermost source register and the summed shift amount in MatchInfo.
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  //   %t1 = SHIFT %base, G_CONSTANT imm1
  //   %root = SHIFT %t1, G_CONSTANT imm2
  // -->
  //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)

  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Register Shl2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  // The inner shift must use the exact same opcode; mixing shift kinds is
  // not handled by this combine.
  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  if (Shl2Def->getOpcode() != Opcode)
    return false;

  Register Base = Shl2Def->getOperand(1).getReg();
  Register Imm2 = Shl2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm =
      (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Reg = Base;

  // There is no simple replacement for a saturating unsigned left shift that
  // exceeds the scalar size.
  if (Opcode == TargetOpcode::G_USHLSAT &&
      MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
    return false;

  return true;
}

// Rewrite the chained shift matched above as a single shift of the original
// base by the combined amount. When the combined amount reaches the scalar
// bit width, logical shifts fold to zero and arithmetic/saturating-signed
// shifts are clamped to (width - 1).
bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Builder.setInstrAndDebugLoc(MI);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  auto Imm = MatchInfo.Imm;

  if (Imm >= ScalarSizeInBits) {
    // Any logical shift that exceeds scalar size will produce zero.
    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
      Builder.buildConstant(MI.getOperand(0), 0);
      MI.eraseFromParent();
      return true;
    }
    // Arithmetic shift and saturating signed left shift have no effect beyond
    // scalar size.
    Imm = ScalarSizeInBits - 1;
  }

  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Reg);
  MI.getOperand(2).setReg(NewImm);
  Observer.changedInstr(MI);
  return true;
}

bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  // with any of G_AND/G_OR/G_XOR logic instructions.
// NOTE(review): continuation of matchShiftOfShiftedLogic — the signature and
// opening comment are above this chunk.
  //   %t1 = SHIFT %X, G_CONSTANT C0
  //   %t2 = LOGIC %t1, %Y
  //   %root = SHIFT %t2, G_CONSTANT C1
  // -->
  //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  //   %t4 = SHIFT %Y, G_CONSTANT C1
  //   %root = LOGIC %t3, %t4
  unsigned ShiftOpcode = MI.getOpcode();
  assert((ShiftOpcode == TargetOpcode::G_SHL ||
          ShiftOpcode == TargetOpcode::G_ASHR ||
          ShiftOpcode == TargetOpcode::G_LSHR ||
          ShiftOpcode == TargetOpcode::G_USHLSAT ||
          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  // Match a one-use bitwise logic op.
  Register LogicDest = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LogicDest))
    return false;

  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  unsigned LogicOpcode = LogicMI->getOpcode();
  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
      LogicOpcode != TargetOpcode::G_XOR)
    return false;

  // Find a matching one-use shift by constant.
  const Register C1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
  if (!MaybeImmVal)
    return false;

  const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();

  // Note: the lambda's MI parameter deliberately shadows the outer MI; it is
  // invoked on the logic op's operand definitions below.
  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // Shift should match previous one and should be a one-use.
    if (MI->getOpcode() != ShiftOpcode ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    // Must be a constant.
    auto MaybeImmVal =
        getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
    if (!MaybeImmVal)
      return false;

    ShiftVal = MaybeImmVal->Value.getSExtValue();
    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  uint64_t C0Val;

  if (matchFirstShift(LogicMIOp1, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg2;
    MatchInfo.Shift2 = LogicMIOp1;
  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg1;
    MatchInfo.Shift2 = LogicMIOp2;
  } else
    return false;

  MatchInfo.ValSum = C0Val + C1Val;

  // The fold is not valid if the sum of the shift values exceeds bitwidth.
  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
    return false;

  MatchInfo.Logic = LogicMI;
  return true;
}

// Rebuild the matched shift-of-shifted-logic pattern as two shifts feeding a
// single logic op:
//   %t3 = SHIFT %X, (C0+C1); %t4 = SHIFT %Y, C1; %root = LOGIC %t3, %t4
// The inner shift and logic instructions were matched as single-use, so they
// are erased here along with the root shift.
bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
          Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  Builder.setInstrAndDebugLoc(MI);

  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);

  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  Register Shift1 =
      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);

  Register Shift2Const = MI.getOperand(2).getReg();
  Register Shift2 = Builder
                        .buildInstr(Opcode, {DestType},
                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
                        .getReg(0);

  Register Dest = MI.getOperand(0).getReg();
  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});

  // These were one use so it's safe to remove them.
  MatchInfo.Shift2->eraseFromParent();
  MatchInfo.Logic->eraseFromParent();

  MI.eraseFromParent();
  return true;
}

// Match a G_MUL by a power-of-two constant; ShiftVal receives log2 of the
// constant so the apply function can turn the multiply into a left shift.
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  // exactLogBase2 yields -1 (checked via the int32_t cast) when the constant
  // is not an exact power of two.
  ShiftVal = MaybeImmVal->Value.exactLogBase2();
  return (static_cast<int32_t>(ShiftVal) != -1);
}

// Rewrite G_MUL x, (1 << ShiftVal) in place as G_SHL x, ShiftVal.
bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                             RegisterImmPair &MatchData) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);

  Register LHS = MI.getOperand(1).getReg();

  // The shifted value must come through any of the three extension opcodes.
  Register ExtSrc;
  if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
    return false;

  // TODO: Should handle vector splat.
  Register RHS = MI.getOperand(2).getReg();
  auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!MaybeShiftAmtVal)
    return false;

  if (LI) {
    LLT SrcTy = MRI.getType(ExtSrc);

    // We only really care about the legality with the shifted value. We can
    // pick any type the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
    LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
      return false;
  }

  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
  MatchData.Reg = ExtSrc;
  MatchData.Imm = ShiftAmt;

  // The narrow shift must not push set bits out of the source: require at
  // least ShiftAmt known-zero leading bits in the pre-extend value.
  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
  return MinLeadingZeros >= ShiftAmt;
}

// Build the narrow shift on the pre-extend source, zero-extend the result to
// the original destination, and remove the wide shift.
bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
                                             const RegisterImmPair &MatchData) {
  Register ExtSrcReg = MatchData.Reg;
  int64_t ShiftAmtVal = MatchData.Imm;

  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  Builder.setInstrAndDebugLoc(MI);
  auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  auto NarrowShift =
      Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  Builder.buildZExt(MI.getOperand(0), NarrowShift);
  MI.eraseFromParent();
  return true;
}

// Look through any chain of G_BITCASTs and return the underlying register.
static Register peekThroughBitcast(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
    ;

  return Reg;
}

// Match G_UNMERGE_VALUES of a G_MERGE_VALUES/G_BUILD_VECTOR/G_CONCAT_VECTORS
// (looking through bitcasts) where each unmerge destination has the same size
// as each merge source; collects the merge's source registers in Operands.
bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  // The unmerge's source is its last operand.
  Register SrcReg =
      peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);

  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
      SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
      SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
    return false;

  // Check the source type of the merge.
  LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
  if (SrcMergeTy != Dst0Ty && !SameSize)
    return false;
  // They are the same now (modulo a bitcast).
  // We can collect all the src registers.
  for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
       ++Idx)
    Operands.push_back(SrcInstr->getOperand(Idx).getReg());
  return true;
}

// Replace each unmerge destination with the corresponding merge source,
// inserting a cast when the types match only in size.
bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Operands.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;

  LLT SrcTy = MRI.getType(Operands[0]);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  bool CanReuseInputDirectly = DstTy == SrcTy;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Register SrcReg = Operands[Idx];
    if (CanReuseInputDirectly)
      replaceRegWith(MRI, DstReg, SrcReg);
    else
      Builder.buildCast(DstReg, SrcReg);
  }
  MI.eraseFromParent();
  return true;
}

// Match G_UNMERGE_VALUES of a G_CONSTANT/G_FCONSTANT; splits the wide value
// into one APInt per destination, lowest bits first.
bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
      SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
    return false;
  // Break down the big constant in smaller ones.
  const MachineOperand &CstVal = SrcInstr->getOperand(1);
  // FP constants are handled through their bit pattern.
  APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
                  ? CstVal.getCImm()->getValue()
                  : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();

  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
  // Unmerge a constant.
  for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
    Csts.emplace_back(Val.trunc(ShiftAmt));
    Val = Val.lshr(ShiftAmt);
  }

  return true;
}

// Replace each destination of the unmerge with its piece of the constant.
bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Csts.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Builder.buildConstant(DstReg, Csts[Idx]);
  }

  MI.eraseFromParent();
  return true;
}

// Match an unmerge where every definition except the first is dead
// (ignoring debug uses).
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  // Check that all the lanes are dead except the first one.
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
      return false;
  }
  return true;
}

// Replace the unmerge with a truncate of the source to the first definition,
// casting through scalar types when either side is a vector.
bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  Builder.setInstrAndDebugLoc(MI);
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full lowbits.
  // Do the operation on a scalar instead.
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    SrcReg =
        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  if (Dst0Ty.isVector()) {
    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
    Builder.buildCast(Dst0Reg, MIB);
  } else
    Builder.buildTrunc(Dst0Reg, SrcReg);
  MI.eraseFromParent();
  return true;
}

// Match an unmerge whose source is a G_ZEXT that fits entirely in the first
// destination. Scalar types only — see the comments below.
bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
// NOTE(review): continuation of matchCombineUnmergeZExtToZExt — the signature
// and the vector-lane comment are above this chunk.
  if (Dst0Ty.isVector())
    return false;
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return false;

  Register ZExtSrcReg;
  if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
    return false;

  // Finally we can replace the first definition with
  // a zext of the source if the definition is big enough to hold
  // all of ZExtSrc bits.
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
}

// Replace the first definition with a zext of the original source (or the
// source itself when the sizes match) and every remaining definition with
// zero, then erase the unmerge.
bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");

  Register Dst0Reg = MI.getOperand(0).getReg();

  MachineInstr *ZExtInstr =
      MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
  assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
         "Expecting a G_ZEXT");

  Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);

  Builder.setInstrAndDebugLoc(MI);

  if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
    Builder.buildZExt(Dst0Reg, ZExtSrcReg);
  } else {
    assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
           "ZExt src doesn't fit in destination");
    replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
  }

  // The zero constant is created lazily and shared by every dead lane.
  Register ZeroReg;
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!ZeroReg)
      ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
    replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
  }
  MI.eraseFromParent();
  return true;
}

// Match a scalar shift by a constant in [Size/2, Size) on a type wider than
// TargetShiftSize, so the apply function can narrow it via an unmerge.
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.getSExtValue();
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

// Rewrite a wide shift by a large constant using an unmerge of the source
// into two halves, shifting only the half that still carries data.
bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    // dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
Andric if (NarrowShiftAmt != 0) { 21195ffd83dbSDimitry Andric Narrowed = Builder.buildLShr(HalfTy, Narrowed, 21205ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 21215ffd83dbSDimitry Andric } 21225ffd83dbSDimitry Andric 21235ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 21245ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Narrowed, Zero }); 21255ffd83dbSDimitry Andric } else if (MI.getOpcode() == TargetOpcode::G_SHL) { 21265ffd83dbSDimitry Andric Register Narrowed = Unmerge.getReg(0); 21275ffd83dbSDimitry Andric // dst = G_SHL s64:x, C for C >= 32 21285ffd83dbSDimitry Andric // => 21295ffd83dbSDimitry Andric // lo, hi = G_UNMERGE_VALUES x 21305ffd83dbSDimitry Andric // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32) 21315ffd83dbSDimitry Andric if (NarrowShiftAmt != 0) { 21325ffd83dbSDimitry Andric Narrowed = Builder.buildShl(HalfTy, Narrowed, 21335ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 21345ffd83dbSDimitry Andric } 21355ffd83dbSDimitry Andric 21365ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 21375ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Zero, Narrowed }); 21385ffd83dbSDimitry Andric } else { 21395ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 21405ffd83dbSDimitry Andric auto Hi = Builder.buildAShr( 21415ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 21425ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, HalfSize - 1)); 21435ffd83dbSDimitry Andric 21445ffd83dbSDimitry Andric if (ShiftVal == HalfSize) { 21455ffd83dbSDimitry Andric // (G_ASHR i64:x, 32) -> 21465ffd83dbSDimitry Andric // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31) 21475ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi }); 21485ffd83dbSDimitry Andric } else if (ShiftVal == Size - 1) { 21495ffd83dbSDimitry Andric // Don't need a second shift. 
21505ffd83dbSDimitry Andric // (G_ASHR i64:x, 63) -> 21515ffd83dbSDimitry Andric // %narrowed = (G_ASHR hi_32(x), 31) 21525ffd83dbSDimitry Andric // G_MERGE_VALUES %narrowed, %narrowed 21535ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Hi, Hi }); 21545ffd83dbSDimitry Andric } else { 21555ffd83dbSDimitry Andric auto Lo = Builder.buildAShr( 21565ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 21575ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, ShiftVal - HalfSize)); 21585ffd83dbSDimitry Andric 21595ffd83dbSDimitry Andric // (G_ASHR i64:x, C) ->, for C >= 32 21605ffd83dbSDimitry Andric // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31) 21615ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Lo, Hi }); 21625ffd83dbSDimitry Andric } 21635ffd83dbSDimitry Andric } 21645ffd83dbSDimitry Andric 21655ffd83dbSDimitry Andric MI.eraseFromParent(); 21665ffd83dbSDimitry Andric return true; 21675ffd83dbSDimitry Andric } 21685ffd83dbSDimitry Andric 21695ffd83dbSDimitry Andric bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, 21705ffd83dbSDimitry Andric unsigned TargetShiftAmount) { 21715ffd83dbSDimitry Andric unsigned ShiftAmt; 21725ffd83dbSDimitry Andric if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) { 21735ffd83dbSDimitry Andric applyCombineShiftToUnmerge(MI, ShiftAmt); 21745ffd83dbSDimitry Andric return true; 21755ffd83dbSDimitry Andric } 21765ffd83dbSDimitry Andric 21775ffd83dbSDimitry Andric return false; 21785ffd83dbSDimitry Andric } 21795ffd83dbSDimitry Andric 2180e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { 2181e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); 2182e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2183e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2184e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2185e8d8bef9SDimitry Andric return 
/// Replace G_INTTOPTR (G_PTRTOINT x) with a copy of x (matched by
/// matchCombineI2PToP2I). Erases \p MI.
bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildCopy(DstReg, Reg);
  MI.eraseFromParent();
  return true;
}

/// Match G_PTRTOINT (G_INTTOPTR x); on success \p Reg is set to x.
bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
}

/// Replace G_PTRTOINT (G_INTTOPTR x) with x zero-extended or truncated to the
/// destination type (the two integer types may differ in width). Erases \p MI.
bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildZExtOrTrunc(DstReg, Reg);
  MI.eraseFromParent();
  return true;
}

/// Match G_ADD (G_PTRTOINT p), y (either operand order) so it can be turned
/// into G_PTRTOINT (G_PTR_ADD p, y). On success \p PtrReg.first is the
/// pointer and \p PtrReg.second records whether the operands must be
/// commuted (pointer was on the RHS).
bool CombinerHelper::matchCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD);
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT IntTy = MRI.getType(LHS);

  // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
  // instruction.
  PtrReg.second = false;
  for (Register SrcReg : {LHS, RHS}) {
    if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
      // Don't handle cases where the integer is implicitly converted to the
      // pointer width.
      LLT PtrTy = MRI.getType(PtrReg.first);
      if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
        return true;
    }

    // First iteration failed: if the second matches, commuting is required.
    PtrReg.second = true;
  }

  return false;
}

/// Rewrite the matched G_ADD as G_PTRTOINT (G_PTR_ADD ptr, int), commuting
/// operands if the matcher flagged it. Erases \p MI.
bool CombinerHelper::applyCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  const bool DoCommute = PtrReg.second;
  if (DoCommute)
    std::swap(LHS, RHS);
  // Use the pointer itself (not its G_PTRTOINT) as the G_PTR_ADD base.
  LHS = PtrReg.first;

  LLT PtrTy = MRI.getType(LHS);

  Builder.setInstrAndDebugLoc(MI);
  auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
  Builder.buildPtrToInt(Dst, PtrAdd);
  MI.eraseFromParent();
  return true;
}
/// Match G_PTR_ADD (G_INTTOPTR C1), C2 where both operands are constants; on
/// success \p NewCst is the folded constant C1 + C2.
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  int64_t &NewCst) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();

  if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
    int64_t Cst;
    if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
      NewCst = Cst + *RHSCst;
      return true;
    }
  }

  return false;
}

/// Replace the matched constant G_PTR_ADD with a single constant \p NewCst.
/// Erases \p MI.
bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  int64_t &NewCst) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
  Register Dst = MI.getOperand(0).getReg();

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildConstant(Dst, NewCst);
  MI.eraseFromParent();
  return true;
}

/// Match G_ANYEXT (G_TRUNC x) where x already has the destination type; on
/// success \p Reg is set to x.
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return mi_match(SrcReg, MRI,
                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}

/// Replace all uses of the G_ANYEXT result with \p Reg directly.
/// Note: MI is erased before the RAUW so the trivial self-use is gone.
bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  Register DstReg = MI.getOperand(0).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, Reg);
  return true;
}

/// Match an extension of an extension that can be collapsed: same opcode
/// twice, anyext of [sz]ext, or sext of zext. On success \p MatchInfo holds
/// the innermost source register and the inner extension's opcode.
bool CombinerHelper::matchCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
  unsigned Opc = MI.getOpcode();
  unsigned SrcOpc = SrcMI->getOpcode();
  if (Opc == SrcOpc ||
      (Opc == TargetOpcode::G_ANYEXT &&
       (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
      (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}
/// Collapse the matched ext-of-ext: either retarget \p MI's source (same
/// opcode case, done in place under Observer notifications) or build a single
/// inner-opcode extension and erase \p MI.
bool CombinerHelper::applyCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");

  Register Reg = std::get<0>(MatchInfo);
  unsigned SrcExtOp = std::get<1>(MatchInfo);

  // Combine exts with the same opcode.
  if (MI.getOpcode() == SrcExtOp) {
    // In-place operand rewrite: must be bracketed by Observer notifications.
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Reg);
    Observer.changedInstr(MI);
    return true;
  }

  // Combine:
  // - anyext([sz]ext x) to [sz]ext x
  // - sext(zext x) to zext x
  if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
      (MI.getOpcode() == TargetOpcode::G_SEXT &&
       SrcExtOp == TargetOpcode::G_ZEXT)) {
    Register DstReg = MI.getOperand(0).getReg();
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// Rewrite G_MUL x, -1 as G_SUB 0, x, preserving MI's flags. Erases \p MI.
bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
                   MI.getFlags());
  MI.eraseFromParent();
  return true;
}
/// Match G_FNEG (G_FNEG x); on success \p Reg is set to x.
bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
}

/// Match G_FABS (G_FABS x); on success \p Src is the inner G_FABS result, so
/// the outer one is redundant and can be replaced by it.
bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
  Src = MI.getOperand(1).getReg();
  Register AbsSrc;
  return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}

/// Replace the outer G_FABS with the inner G_FABS result \p Src.
/// Note: MI is erased before the RAUW so the trivial self-use is gone.
bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
  Register Dst = MI.getOperand(0).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, Dst, Src);
  return true;
}

/// Match G_TRUNC (G_{ANY,S,Z}EXT x); on success \p MatchInfo holds x and the
/// extension opcode.
bool CombinerHelper::matchCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  unsigned SrcOpc = SrcMI->getOpcode();
  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
      SrcOpc == TargetOpcode::G_ZEXT) {
    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

/// Fold trunc(ext x): use x directly when types match, re-extend when the
/// destination is wider than x, or truncate when it is narrower. Erases
/// \p MI in all three cases.
bool CombinerHelper::applyCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MatchInfo.first;
  unsigned SrcExtOp = MatchInfo.second;
  Register DstReg = MI.getOperand(0).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);
  if (SrcTy == DstTy) {
    MI.eraseFromParent();
    replaceRegWith(MRI, DstReg, SrcReg);
    return true;
  }
  Builder.setInstrAndDebugLoc(MI);
  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
  else
    Builder.buildTrunc(DstReg, SrcReg);
  MI.eraseFromParent();
  return true;
}

/// Match G_TRUNC (G_SHL x, amt) where the shift has a single non-debug user,
/// a narrow G_SHL is legal, and known-bits proves amt fits the narrow width
/// (so the shift can be performed after the truncate). On success \p
/// MatchInfo holds the shift source and amount.
/// NOTE(review): dereferences KB unconditionally — presumably this combine is
/// only run when known-bits analysis is available; confirm against callers.
bool CombinerHelper::matchCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register ShiftSrc;
  Register ShiftAmt;

  if (MRI.hasOneNonDBGUse(SrcReg) &&
      mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
      isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SHL,
           {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
    KnownBits Known = KB->getKnownBits(ShiftAmt);
    unsigned Size = DstTy.getSizeInBits();
    // The shift amount must be provably < Size, i.e. all bits above
    // log2(Size) are known zero.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
      return true;
    }
  }
  return false;
}

/// Rewrite trunc(shl x, amt) as shl(trunc x), amt, carrying the original
/// shift's flags. Erases \p MI.
bool CombinerHelper::applyCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);

  Register ShiftSrc = MatchInfo.first;
  Register ShiftAmt = MatchInfo.second;
  Builder.setInstrAndDebugLoc(MI);
  auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
  Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
  MI.eraseFromParent();
  return true;
}
/// \returns true if any explicit use operand of \p MI is (through copies) a
/// G_IMPLICIT_DEF.
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

/// \returns true if every explicit register use of \p MI is a
/// G_IMPLICIT_DEF (non-register operands are ignored).
bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

/// \returns true if the G_SHUFFLE_VECTOR mask selects no input element (all
/// mask entries are negative, i.e. undef lanes).
bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}

/// \returns true if the G_STORE's stored value (operand 0) is undef.
bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

/// \returns true if the G_SELECT's condition (operand 1) is undef.
bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
                      MRI);
}

/// If the G_SELECT's condition is a constant, set \p OpIdx to the operand
/// index of the arm that is always taken (3 for a zero condition, 2
/// otherwise) and return true.
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  if (auto MaybeCstCmp =
          getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
    OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
    return true;
  }
  return false;
}

/// Unconditionally erase \p MI; usable as a combine "apply" step.
bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}

/// \returns true if \p MOP1 and \p MOP2 are register operands that are
/// guaranteed to hold the same value.
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;

  // Handle a case like this:
  //
  // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}

/// \returns true if \p MOP is a register holding the constant \p C (looking
/// through extensions/copies).
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  // MIPatternMatch doesn't let us look through G_ZEXT etc.
  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}
/// Erase \p MI and replace all uses of its single def with the register in
/// operand \p OpIdx.
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

/// Erase \p MI and replace all uses of its single def with \p Replacement.
bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

/// Match a G_SELECT whose two value operands are provably the same value.
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

/// Match a binary op whose two source operands are provably the same value.
bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

/// Match an instruction whose operand \p OpIdx is the constant 0 and whose
/// def may be replaced by that operand.
bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

/// Match an instruction whose operand \p OpIdx is undef.
bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return MO.isReg() &&
         getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
}

/// Match an instruction whose operand \p OpIdx is known (via known-bits) to
/// be a power of two.
bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
                                                        unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}

/// Replace \p MI's single def with the FP constant \p C. Erases \p MI.
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

/// Replace \p MI's single def with the integer constant \p C. Erases \p MI.
bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

/// Replace \p MI's single def with a G_IMPLICIT_DEF. Erases \p MI.
bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

/// Match an add where one operand is a negation, so it can be rewritten as a
/// subtraction. On success \p MatchInfo holds the new (LHS, RHS) of the sub.
bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    // On a successful m_Neg match, NewRHS is already populated via capture.
    if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
26875ffd83dbSDimitry Andric NewLHS = MaybeNewLHS; 26885ffd83dbSDimitry Andric return true; 26895ffd83dbSDimitry Andric }; 26905ffd83dbSDimitry Andric 26915ffd83dbSDimitry Andric return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); 26925ffd83dbSDimitry Andric } 26935ffd83dbSDimitry Andric 2694e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineInsertVecElts( 2695e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2696e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && 2697e8d8bef9SDimitry Andric "Invalid opcode"); 2698e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2699e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2700e8d8bef9SDimitry Andric assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); 2701e8d8bef9SDimitry Andric unsigned NumElts = DstTy.getNumElements(); 2702e8d8bef9SDimitry Andric // If this MI is part of a sequence of insert_vec_elts, then 2703e8d8bef9SDimitry Andric // don't do the combine in the middle of the sequence. 
2704e8d8bef9SDimitry Andric if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == 2705e8d8bef9SDimitry Andric TargetOpcode::G_INSERT_VECTOR_ELT) 2706e8d8bef9SDimitry Andric return false; 2707e8d8bef9SDimitry Andric MachineInstr *CurrInst = &MI; 2708e8d8bef9SDimitry Andric MachineInstr *TmpInst; 2709e8d8bef9SDimitry Andric int64_t IntImm; 2710e8d8bef9SDimitry Andric Register TmpReg; 2711e8d8bef9SDimitry Andric MatchInfo.resize(NumElts); 2712e8d8bef9SDimitry Andric while (mi_match( 2713e8d8bef9SDimitry Andric CurrInst->getOperand(0).getReg(), MRI, 2714e8d8bef9SDimitry Andric m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { 2715e8d8bef9SDimitry Andric if (IntImm >= NumElts) 2716e8d8bef9SDimitry Andric return false; 2717e8d8bef9SDimitry Andric if (!MatchInfo[IntImm]) 2718e8d8bef9SDimitry Andric MatchInfo[IntImm] = TmpReg; 2719e8d8bef9SDimitry Andric CurrInst = TmpInst; 2720e8d8bef9SDimitry Andric } 2721e8d8bef9SDimitry Andric // Variable index. 2722e8d8bef9SDimitry Andric if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) 2723e8d8bef9SDimitry Andric return false; 2724e8d8bef9SDimitry Andric if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { 2725e8d8bef9SDimitry Andric for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { 2726e8d8bef9SDimitry Andric if (!MatchInfo[I - 1].isValid()) 2727e8d8bef9SDimitry Andric MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); 2728e8d8bef9SDimitry Andric } 2729e8d8bef9SDimitry Andric return true; 2730e8d8bef9SDimitry Andric } 2731e8d8bef9SDimitry Andric // If we didn't end in a G_IMPLICIT_DEF, bail out. 
2732e8d8bef9SDimitry Andric return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; 2733e8d8bef9SDimitry Andric } 2734e8d8bef9SDimitry Andric 2735e8d8bef9SDimitry Andric bool CombinerHelper::applyCombineInsertVecElts( 2736e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2737e8d8bef9SDimitry Andric Builder.setInstr(MI); 2738e8d8bef9SDimitry Andric Register UndefReg; 2739e8d8bef9SDimitry Andric auto GetUndef = [&]() { 2740e8d8bef9SDimitry Andric if (UndefReg) 2741e8d8bef9SDimitry Andric return UndefReg; 2742e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 2743e8d8bef9SDimitry Andric UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); 2744e8d8bef9SDimitry Andric return UndefReg; 2745e8d8bef9SDimitry Andric }; 2746e8d8bef9SDimitry Andric for (unsigned I = 0; I < MatchInfo.size(); ++I) { 2747e8d8bef9SDimitry Andric if (!MatchInfo[I]) 2748e8d8bef9SDimitry Andric MatchInfo[I] = GetUndef(); 2749e8d8bef9SDimitry Andric } 2750e8d8bef9SDimitry Andric Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); 2751e8d8bef9SDimitry Andric MI.eraseFromParent(); 2752e8d8bef9SDimitry Andric return true; 2753e8d8bef9SDimitry Andric } 2754e8d8bef9SDimitry Andric 27555ffd83dbSDimitry Andric bool CombinerHelper::applySimplifyAddToSub( 27565ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 27575ffd83dbSDimitry Andric Builder.setInstr(MI); 27585ffd83dbSDimitry Andric Register SubLHS, SubRHS; 27595ffd83dbSDimitry Andric std::tie(SubLHS, SubRHS) = MatchInfo; 27605ffd83dbSDimitry Andric Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); 27615ffd83dbSDimitry Andric MI.eraseFromParent(); 27625ffd83dbSDimitry Andric return true; 27635ffd83dbSDimitry Andric } 27645ffd83dbSDimitry Andric 2765e8d8bef9SDimitry Andric bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( 2766e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 
2767e8d8bef9SDimitry Andric // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... 2768e8d8bef9SDimitry Andric // 2769e8d8bef9SDimitry Andric // Creates the new hand + logic instruction (but does not insert them.) 2770e8d8bef9SDimitry Andric // 2771e8d8bef9SDimitry Andric // On success, MatchInfo is populated with the new instructions. These are 2772e8d8bef9SDimitry Andric // inserted in applyHoistLogicOpWithSameOpcodeHands. 2773e8d8bef9SDimitry Andric unsigned LogicOpcode = MI.getOpcode(); 2774e8d8bef9SDimitry Andric assert(LogicOpcode == TargetOpcode::G_AND || 2775e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_OR || 2776e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_XOR); 2777e8d8bef9SDimitry Andric MachineIRBuilder MIB(MI); 2778e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2779e8d8bef9SDimitry Andric Register LHSReg = MI.getOperand(1).getReg(); 2780e8d8bef9SDimitry Andric Register RHSReg = MI.getOperand(2).getReg(); 2781e8d8bef9SDimitry Andric 2782e8d8bef9SDimitry Andric // Don't recompute anything. 2783e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) 2784e8d8bef9SDimitry Andric return false; 2785e8d8bef9SDimitry Andric 2786e8d8bef9SDimitry Andric // Make sure we have (hand x, ...), (hand y, ...) 
2787e8d8bef9SDimitry Andric MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); 2788e8d8bef9SDimitry Andric MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); 2789e8d8bef9SDimitry Andric if (!LeftHandInst || !RightHandInst) 2790e8d8bef9SDimitry Andric return false; 2791e8d8bef9SDimitry Andric unsigned HandOpcode = LeftHandInst->getOpcode(); 2792e8d8bef9SDimitry Andric if (HandOpcode != RightHandInst->getOpcode()) 2793e8d8bef9SDimitry Andric return false; 2794e8d8bef9SDimitry Andric if (!LeftHandInst->getOperand(1).isReg() || 2795e8d8bef9SDimitry Andric !RightHandInst->getOperand(1).isReg()) 2796e8d8bef9SDimitry Andric return false; 2797e8d8bef9SDimitry Andric 2798e8d8bef9SDimitry Andric // Make sure the types match up, and if we're doing this post-legalization, 2799e8d8bef9SDimitry Andric // we end up with legal types. 2800e8d8bef9SDimitry Andric Register X = LeftHandInst->getOperand(1).getReg(); 2801e8d8bef9SDimitry Andric Register Y = RightHandInst->getOperand(1).getReg(); 2802e8d8bef9SDimitry Andric LLT XTy = MRI.getType(X); 2803e8d8bef9SDimitry Andric LLT YTy = MRI.getType(Y); 2804e8d8bef9SDimitry Andric if (XTy != YTy) 2805e8d8bef9SDimitry Andric return false; 2806e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) 2807e8d8bef9SDimitry Andric return false; 2808e8d8bef9SDimitry Andric 2809e8d8bef9SDimitry Andric // Optional extra source register. 
2810e8d8bef9SDimitry Andric Register ExtraHandOpSrcReg; 2811e8d8bef9SDimitry Andric switch (HandOpcode) { 2812e8d8bef9SDimitry Andric default: 2813e8d8bef9SDimitry Andric return false; 2814e8d8bef9SDimitry Andric case TargetOpcode::G_ANYEXT: 2815e8d8bef9SDimitry Andric case TargetOpcode::G_SEXT: 2816e8d8bef9SDimitry Andric case TargetOpcode::G_ZEXT: { 2817e8d8bef9SDimitry Andric // Match: logic (ext X), (ext Y) --> ext (logic X, Y) 2818e8d8bef9SDimitry Andric break; 2819e8d8bef9SDimitry Andric } 2820e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 2821e8d8bef9SDimitry Andric case TargetOpcode::G_ASHR: 2822e8d8bef9SDimitry Andric case TargetOpcode::G_LSHR: 2823e8d8bef9SDimitry Andric case TargetOpcode::G_SHL: { 2824e8d8bef9SDimitry Andric // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z 2825e8d8bef9SDimitry Andric MachineOperand &ZOp = LeftHandInst->getOperand(2); 2826e8d8bef9SDimitry Andric if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) 2827e8d8bef9SDimitry Andric return false; 2828e8d8bef9SDimitry Andric ExtraHandOpSrcReg = ZOp.getReg(); 2829e8d8bef9SDimitry Andric break; 2830e8d8bef9SDimitry Andric } 2831e8d8bef9SDimitry Andric } 2832e8d8bef9SDimitry Andric 2833e8d8bef9SDimitry Andric // Record the steps to build the new instructions. 
2834e8d8bef9SDimitry Andric // 2835e8d8bef9SDimitry Andric // Steps to build (logic x, y) 2836e8d8bef9SDimitry Andric auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); 2837e8d8bef9SDimitry Andric OperandBuildSteps LogicBuildSteps = { 2838e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, 2839e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, 2840e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; 2841e8d8bef9SDimitry Andric InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); 2842e8d8bef9SDimitry Andric 2843e8d8bef9SDimitry Andric // Steps to build hand (logic x, y), ...z 2844e8d8bef9SDimitry Andric OperandBuildSteps HandBuildSteps = { 2845e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, 2846e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; 2847e8d8bef9SDimitry Andric if (ExtraHandOpSrcReg.isValid()) 2848e8d8bef9SDimitry Andric HandBuildSteps.push_back( 2849e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); 2850e8d8bef9SDimitry Andric InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); 2851e8d8bef9SDimitry Andric 2852e8d8bef9SDimitry Andric MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); 2853e8d8bef9SDimitry Andric return true; 2854e8d8bef9SDimitry Andric } 2855e8d8bef9SDimitry Andric 2856e8d8bef9SDimitry Andric bool CombinerHelper::applyBuildInstructionSteps( 2857e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2858e8d8bef9SDimitry Andric assert(MatchInfo.InstrsToBuild.size() && 2859e8d8bef9SDimitry Andric "Expected at least one instr to build?"); 2860e8d8bef9SDimitry Andric Builder.setInstr(MI); 2861e8d8bef9SDimitry Andric for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { 2862e8d8bef9SDimitry Andric assert(InstrToBuild.Opcode && "Expected a valid opcode?"); 2863e8d8bef9SDimitry Andric 
assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); 2864e8d8bef9SDimitry Andric MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); 2865e8d8bef9SDimitry Andric for (auto &OperandFn : InstrToBuild.OperandFns) 2866e8d8bef9SDimitry Andric OperandFn(Instr); 2867e8d8bef9SDimitry Andric } 2868e8d8bef9SDimitry Andric MI.eraseFromParent(); 2869e8d8bef9SDimitry Andric return true; 2870e8d8bef9SDimitry Andric } 2871e8d8bef9SDimitry Andric 2872e8d8bef9SDimitry Andric bool CombinerHelper::matchAshrShlToSextInreg( 2873e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2874e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2875e8d8bef9SDimitry Andric int64_t ShlCst, AshrCst; 2876e8d8bef9SDimitry Andric Register Src; 2877e8d8bef9SDimitry Andric // FIXME: detect splat constant vectors. 2878e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 2879e8d8bef9SDimitry Andric m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) 2880e8d8bef9SDimitry Andric return false; 2881e8d8bef9SDimitry Andric if (ShlCst != AshrCst) 2882e8d8bef9SDimitry Andric return false; 2883e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 2884e8d8bef9SDimitry Andric {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) 2885e8d8bef9SDimitry Andric return false; 2886e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(Src, ShlCst); 2887e8d8bef9SDimitry Andric return true; 2888e8d8bef9SDimitry Andric } 2889e8d8bef9SDimitry Andric bool CombinerHelper::applyAshShlToSextInreg( 2890e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2891e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2892e8d8bef9SDimitry Andric Register Src; 2893e8d8bef9SDimitry Andric int64_t ShiftAmt; 2894e8d8bef9SDimitry Andric std::tie(Src, ShiftAmt) = MatchInfo; 2895e8d8bef9SDimitry Andric unsigned Size = MRI.getType(Src).getScalarSizeInBits(); 2896e8d8bef9SDimitry 
Andric Builder.setInstrAndDebugLoc(MI); 2897e8d8bef9SDimitry Andric Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); 2898e8d8bef9SDimitry Andric MI.eraseFromParent(); 2899e8d8bef9SDimitry Andric return true; 2900e8d8bef9SDimitry Andric } 2901e8d8bef9SDimitry Andric 2902e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, 2903e8d8bef9SDimitry Andric Register &Replacement) { 2904e8d8bef9SDimitry Andric // Given 2905e8d8bef9SDimitry Andric // 2906e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2907e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2908e8d8bef9SDimitry Andric // %res:_(sN) = G_AND %x, %y 2909e8d8bef9SDimitry Andric // 2910e8d8bef9SDimitry Andric // Eliminate the G_AND when it is known that x & y == x or x & y == y. 2911e8d8bef9SDimitry Andric // 2912e8d8bef9SDimitry Andric // Patterns like this can appear as a result of legalization. E.g. 2913e8d8bef9SDimitry Andric // 2914e8d8bef9SDimitry Andric // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y 2915e8d8bef9SDimitry Andric // %one:_(s32) = G_CONSTANT i32 1 2916e8d8bef9SDimitry Andric // %and:_(s32) = G_AND %cmp, %one 2917e8d8bef9SDimitry Andric // 2918e8d8bef9SDimitry Andric // In this case, G_ICMP only produces a single bit, so x & 1 == x. 2919e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 2920e8d8bef9SDimitry Andric if (!KB) 2921e8d8bef9SDimitry Andric return false; 2922e8d8bef9SDimitry Andric 2923e8d8bef9SDimitry Andric Register AndDst = MI.getOperand(0).getReg(); 2924e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(AndDst); 2925e8d8bef9SDimitry Andric 2926e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 
2927e8d8bef9SDimitry Andric if (DstTy.isVector()) 2928e8d8bef9SDimitry Andric return false; 2929e8d8bef9SDimitry Andric 2930e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2931e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2932e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2933e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2934e8d8bef9SDimitry Andric 2935e8d8bef9SDimitry Andric // Check that x & Mask == x. 2936e8d8bef9SDimitry Andric // x & 1 == x, always 2937e8d8bef9SDimitry Andric // x & 0 == x, only if x is also 0 2938e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either one in Mask or zero in x. 2939e8d8bef9SDimitry Andric // 2940e8d8bef9SDimitry Andric // Check if we can replace AndDst with the LHS of the G_AND 2941e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, LHS, MRI) && 2942e8d8bef9SDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { 2943e8d8bef9SDimitry Andric Replacement = LHS; 2944e8d8bef9SDimitry Andric return true; 2945e8d8bef9SDimitry Andric } 2946e8d8bef9SDimitry Andric 2947e8d8bef9SDimitry Andric // Check if we can replace AndDst with the RHS of the G_AND 2948e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, RHS, MRI) && 2949e8d8bef9SDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { 2950e8d8bef9SDimitry Andric Replacement = RHS; 2951e8d8bef9SDimitry Andric return true; 2952e8d8bef9SDimitry Andric } 2953e8d8bef9SDimitry Andric 2954e8d8bef9SDimitry Andric return false; 2955e8d8bef9SDimitry Andric } 2956e8d8bef9SDimitry Andric 2957e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { 2958e8d8bef9SDimitry Andric // Given 2959e8d8bef9SDimitry Andric // 2960e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2961e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2962e8d8bef9SDimitry Andric // %res:_(sN) = G_OR %x, %y 2963e8d8bef9SDimitry Andric // 2964e8d8bef9SDimitry Andric // 
Eliminate the G_OR when it is known that x | y == x or x | y == y. 2965e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 2966e8d8bef9SDimitry Andric if (!KB) 2967e8d8bef9SDimitry Andric return false; 2968e8d8bef9SDimitry Andric 2969e8d8bef9SDimitry Andric Register OrDst = MI.getOperand(0).getReg(); 2970e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(OrDst); 2971e8d8bef9SDimitry Andric 2972e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 2973e8d8bef9SDimitry Andric if (DstTy.isVector()) 2974e8d8bef9SDimitry Andric return false; 2975e8d8bef9SDimitry Andric 2976e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2977e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2978e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2979e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2980e8d8bef9SDimitry Andric 2981e8d8bef9SDimitry Andric // Check that x | Mask == x. 2982e8d8bef9SDimitry Andric // x | 0 == x, always 2983e8d8bef9SDimitry Andric // x | 1 == x, only if x is also 1 2984e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either zero in Mask or one in x. 
2985e8d8bef9SDimitry Andric // 2986e8d8bef9SDimitry Andric // Check if we can replace OrDst with the LHS of the G_OR 2987e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, LHS, MRI) && 2988e8d8bef9SDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnesValue()) { 2989e8d8bef9SDimitry Andric Replacement = LHS; 2990e8d8bef9SDimitry Andric return true; 2991e8d8bef9SDimitry Andric } 2992e8d8bef9SDimitry Andric 2993e8d8bef9SDimitry Andric // Check if we can replace OrDst with the RHS of the G_OR 2994e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, RHS, MRI) && 2995e8d8bef9SDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnesValue()) { 2996e8d8bef9SDimitry Andric Replacement = RHS; 2997e8d8bef9SDimitry Andric return true; 2998e8d8bef9SDimitry Andric } 2999e8d8bef9SDimitry Andric 3000e8d8bef9SDimitry Andric return false; 3001e8d8bef9SDimitry Andric } 3002e8d8bef9SDimitry Andric 3003e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { 3004e8d8bef9SDimitry Andric // If the input is already sign extended, just drop the extension. 3005e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3006e8d8bef9SDimitry Andric unsigned ExtBits = MI.getOperand(2).getImm(); 3007e8d8bef9SDimitry Andric unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); 3008e8d8bef9SDimitry Andric return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); 3009e8d8bef9SDimitry Andric } 3010e8d8bef9SDimitry Andric 3011e8d8bef9SDimitry Andric static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, 3012e8d8bef9SDimitry Andric int64_t Cst, bool IsVector, bool IsFP) { 3013e8d8bef9SDimitry Andric // For i1, Cst will always be -1 regardless of boolean contents. 
3014e8d8bef9SDimitry Andric return (ScalarSizeBits == 1 && Cst == -1) || 3015e8d8bef9SDimitry Andric isConstTrueVal(TLI, Cst, IsVector, IsFP); 3016e8d8bef9SDimitry Andric } 3017e8d8bef9SDimitry Andric 3018e8d8bef9SDimitry Andric bool CombinerHelper::matchNotCmp(MachineInstr &MI, 3019e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 3020e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3021e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 3022e8d8bef9SDimitry Andric const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); 3023e8d8bef9SDimitry Andric Register XorSrc; 3024e8d8bef9SDimitry Andric Register CstReg; 3025e8d8bef9SDimitry Andric // We match xor(src, true) here. 3026e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 3027e8d8bef9SDimitry Andric m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) 3028e8d8bef9SDimitry Andric return false; 3029e8d8bef9SDimitry Andric 3030e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(XorSrc)) 3031e8d8bef9SDimitry Andric return false; 3032e8d8bef9SDimitry Andric 3033e8d8bef9SDimitry Andric // Check that XorSrc is the root of a tree of comparisons combined with ANDs 3034e8d8bef9SDimitry Andric // and ORs. The suffix of RegsToNegate starting from index I is used a work 3035e8d8bef9SDimitry Andric // list of tree nodes to visit. 3036e8d8bef9SDimitry Andric RegsToNegate.push_back(XorSrc); 3037e8d8bef9SDimitry Andric // Remember whether the comparisons are all integer or all floating point. 
3038e8d8bef9SDimitry Andric bool IsInt = false; 3039e8d8bef9SDimitry Andric bool IsFP = false; 3040e8d8bef9SDimitry Andric for (unsigned I = 0; I < RegsToNegate.size(); ++I) { 3041e8d8bef9SDimitry Andric Register Reg = RegsToNegate[I]; 3042e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(Reg)) 3043e8d8bef9SDimitry Andric return false; 3044e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 3045e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 3046e8d8bef9SDimitry Andric default: 3047e8d8bef9SDimitry Andric // Don't match if the tree contains anything other than ANDs, ORs and 3048e8d8bef9SDimitry Andric // comparisons. 3049e8d8bef9SDimitry Andric return false; 3050e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 3051e8d8bef9SDimitry Andric if (IsFP) 3052e8d8bef9SDimitry Andric return false; 3053e8d8bef9SDimitry Andric IsInt = true; 3054e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 3055e8d8bef9SDimitry Andric break; 3056e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: 3057e8d8bef9SDimitry Andric if (IsInt) 3058e8d8bef9SDimitry Andric return false; 3059e8d8bef9SDimitry Andric IsFP = true; 3060e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 3061e8d8bef9SDimitry Andric break; 3062e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 3063e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 3064e8d8bef9SDimitry Andric // Implement De Morgan's laws: 3065e8d8bef9SDimitry Andric // ~(x & y) -> ~x | ~y 3066e8d8bef9SDimitry Andric // ~(x | y) -> ~x & ~y 3067e8d8bef9SDimitry Andric // When we apply the combine we will change the opcode and recursively 3068e8d8bef9SDimitry Andric // negate the operands. 
3069e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(1).getReg()); 3070e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(2).getReg()); 3071e8d8bef9SDimitry Andric break; 3072e8d8bef9SDimitry Andric } 3073e8d8bef9SDimitry Andric } 3074e8d8bef9SDimitry Andric 3075e8d8bef9SDimitry Andric // Now we know whether the comparisons are integer or floating point, check 3076e8d8bef9SDimitry Andric // the constant in the xor. 3077e8d8bef9SDimitry Andric int64_t Cst; 3078e8d8bef9SDimitry Andric if (Ty.isVector()) { 3079e8d8bef9SDimitry Andric MachineInstr *CstDef = MRI.getVRegDef(CstReg); 3080e8d8bef9SDimitry Andric auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); 3081e8d8bef9SDimitry Andric if (!MaybeCst) 3082e8d8bef9SDimitry Andric return false; 3083e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) 3084e8d8bef9SDimitry Andric return false; 3085e8d8bef9SDimitry Andric } else { 3086e8d8bef9SDimitry Andric if (!mi_match(CstReg, MRI, m_ICst(Cst))) 3087e8d8bef9SDimitry Andric return false; 3088e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) 3089e8d8bef9SDimitry Andric return false; 3090e8d8bef9SDimitry Andric } 3091e8d8bef9SDimitry Andric 3092e8d8bef9SDimitry Andric return true; 3093e8d8bef9SDimitry Andric } 3094e8d8bef9SDimitry Andric 3095e8d8bef9SDimitry Andric bool CombinerHelper::applyNotCmp(MachineInstr &MI, 3096e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 3097e8d8bef9SDimitry Andric for (Register Reg : RegsToNegate) { 3098e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 3099e8d8bef9SDimitry Andric Observer.changingInstr(*Def); 3100e8d8bef9SDimitry Andric // For each comparison, invert the opcode. For each AND and OR, change the 3101e8d8bef9SDimitry Andric // opcode. 
3102e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 3103e8d8bef9SDimitry Andric default: 3104e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode"); 3105e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 3106e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: { 3107e8d8bef9SDimitry Andric MachineOperand &PredOp = Def->getOperand(1); 3108e8d8bef9SDimitry Andric CmpInst::Predicate NewP = CmpInst::getInversePredicate( 3109e8d8bef9SDimitry Andric (CmpInst::Predicate)PredOp.getPredicate()); 3110e8d8bef9SDimitry Andric PredOp.setPredicate(NewP); 3111e8d8bef9SDimitry Andric break; 3112e8d8bef9SDimitry Andric } 3113e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 3114e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); 3115e8d8bef9SDimitry Andric break; 3116e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 3117e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3118e8d8bef9SDimitry Andric break; 3119e8d8bef9SDimitry Andric } 3120e8d8bef9SDimitry Andric Observer.changedInstr(*Def); 3121e8d8bef9SDimitry Andric } 3122e8d8bef9SDimitry Andric 3123e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 3124e8d8bef9SDimitry Andric MI.eraseFromParent(); 3125e8d8bef9SDimitry Andric return true; 3126e8d8bef9SDimitry Andric } 3127e8d8bef9SDimitry Andric 3128e8d8bef9SDimitry Andric bool CombinerHelper::matchXorOfAndWithSameReg( 3129e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3130e8d8bef9SDimitry Andric // Match (xor (and x, y), y) (or any of its commuted cases) 3131e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3132e8d8bef9SDimitry Andric Register &X = MatchInfo.first; 3133e8d8bef9SDimitry Andric Register &Y = MatchInfo.second; 3134e8d8bef9SDimitry Andric Register AndReg = MI.getOperand(1).getReg(); 3135e8d8bef9SDimitry Andric Register SharedReg = MI.getOperand(2).getReg(); 3136e8d8bef9SDimitry Andric 
3137e8d8bef9SDimitry Andric // Find a G_AND on either side of the G_XOR. 3138e8d8bef9SDimitry Andric // Look for one of 3139e8d8bef9SDimitry Andric // 3140e8d8bef9SDimitry Andric // (xor (and x, y), SharedReg) 3141e8d8bef9SDimitry Andric // (xor SharedReg, (and x, y)) 3142e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { 3143e8d8bef9SDimitry Andric std::swap(AndReg, SharedReg); 3144e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) 3145e8d8bef9SDimitry Andric return false; 3146e8d8bef9SDimitry Andric } 3147e8d8bef9SDimitry Andric 3148e8d8bef9SDimitry Andric // Only do this if we'll eliminate the G_AND. 3149e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(AndReg)) 3150e8d8bef9SDimitry Andric return false; 3151e8d8bef9SDimitry Andric 3152e8d8bef9SDimitry Andric // We can combine if SharedReg is the same as either the LHS or RHS of the 3153e8d8bef9SDimitry Andric // G_AND. 3154e8d8bef9SDimitry Andric if (Y != SharedReg) 3155e8d8bef9SDimitry Andric std::swap(X, Y); 3156e8d8bef9SDimitry Andric return Y == SharedReg; 3157e8d8bef9SDimitry Andric } 3158e8d8bef9SDimitry Andric 3159e8d8bef9SDimitry Andric bool CombinerHelper::applyXorOfAndWithSameReg( 3160e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3161e8d8bef9SDimitry Andric // Fold (xor (and x, y), y) -> (and (not x), y) 3162e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3163e8d8bef9SDimitry Andric Register X, Y; 3164e8d8bef9SDimitry Andric std::tie(X, Y) = MatchInfo; 3165e8d8bef9SDimitry Andric auto Not = Builder.buildNot(MRI.getType(X), X); 3166e8d8bef9SDimitry Andric Observer.changingInstr(MI); 3167e8d8bef9SDimitry Andric MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3168e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Not->getOperand(0).getReg()); 3169e8d8bef9SDimitry Andric MI.getOperand(2).setReg(Y); 3170e8d8bef9SDimitry Andric Observer.changedInstr(MI); 3171e8d8bef9SDimitry Andric return true; 
3172e8d8bef9SDimitry Andric } 3173e8d8bef9SDimitry Andric 3174e8d8bef9SDimitry Andric bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { 3175e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3176e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3177e8d8bef9SDimitry Andric const DataLayout &DL = Builder.getMF().getDataLayout(); 3178e8d8bef9SDimitry Andric 3179e8d8bef9SDimitry Andric if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) 3180e8d8bef9SDimitry Andric return false; 3181e8d8bef9SDimitry Andric 3182e8d8bef9SDimitry Andric if (Ty.isPointer()) { 3183e8d8bef9SDimitry Andric auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); 3184e8d8bef9SDimitry Andric return ConstVal && *ConstVal == 0; 3185e8d8bef9SDimitry Andric } 3186e8d8bef9SDimitry Andric 3187e8d8bef9SDimitry Andric assert(Ty.isVector() && "Expecting a vector type"); 3188e8d8bef9SDimitry Andric const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); 3189e8d8bef9SDimitry Andric return isBuildVectorAllZeros(*VecMI, MRI); 3190e8d8bef9SDimitry Andric } 3191e8d8bef9SDimitry Andric 3192e8d8bef9SDimitry Andric bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) { 3193e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); 3194e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3195e8d8bef9SDimitry Andric Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); 3196e8d8bef9SDimitry Andric MI.eraseFromParent(); 3197e8d8bef9SDimitry Andric return true; 3198e8d8bef9SDimitry Andric } 3199e8d8bef9SDimitry Andric 3200e8d8bef9SDimitry Andric /// The second source operand is known to be a power of 2. 
3201e8d8bef9SDimitry Andric bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { 3202e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3203e8d8bef9SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3204e8d8bef9SDimitry Andric Register Pow2Src1 = MI.getOperand(2).getReg(); 3205e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3206e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3207e8d8bef9SDimitry Andric 3208e8d8bef9SDimitry Andric // Fold (urem x, pow2) -> (and x, pow2-1) 3209e8d8bef9SDimitry Andric auto NegOne = Builder.buildConstant(Ty, -1); 3210e8d8bef9SDimitry Andric auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); 3211e8d8bef9SDimitry Andric Builder.buildAnd(DstReg, Src0, Add); 3212e8d8bef9SDimitry Andric MI.eraseFromParent(); 3213e8d8bef9SDimitry Andric return true; 3214e8d8bef9SDimitry Andric } 3215e8d8bef9SDimitry Andric 3216e8d8bef9SDimitry Andric Optional<SmallVector<Register, 8>> 3217e8d8bef9SDimitry Andric CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { 3218e8d8bef9SDimitry Andric assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); 3219e8d8bef9SDimitry Andric // We want to detect if Root is part of a tree which represents a bunch 3220e8d8bef9SDimitry Andric // of loads being merged into a larger load. We'll try to recognize patterns 3221e8d8bef9SDimitry Andric // like, for example: 3222e8d8bef9SDimitry Andric // 3223e8d8bef9SDimitry Andric // Reg Reg 3224e8d8bef9SDimitry Andric // \ / 3225e8d8bef9SDimitry Andric // OR_1 Reg 3226e8d8bef9SDimitry Andric // \ / 3227e8d8bef9SDimitry Andric // OR_2 3228e8d8bef9SDimitry Andric // \ Reg 3229e8d8bef9SDimitry Andric // .. 
/ 3230e8d8bef9SDimitry Andric // Root 3231e8d8bef9SDimitry Andric // 3232e8d8bef9SDimitry Andric // Reg Reg Reg Reg 3233e8d8bef9SDimitry Andric // \ / \ / 3234e8d8bef9SDimitry Andric // OR_1 OR_2 3235e8d8bef9SDimitry Andric // \ / 3236e8d8bef9SDimitry Andric // \ / 3237e8d8bef9SDimitry Andric // ... 3238e8d8bef9SDimitry Andric // Root 3239e8d8bef9SDimitry Andric // 3240e8d8bef9SDimitry Andric // Each "Reg" may have been produced by a load + some arithmetic. This 3241e8d8bef9SDimitry Andric // function will save each of them. 3242e8d8bef9SDimitry Andric SmallVector<Register, 8> RegsToVisit; 3243e8d8bef9SDimitry Andric SmallVector<const MachineInstr *, 7> Ors = {Root}; 3244e8d8bef9SDimitry Andric 3245e8d8bef9SDimitry Andric // In the "worst" case, we're dealing with a load for each byte. So, there 3246e8d8bef9SDimitry Andric // are at most #bytes - 1 ORs. 3247e8d8bef9SDimitry Andric const unsigned MaxIter = 3248e8d8bef9SDimitry Andric MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; 3249e8d8bef9SDimitry Andric for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { 3250e8d8bef9SDimitry Andric if (Ors.empty()) 3251e8d8bef9SDimitry Andric break; 3252e8d8bef9SDimitry Andric const MachineInstr *Curr = Ors.pop_back_val(); 3253e8d8bef9SDimitry Andric Register OrLHS = Curr->getOperand(1).getReg(); 3254e8d8bef9SDimitry Andric Register OrRHS = Curr->getOperand(2).getReg(); 3255e8d8bef9SDimitry Andric 3256e8d8bef9SDimitry Andric // In the combine, we want to elimate the entire tree. 3257e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) 3258e8d8bef9SDimitry Andric return None; 3259e8d8bef9SDimitry Andric 3260e8d8bef9SDimitry Andric // If it's a G_OR, save it and continue to walk. If it's not, then it's 3261e8d8bef9SDimitry Andric // something that may be a load + arithmetic. 
3262e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) 3263e8d8bef9SDimitry Andric Ors.push_back(Or); 3264e8d8bef9SDimitry Andric else 3265e8d8bef9SDimitry Andric RegsToVisit.push_back(OrLHS); 3266e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) 3267e8d8bef9SDimitry Andric Ors.push_back(Or); 3268e8d8bef9SDimitry Andric else 3269e8d8bef9SDimitry Andric RegsToVisit.push_back(OrRHS); 3270e8d8bef9SDimitry Andric } 3271e8d8bef9SDimitry Andric 3272e8d8bef9SDimitry Andric // We're going to try and merge each register into a wider power-of-2 type, 3273e8d8bef9SDimitry Andric // so we ought to have an even number of registers. 3274e8d8bef9SDimitry Andric if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) 3275e8d8bef9SDimitry Andric return None; 3276e8d8bef9SDimitry Andric return RegsToVisit; 3277e8d8bef9SDimitry Andric } 3278e8d8bef9SDimitry Andric 3279e8d8bef9SDimitry Andric /// Helper function for findLoadOffsetsForLoadOrCombine. 3280e8d8bef9SDimitry Andric /// 3281e8d8bef9SDimitry Andric /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, 3282e8d8bef9SDimitry Andric /// and then moving that value into a specific byte offset. 3283e8d8bef9SDimitry Andric /// 3284e8d8bef9SDimitry Andric /// e.g. x[i] << 24 3285e8d8bef9SDimitry Andric /// 3286e8d8bef9SDimitry Andric /// \returns The load instruction and the byte offset it is moved into. 
static Optional<std::pair<MachineInstr *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
                         const MachineRegisterInfo &MRI) {
  assert(MRI.hasOneNonDBGUse(Reg) &&
         "Expected Reg to only have one non-debug use?");
  // Peel off an optional single-use (shl x, constant). If Reg is not a shift,
  // treat it as the unshifted value (shift amount 0).
  Register MaybeLoad;
  int64_t Shift;
  if (!mi_match(Reg, MRI,
                m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
    Shift = 0;
    MaybeLoad = Reg;
  }

  // The shift must place the loaded value at a whole multiple of the narrow
  // memory size, otherwise it can't correspond to a byte position in the
  // wide value.
  if (Shift % MemSizeInBits != 0)
    return None;

  // TODO: Handle other types of loads.
  auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
  if (!Load)
    return None;

  // Reject atomic/volatile-ordered loads and loads whose in-memory size
  // doesn't match the expected narrow size.
  const auto &MMO = **Load->memoperands_begin();
  if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
    return None;

  // Second element is the byte (memory-element) position the load lands in.
  return std::make_pair(Load, Shift / MemSizeInBits);
}

/// Walk every register in \p RegsToVisit, verify each one is a narrow
/// (\p MemSizeInBits wide) load from a common base pointer shifted into a
/// distinct byte position, and record the byte-offset -> load-index mapping
/// in \p MemOffset2Idx.
///
/// \returns The load using the lowest pointer index, paired with that index,
/// or None if the registers don't form a combinable pattern.
Optional<std::pair<MachineInstr *, int64_t>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
    SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
    const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {

  // Each load found for the pattern. There should be one for each RegsToVisit.
  SmallSetVector<const MachineInstr *, 8> Loads;

  // The lowest index used in any load. (The lowest "i" for each x[i].)
  int64_t LowestIdx = INT64_MAX;

  // The load which uses the lowest index.
  MachineInstr *LowestIdxLoad = nullptr;

  // Keeps track of the load indices we see. We shouldn't see any indices twice.
  SmallSet<int64_t, 8> SeenIdx;

  // Ensure each load is in the same MBB.
  // TODO: Support multiple MachineBasicBlocks.
  MachineBasicBlock *MBB = nullptr;
  // MMO of the first load seen; later loads must match its address space.
  const MachineMemOperand *MMO = nullptr;

  // Earliest instruction-order load in the pattern.
  MachineInstr *EarliestLoad = nullptr;

  // Latest instruction-order load in the pattern.
  MachineInstr *LatestLoad = nullptr;

  // Base pointer which every load should share.
  Register BasePtr;

  // We want to find a load for each register. Each load should have some
  // appropriate bit twiddling arithmetic. During this loop, we will also keep
  // track of the load which uses the lowest index. Later, we will check if we
  // can use its pointer in the final, combined load.
  for (auto Reg : RegsToVisit) {
    // Find the load, and find the position that it will end up in (e.g. a
    // shifted) value.
    auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
    if (!LoadAndPos)
      return None;
    MachineInstr *Load;
    int64_t DstPos;
    std::tie(Load, DstPos) = *LoadAndPos;

    // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
    // it is difficult to check for stores/calls/etc between loads.
    MachineBasicBlock *LoadMBB = Load->getParent();
    if (!MBB)
      MBB = LoadMBB;
    if (LoadMBB != MBB)
      return None;

    // Make sure that the MachineMemOperands of every seen load are compatible.
    const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
    if (!MMO)
      MMO = LoadMMO;
    if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
      return None;

    // Find out what the base pointer and index for the load is. A plain
    // pointer (no G_PTR_ADD) counts as index 0.
    Register LoadPtr;
    int64_t Idx;
    if (!mi_match(Load->getOperand(1).getReg(), MRI,
                  m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
      LoadPtr = Load->getOperand(1).getReg();
      Idx = 0;
    }

    // Don't combine things like a[i], a[i] -> a bigger load.
    if (!SeenIdx.insert(Idx).second)
      return None;

    // Every load must share the same base pointer; don't combine things like:
    //
    // a[i], b[i + 1] -> a bigger load.
    if (!BasePtr.isValid())
      BasePtr = LoadPtr;
    if (BasePtr != LoadPtr)
      return None;

    // Remember the load with the smallest index; its pointer may become the
    // pointer of the combined wide load.
    if (Idx < LowestIdx) {
      LowestIdx = Idx;
      LowestIdxLoad = Load;
    }

    // Keep track of the byte offset that this load ends up at. If we have seen
    // the byte offset, then stop here. We do not want to combine:
    //
    // a[i] << 16, a[i + k] << 16 -> a bigger load.
    if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
      return None;
    Loads.insert(Load);

    // Keep track of the position of the earliest/latest loads in the pattern.
    // We will check that there are no load fold barriers between them later
    // on.
    //
    // FIXME: Is there a better way to check for load fold barriers?
    if (!EarliestLoad || dominates(*Load, *EarliestLoad))
      EarliestLoad = Load;
    if (!LatestLoad || dominates(*LatestLoad, *Load))
      LatestLoad = Load;
  }

  // We found a load for each register. Let's check if each load satisfies the
  // pattern.
  assert(Loads.size() == RegsToVisit.size() &&
         "Expected to find a load for each register?");
  assert(EarliestLoad != LatestLoad && EarliestLoad &&
         LatestLoad && "Expected at least two loads?");

  // Check if there are any stores, calls, etc. between any of the loads. If
  // there are, then we can't safely perform the combine.
  //
  // MaxIter is chosen based off the (worst case) number of iterations it
  // typically takes to succeed in the LLVM test suite plus some padding.
  //
  // FIXME: Is there a better way to check for load fold barriers?
  const unsigned MaxIter = 20;
  unsigned Iter = 0;
  for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
                                                 LatestLoad->getIterator())) {
    if (Loads.count(&MI))
      continue;
    if (MI.isLoadFoldBarrier())
      return None;
    // Bail out after MaxIter intervening instructions to bound compile time.
    if (Iter++ == MaxIter)
      return None;
  }

  return std::make_pair(LowestIdxLoad, LowestIdx);
}

/// Match a tree of G_ORs combining byte-position-shifted narrow loads into a
/// single wide value; on success \p MatchInfo builds the replacement wide
/// load (plus a G_BSWAP when the byte order is reversed).
bool CombinerHelper::matchLoadOrCombine(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  MachineFunction &MF = *MI.getMF();
  // Assuming a little-endian target, transform:
  //  s8 *a = ...
  //  s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
  // =>
  //  s32 val = *((i32)a)
  //
  //  s8 *a = ...
  //  s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
  // =>
  //  s32 val = BSWAP(*((s32)a))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  // Vector destinations are not handled.
  if (Ty.isVector())
    return false;

  // We need to combine at least two loads into this type. Since the smallest
  // possible load is into a byte, we need at least a 16-bit wide type.
  const unsigned WideMemSizeInBits = Ty.getSizeInBits();
  if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
    return false;

  // Match a collection of non-OR instructions in the pattern.
  auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
  if (!RegsToVisit)
    return false;

  // We have a collection of non-OR instructions. Figure out how wide each of
  // the small loads should be based off of the number of potential loads we
  // found.
  const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
  if (NarrowMemSizeInBits % 8 != 0)
    return false;

  // Check if each register feeding into each OR is a load from the same
  // base pointer + some arithmetic.
  //
  // e.g. a[0], a[1] << 8, a[2] << 16, etc.
  //
  // Also verify that each of these ends up putting a[i] into the same memory
  // offset as a load into a wide type would.
  SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
  MachineInstr *LowestIdxLoad;
  int64_t LowestIdx;
  auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
      MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
  if (!MaybeLoadInfo)
    return false;
  std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;

  // We have a bunch of loads being OR'd together. Using the addresses + offsets
  // we found before, check if this corresponds to a big or little endian byte
  // pattern. If it does, then we can represent it using a load + possibly a
  // BSWAP.
  bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
  Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
  if (!IsBigEndian.hasValue())
    return false;
  // A byte order opposite to the target's needs a G_BSWAP, which must be
  // legal (or we must still be before the legalizer).
  bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
  if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
    return false;

  // Make sure that the load from the lowest index produces offset 0 in the
  // final value.
  //
  // This ensures that we won't combine something like this:
  //
  // load x[i] -> byte 2
  // load x[i+1] -> byte 0 ---> wide_load x[i]
  // load x[i+2] -> byte 1
  const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
  const unsigned ZeroByteOffset =
      *IsBigEndian
          ? bigEndianByteAt(NumLoadsInTy, 0)
          : littleEndianByteAt(NumLoadsInTy, 0);
  auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
  if (ZeroOffsetIdx == MemOffset2Idx.end() ||
      ZeroOffsetIdx->second != LowestIdx)
    return false;

  // We will reuse the pointer from the load which ends up at byte offset 0. It
  // may not use index 0.
  Register Ptr = LowestIdxLoad->getOperand(1).getReg();
  const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
  // The wide load must itself be legal (or we must be before the legalizer).
  LegalityQuery::MemDesc MMDesc;
  MMDesc.SizeInBits = WideMemSizeInBits;
  MMDesc.AlignInBits = MMO.getAlign().value() * 8;
  MMDesc.Ordering = MMO.getOrdering();
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
    return false;
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  bool Fast = false;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
      !Fast)
    return false;

  // On success, emit the wide load; when the byte order is reversed, load
  // into a fresh vreg and byte-swap it into Dst.
  MatchInfo = [=](MachineIRBuilder &MIB) {
    Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
    MIB.buildLoad(LoadDst, Ptr, *NewMMO);
    if (NeedsBSwap)
      MIB.buildBSwap(Dst, LoadDst);
  };
  return true;
}

/// Apply a previously matched load-or combine: build the replacement
/// instructions recorded in \p MatchInfo and erase the root G_OR.
bool CombinerHelper::applyLoadOrCombine(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  MI.eraseFromParent();
  return true;
}

/// Try each generic combine on \p MI in turn; returns true on the first one
/// that fires.
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}