10b57cec5SDimitry Andric //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 9*fe6060f1SDimitry Andric #include "llvm/ADT/SetVector.h" 10*fe6060f1SDimitry Andric #include "llvm/ADT/SmallBitVector.h" 110b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 120b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 138bcb0991SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 14*fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 155ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 165ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 170b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h" 19*fe6060f1SDimitry Andric #include "llvm/CodeGen/LowLevelType.h" 20*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 218bcb0991SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 228bcb0991SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 24e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 278bcb0991SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 
28*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h" 295ffd83dbSDimitry Andric #include "llvm/Support/MathExtras.h" 308bcb0991SDimitry Andric #include "llvm/Target/TargetMachine.h" 31*fe6060f1SDimitry Andric #include <tuple> 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric #define DEBUG_TYPE "gi-combiner" 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric using namespace llvm; 365ffd83dbSDimitry Andric using namespace MIPatternMatch; 370b57cec5SDimitry Andric 388bcb0991SDimitry Andric // Option to allow testing of the combiner while no targets know about indexed 398bcb0991SDimitry Andric // addressing. 408bcb0991SDimitry Andric static cl::opt<bool> 418bcb0991SDimitry Andric ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), 428bcb0991SDimitry Andric cl::desc("Force all indexed operations to be " 438bcb0991SDimitry Andric "legal for the GlobalISel combiner")); 448bcb0991SDimitry Andric 450b57cec5SDimitry Andric CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, 468bcb0991SDimitry Andric MachineIRBuilder &B, GISelKnownBits *KB, 475ffd83dbSDimitry Andric MachineDominatorTree *MDT, 485ffd83dbSDimitry Andric const LegalizerInfo *LI) 498bcb0991SDimitry Andric : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), 505ffd83dbSDimitry Andric KB(KB), MDT(MDT), LI(LI) { 518bcb0991SDimitry Andric (void)this->KB; 528bcb0991SDimitry Andric } 530b57cec5SDimitry Andric 54e8d8bef9SDimitry Andric const TargetLowering &CombinerHelper::getTargetLowering() const { 55e8d8bef9SDimitry Andric return *Builder.getMF().getSubtarget().getTargetLowering(); 56e8d8bef9SDimitry Andric } 57e8d8bef9SDimitry Andric 58e8d8bef9SDimitry Andric /// \returns The little endian in-memory byte position of byte \p I in a 59e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 60e8d8bef9SDimitry Andric /// 61e8d8bef9SDimitry Andric /// E.g. 
Given a 4-byte type x, x[0] -> byte 0 62e8d8bef9SDimitry Andric static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { 63e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 64e8d8bef9SDimitry Andric return I; 65e8d8bef9SDimitry Andric } 66e8d8bef9SDimitry Andric 67e8d8bef9SDimitry Andric /// \returns The big endian in-memory byte position of byte \p I in a 68e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 69e8d8bef9SDimitry Andric /// 70e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 3 71e8d8bef9SDimitry Andric static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { 72e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 73e8d8bef9SDimitry Andric return ByteWidth - I - 1; 74e8d8bef9SDimitry Andric } 75e8d8bef9SDimitry Andric 76e8d8bef9SDimitry Andric /// Given a map from byte offsets in memory to indices in a load/store, 77e8d8bef9SDimitry Andric /// determine if that map corresponds to a little or big endian byte pattern. 78e8d8bef9SDimitry Andric /// 79e8d8bef9SDimitry Andric /// \param MemOffset2Idx maps memory offsets to address offsets. 80e8d8bef9SDimitry Andric /// \param LowestIdx is the lowest index in \p MemOffset2Idx. 81e8d8bef9SDimitry Andric /// 82e8d8bef9SDimitry Andric /// \returns true if the map corresponds to a big endian byte pattern, false 83e8d8bef9SDimitry Andric /// if it corresponds to a little endian byte pattern, and None otherwise. 84e8d8bef9SDimitry Andric /// 85e8d8bef9SDimitry Andric /// E.g. 
given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns 86e8d8bef9SDimitry Andric /// are as follows: 87e8d8bef9SDimitry Andric /// 88e8d8bef9SDimitry Andric /// AddrOffset Little endian Big endian 89e8d8bef9SDimitry Andric /// 0 0 3 90e8d8bef9SDimitry Andric /// 1 1 2 91e8d8bef9SDimitry Andric /// 2 2 1 92e8d8bef9SDimitry Andric /// 3 3 0 93e8d8bef9SDimitry Andric static Optional<bool> 94e8d8bef9SDimitry Andric isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 95e8d8bef9SDimitry Andric int64_t LowestIdx) { 96e8d8bef9SDimitry Andric // Need at least two byte positions to decide on endianness. 97e8d8bef9SDimitry Andric unsigned Width = MemOffset2Idx.size(); 98e8d8bef9SDimitry Andric if (Width < 2) 99e8d8bef9SDimitry Andric return None; 100e8d8bef9SDimitry Andric bool BigEndian = true, LittleEndian = true; 101e8d8bef9SDimitry Andric for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { 102e8d8bef9SDimitry Andric auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); 103e8d8bef9SDimitry Andric if (MemOffsetAndIdx == MemOffset2Idx.end()) 104e8d8bef9SDimitry Andric return None; 105e8d8bef9SDimitry Andric const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; 106e8d8bef9SDimitry Andric assert(Idx >= 0 && "Expected non-negative byte offset?"); 107e8d8bef9SDimitry Andric LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); 108e8d8bef9SDimitry Andric BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); 109e8d8bef9SDimitry Andric if (!BigEndian && !LittleEndian) 110e8d8bef9SDimitry Andric return None; 111e8d8bef9SDimitry Andric } 112e8d8bef9SDimitry Andric 113e8d8bef9SDimitry Andric assert((BigEndian != LittleEndian) && 114e8d8bef9SDimitry Andric "Pattern cannot be both big and little endian!"); 115e8d8bef9SDimitry Andric return BigEndian; 116e8d8bef9SDimitry Andric } 117e8d8bef9SDimitry Andric 118e8d8bef9SDimitry Andric bool CombinerHelper::isLegalOrBeforeLegalizer( 119e8d8bef9SDimitry Andric const LegalityQuery &Query) 
const { 120e8d8bef9SDimitry Andric return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; 121e8d8bef9SDimitry Andric } 122e8d8bef9SDimitry Andric 1230b57cec5SDimitry Andric void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, 1240b57cec5SDimitry Andric Register ToReg) const { 1250b57cec5SDimitry Andric Observer.changingAllUsesOfReg(MRI, FromReg); 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric if (MRI.constrainRegAttrs(ToReg, FromReg)) 1280b57cec5SDimitry Andric MRI.replaceRegWith(FromReg, ToReg); 1290b57cec5SDimitry Andric else 1300b57cec5SDimitry Andric Builder.buildCopy(ToReg, FromReg); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric Observer.finishedChangingAllUsesOfReg(); 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, 1360b57cec5SDimitry Andric MachineOperand &FromRegOp, 1370b57cec5SDimitry Andric Register ToReg) const { 1380b57cec5SDimitry Andric assert(FromRegOp.getParent() && "Expected an operand in an MI"); 1390b57cec5SDimitry Andric Observer.changingInstr(*FromRegOp.getParent()); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric FromRegOp.setReg(ToReg); 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric Observer.changedInstr(*FromRegOp.getParent()); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { 1470b57cec5SDimitry Andric if (matchCombineCopy(MI)) { 1480b57cec5SDimitry Andric applyCombineCopy(MI); 1490b57cec5SDimitry Andric return true; 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric return false; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { 1540b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::COPY) 1550b57cec5SDimitry Andric return false; 1568bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 
1578bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1585ffd83dbSDimitry Andric return canReplaceReg(DstReg, SrcReg, MRI); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric void CombinerHelper::applyCombineCopy(MachineInstr &MI) { 1618bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1628bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1630b57cec5SDimitry Andric MI.eraseFromParent(); 1640b57cec5SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1678bcb0991SDimitry Andric bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { 1688bcb0991SDimitry Andric bool IsUndef = false; 1698bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 1708bcb0991SDimitry Andric if (matchCombineConcatVectors(MI, IsUndef, Ops)) { 1718bcb0991SDimitry Andric applyCombineConcatVectors(MI, IsUndef, Ops); 1728bcb0991SDimitry Andric return true; 1738bcb0991SDimitry Andric } 1748bcb0991SDimitry Andric return false; 1758bcb0991SDimitry Andric } 1768bcb0991SDimitry Andric 1778bcb0991SDimitry Andric bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, 1788bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 1798bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 1808bcb0991SDimitry Andric "Invalid instruction"); 1818bcb0991SDimitry Andric IsUndef = true; 1828bcb0991SDimitry Andric MachineInstr *Undef = nullptr; 1838bcb0991SDimitry Andric 1848bcb0991SDimitry Andric // Walk over all the operands of concat vectors and check if they are 1858bcb0991SDimitry Andric // build_vector themselves or undef. 1868bcb0991SDimitry Andric // Then collect their operands in Ops. 
187480093f4SDimitry Andric for (const MachineOperand &MO : MI.uses()) { 1888bcb0991SDimitry Andric Register Reg = MO.getReg(); 1898bcb0991SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 1908bcb0991SDimitry Andric assert(Def && "Operand not defined"); 1918bcb0991SDimitry Andric switch (Def->getOpcode()) { 1928bcb0991SDimitry Andric case TargetOpcode::G_BUILD_VECTOR: 1938bcb0991SDimitry Andric IsUndef = false; 1948bcb0991SDimitry Andric // Remember the operands of the build_vector to fold 1958bcb0991SDimitry Andric // them into the yet-to-build flattened concat vectors. 196480093f4SDimitry Andric for (const MachineOperand &BuildVecMO : Def->uses()) 1978bcb0991SDimitry Andric Ops.push_back(BuildVecMO.getReg()); 1988bcb0991SDimitry Andric break; 1998bcb0991SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 2008bcb0991SDimitry Andric LLT OpType = MRI.getType(Reg); 2018bcb0991SDimitry Andric // Keep one undef value for all the undef operands. 2028bcb0991SDimitry Andric if (!Undef) { 2038bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2048bcb0991SDimitry Andric Undef = Builder.buildUndef(OpType.getScalarType()); 2058bcb0991SDimitry Andric } 2068bcb0991SDimitry Andric assert(MRI.getType(Undef->getOperand(0).getReg()) == 2078bcb0991SDimitry Andric OpType.getScalarType() && 2088bcb0991SDimitry Andric "All undefs should have the same type"); 2098bcb0991SDimitry Andric // Break the undef vector in as many scalar elements as needed 2108bcb0991SDimitry Andric // for the flattening. 
2118bcb0991SDimitry Andric for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); 2128bcb0991SDimitry Andric EltIdx != EltEnd; ++EltIdx) 2138bcb0991SDimitry Andric Ops.push_back(Undef->getOperand(0).getReg()); 2148bcb0991SDimitry Andric break; 2158bcb0991SDimitry Andric } 2168bcb0991SDimitry Andric default: 2178bcb0991SDimitry Andric return false; 2188bcb0991SDimitry Andric } 2198bcb0991SDimitry Andric } 2208bcb0991SDimitry Andric return true; 2218bcb0991SDimitry Andric } 2228bcb0991SDimitry Andric void CombinerHelper::applyCombineConcatVectors( 2238bcb0991SDimitry Andric MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { 2248bcb0991SDimitry Andric // We determined that the concat_vectors can be flatten. 2258bcb0991SDimitry Andric // Generate the flattened build_vector. 2268bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2278bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2288bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 2298bcb0991SDimitry Andric 2308bcb0991SDimitry Andric // Note: IsUndef is sort of redundant. We could have determine it by 2318bcb0991SDimitry Andric // checking that at all Ops are undef. Alternatively, we could have 2328bcb0991SDimitry Andric // generate a build_vector of undefs and rely on another combine to 2338bcb0991SDimitry Andric // clean that up. For now, given we already gather this information 2348bcb0991SDimitry Andric // in tryCombineConcatVectors, just save compile time and issue the 2358bcb0991SDimitry Andric // right thing. 
2368bcb0991SDimitry Andric if (IsUndef) 2378bcb0991SDimitry Andric Builder.buildUndef(NewDstReg); 2388bcb0991SDimitry Andric else 2398bcb0991SDimitry Andric Builder.buildBuildVector(NewDstReg, Ops); 2408bcb0991SDimitry Andric MI.eraseFromParent(); 2418bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 2428bcb0991SDimitry Andric } 2438bcb0991SDimitry Andric 2448bcb0991SDimitry Andric bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { 2458bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 2468bcb0991SDimitry Andric if (matchCombineShuffleVector(MI, Ops)) { 2478bcb0991SDimitry Andric applyCombineShuffleVector(MI, Ops); 2488bcb0991SDimitry Andric return true; 2498bcb0991SDimitry Andric } 2508bcb0991SDimitry Andric return false; 2518bcb0991SDimitry Andric } 2528bcb0991SDimitry Andric 2538bcb0991SDimitry Andric bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, 2548bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 2558bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && 2568bcb0991SDimitry Andric "Invalid instruction kind"); 2578bcb0991SDimitry Andric LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 2588bcb0991SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 2598bcb0991SDimitry Andric LLT SrcType = MRI.getType(Src1); 260480093f4SDimitry Andric // As bizarre as it may look, shuffle vector can actually produce 261480093f4SDimitry Andric // scalar! This is because at the IR level a <1 x ty> shuffle 262480093f4SDimitry Andric // vector is perfectly valid. 263480093f4SDimitry Andric unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; 264480093f4SDimitry Andric unsigned SrcNumElts = SrcType.isVector() ? 
SrcType.getNumElements() : 1; 2658bcb0991SDimitry Andric 2668bcb0991SDimitry Andric // If the resulting vector is smaller than the size of the source 2678bcb0991SDimitry Andric // vectors being concatenated, we won't be able to replace the 2688bcb0991SDimitry Andric // shuffle vector into a concat_vectors. 2698bcb0991SDimitry Andric // 2708bcb0991SDimitry Andric // Note: We may still be able to produce a concat_vectors fed by 2718bcb0991SDimitry Andric // extract_vector_elt and so on. It is less clear that would 2728bcb0991SDimitry Andric // be better though, so don't bother for now. 273480093f4SDimitry Andric // 274480093f4SDimitry Andric // If the destination is a scalar, the size of the sources doesn't 275480093f4SDimitry Andric // matter. we will lower the shuffle to a plain copy. This will 276480093f4SDimitry Andric // work only if the source and destination have the same size. But 277480093f4SDimitry Andric // that's covered by the next condition. 278480093f4SDimitry Andric // 279480093f4SDimitry Andric // TODO: If the size between the source and destination don't match 280480093f4SDimitry Andric // we could still emit an extract vector element in that case. 281480093f4SDimitry Andric if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) 2828bcb0991SDimitry Andric return false; 2838bcb0991SDimitry Andric 2848bcb0991SDimitry Andric // Check that the shuffle mask can be broken evenly between the 2858bcb0991SDimitry Andric // different sources. 2868bcb0991SDimitry Andric if (DstNumElts % SrcNumElts != 0) 2878bcb0991SDimitry Andric return false; 2888bcb0991SDimitry Andric 2898bcb0991SDimitry Andric // Mask length is a multiple of the source vector length. 2908bcb0991SDimitry Andric // Check if the shuffle is some kind of concatenation of the input 2918bcb0991SDimitry Andric // vectors. 
2928bcb0991SDimitry Andric unsigned NumConcat = DstNumElts / SrcNumElts; 2938bcb0991SDimitry Andric SmallVector<int, 8> ConcatSrcs(NumConcat, -1); 294480093f4SDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 2958bcb0991SDimitry Andric for (unsigned i = 0; i != DstNumElts; ++i) { 2968bcb0991SDimitry Andric int Idx = Mask[i]; 2978bcb0991SDimitry Andric // Undef value. 2988bcb0991SDimitry Andric if (Idx < 0) 2998bcb0991SDimitry Andric continue; 3008bcb0991SDimitry Andric // Ensure the indices in each SrcType sized piece are sequential and that 3018bcb0991SDimitry Andric // the same source is used for the whole piece. 3028bcb0991SDimitry Andric if ((Idx % SrcNumElts != (i % SrcNumElts)) || 3038bcb0991SDimitry Andric (ConcatSrcs[i / SrcNumElts] >= 0 && 3048bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) 3058bcb0991SDimitry Andric return false; 3068bcb0991SDimitry Andric // Remember which source this index came from. 3078bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; 3088bcb0991SDimitry Andric } 3098bcb0991SDimitry Andric 3108bcb0991SDimitry Andric // The shuffle is concatenating multiple vectors together. 3118bcb0991SDimitry Andric // Collect the different operands for that. 
3128bcb0991SDimitry Andric Register UndefReg; 3138bcb0991SDimitry Andric Register Src2 = MI.getOperand(2).getReg(); 3148bcb0991SDimitry Andric for (auto Src : ConcatSrcs) { 3158bcb0991SDimitry Andric if (Src < 0) { 3168bcb0991SDimitry Andric if (!UndefReg) { 3178bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3188bcb0991SDimitry Andric UndefReg = Builder.buildUndef(SrcType).getReg(0); 3198bcb0991SDimitry Andric } 3208bcb0991SDimitry Andric Ops.push_back(UndefReg); 3218bcb0991SDimitry Andric } else if (Src == 0) 3228bcb0991SDimitry Andric Ops.push_back(Src1); 3238bcb0991SDimitry Andric else 3248bcb0991SDimitry Andric Ops.push_back(Src2); 3258bcb0991SDimitry Andric } 3268bcb0991SDimitry Andric return true; 3278bcb0991SDimitry Andric } 3288bcb0991SDimitry Andric 3298bcb0991SDimitry Andric void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, 3308bcb0991SDimitry Andric const ArrayRef<Register> Ops) { 3318bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3328bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3338bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 3348bcb0991SDimitry Andric 335480093f4SDimitry Andric if (Ops.size() == 1) 336480093f4SDimitry Andric Builder.buildCopy(NewDstReg, Ops[0]); 337480093f4SDimitry Andric else 338480093f4SDimitry Andric Builder.buildMerge(NewDstReg, Ops); 3398bcb0991SDimitry Andric 3408bcb0991SDimitry Andric MI.eraseFromParent(); 3418bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 3428bcb0991SDimitry Andric } 3438bcb0991SDimitry Andric 3440b57cec5SDimitry Andric namespace { 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric /// Select a preference between two uses. CurrentUse is the current preference 3470b57cec5SDimitry Andric /// while *ForCandidate is attributes of the candidate under consideration. 
3480b57cec5SDimitry Andric PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, 3495ffd83dbSDimitry Andric const LLT TyForCandidate, 3500b57cec5SDimitry Andric unsigned OpcodeForCandidate, 3510b57cec5SDimitry Andric MachineInstr *MIForCandidate) { 3520b57cec5SDimitry Andric if (!CurrentUse.Ty.isValid()) { 3530b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == OpcodeForCandidate || 3540b57cec5SDimitry Andric CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) 3550b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3560b57cec5SDimitry Andric return CurrentUse; 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric // We permit the extend to hoist through basic blocks but this is only 3600b57cec5SDimitry Andric // sensible if the target has extending loads. If you end up lowering back 3610b57cec5SDimitry Andric // into a load and extend during the legalizer then the end result is 3620b57cec5SDimitry Andric // hoisting the extend up to the load. 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric // Prefer defined extensions to undefined extensions as these are more 3650b57cec5SDimitry Andric // likely to reduce the number of instructions. 3660b57cec5SDimitry Andric if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && 3670b57cec5SDimitry Andric CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) 3680b57cec5SDimitry Andric return CurrentUse; 3690b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && 3700b57cec5SDimitry Andric OpcodeForCandidate != TargetOpcode::G_ANYEXT) 3710b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric // Prefer sign extensions to zero extensions as sign-extensions tend to be 3740b57cec5SDimitry Andric // more expensive. 
3750b57cec5SDimitry Andric if (CurrentUse.Ty == TyForCandidate) { 3760b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && 3770b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_ZEXT) 3780b57cec5SDimitry Andric return CurrentUse; 3790b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && 3800b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_SEXT) 3810b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric // This is potentially target specific. We've chosen the largest type 3850b57cec5SDimitry Andric // because G_TRUNC is usually free. One potential catch with this is that 3860b57cec5SDimitry Andric // some targets have a reduced number of larger registers than smaller 3870b57cec5SDimitry Andric // registers and this choice potentially increases the live-range for the 3880b57cec5SDimitry Andric // larger value. 3890b57cec5SDimitry Andric if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { 3900b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric return CurrentUse; 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric /// Find a suitable place to insert some instructions and insert them. This 3960b57cec5SDimitry Andric /// function accounts for special cases like inserting before a PHI node. 3970b57cec5SDimitry Andric /// The current strategy for inserting before PHI's is to duplicate the 3980b57cec5SDimitry Andric /// instructions for each predecessor. However, while that's ok for G_TRUNC 3990b57cec5SDimitry Andric /// on most targets since it generally requires no code, other targets/cases may 4000b57cec5SDimitry Andric /// want to try harder to find a dominating block. 
4010b57cec5SDimitry Andric static void InsertInsnsWithoutSideEffectsBeforeUse( 4020b57cec5SDimitry Andric MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, 4030b57cec5SDimitry Andric std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, 4040b57cec5SDimitry Andric MachineOperand &UseMO)> 4050b57cec5SDimitry Andric Inserter) { 4060b57cec5SDimitry Andric MachineInstr &UseMI = *UseMO.getParent(); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric MachineBasicBlock *InsertBB = UseMI.getParent(); 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric // If the use is a PHI then we want the predecessor block instead. 4110b57cec5SDimitry Andric if (UseMI.isPHI()) { 4120b57cec5SDimitry Andric MachineOperand *PredBB = std::next(&UseMO); 4130b57cec5SDimitry Andric InsertBB = PredBB->getMBB(); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric // If the block is the same block as the def then we want to insert just after 4170b57cec5SDimitry Andric // the def instead of at the start of the block. 
4180b57cec5SDimitry Andric if (InsertBB == DefMI.getParent()) { 4190b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPt = &DefMI; 4200b57cec5SDimitry Andric Inserter(InsertBB, std::next(InsertPt), UseMO); 4210b57cec5SDimitry Andric return; 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric // Otherwise we want the start of the BB 4250b57cec5SDimitry Andric Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric } // end anonymous namespace 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { 4300b57cec5SDimitry Andric PreferredTuple Preferred; 4310b57cec5SDimitry Andric if (matchCombineExtendingLoads(MI, Preferred)) { 4320b57cec5SDimitry Andric applyCombineExtendingLoads(MI, Preferred); 4330b57cec5SDimitry Andric return true; 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric return false; 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, 4390b57cec5SDimitry Andric PreferredTuple &Preferred) { 4400b57cec5SDimitry Andric // We match the loads and follow the uses to the extend instead of matching 4410b57cec5SDimitry Andric // the extends and following the def to the load. This is because the load 4420b57cec5SDimitry Andric // must remain in the same position for correctness (unless we also add code 4430b57cec5SDimitry Andric // to find a safe place to sink it) whereas the extend is freely movable. 4440b57cec5SDimitry Andric // It also prevents us from duplicating the load for the volatile case or just 4450b57cec5SDimitry Andric // for performance. 
446*fe6060f1SDimitry Andric GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI); 447*fe6060f1SDimitry Andric if (!LoadMI) 4480b57cec5SDimitry Andric return false; 4490b57cec5SDimitry Andric 450*fe6060f1SDimitry Andric Register LoadReg = LoadMI->getDstReg(); 4510b57cec5SDimitry Andric 452*fe6060f1SDimitry Andric LLT LoadValueTy = MRI.getType(LoadReg); 4530b57cec5SDimitry Andric if (!LoadValueTy.isScalar()) 4540b57cec5SDimitry Andric return false; 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric // Most architectures are going to legalize <s8 loads into at least a 1 byte 4570b57cec5SDimitry Andric // load, and the MMOs can only describe memory accesses in multiples of bytes. 4580b57cec5SDimitry Andric // If we try to perform extload combining on those, we can end up with 4590b57cec5SDimitry Andric // %a(s8) = extload %ptr (load 1 byte from %ptr) 4600b57cec5SDimitry Andric // ... which is an illegal extload instruction. 4610b57cec5SDimitry Andric if (LoadValueTy.getSizeInBits() < 8) 4620b57cec5SDimitry Andric return false; 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric // For non power-of-2 types, they will very likely be legalized into multiple 4650b57cec5SDimitry Andric // loads. Don't bother trying to match them into extending loads. 4660b57cec5SDimitry Andric if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) 4670b57cec5SDimitry Andric return false; 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric // Find the preferred type aside from the any-extends (unless it's the only 4700b57cec5SDimitry Andric // one) and non-extending ops. We'll emit an extending load to that type and 4710b57cec5SDimitry Andric // and emit a variant of (extend (trunc X)) for the others according to the 4720b57cec5SDimitry Andric // relative type sizes. At the same time, pick an extend to use based on the 4730b57cec5SDimitry Andric // extend involved in the chosen type. 
474*fe6060f1SDimitry Andric unsigned PreferredOpcode = 475*fe6060f1SDimitry Andric isa<GLoad>(&MI) 4760b57cec5SDimitry Andric ? TargetOpcode::G_ANYEXT 477*fe6060f1SDimitry Andric : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; 4780b57cec5SDimitry Andric Preferred = {LLT(), PreferredOpcode, nullptr}; 479*fe6060f1SDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) { 4800b57cec5SDimitry Andric if (UseMI.getOpcode() == TargetOpcode::G_SEXT || 4810b57cec5SDimitry Andric UseMI.getOpcode() == TargetOpcode::G_ZEXT || 4825ffd83dbSDimitry Andric (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { 483*fe6060f1SDimitry Andric const auto &MMO = LoadMI->getMMO(); 484*fe6060f1SDimitry Andric // For atomics, only form anyextending loads. 485*fe6060f1SDimitry Andric if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT) 486*fe6060f1SDimitry Andric continue; 4875ffd83dbSDimitry Andric // Check for legality. 4885ffd83dbSDimitry Andric if (LI) { 4895ffd83dbSDimitry Andric LegalityQuery::MemDesc MMDesc; 490*fe6060f1SDimitry Andric MMDesc.MemoryTy = MMO.getMemoryType(); 4915ffd83dbSDimitry Andric MMDesc.AlignInBits = MMO.getAlign().value() * 8; 492*fe6060f1SDimitry Andric MMDesc.Ordering = MMO.getSuccessOrdering(); 4935ffd83dbSDimitry Andric LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); 494*fe6060f1SDimitry Andric LLT SrcTy = MRI.getType(LoadMI->getPointerReg()); 495*fe6060f1SDimitry Andric if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) 496*fe6060f1SDimitry Andric .Action != LegalizeActions::Legal) 4975ffd83dbSDimitry Andric continue; 4985ffd83dbSDimitry Andric } 4990b57cec5SDimitry Andric Preferred = ChoosePreferredUse(Preferred, 5000b57cec5SDimitry Andric MRI.getType(UseMI.getOperand(0).getReg()), 5010b57cec5SDimitry Andric UseMI.getOpcode(), &UseMI); 5020b57cec5SDimitry Andric } 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric // There were no extends 5060b57cec5SDimitry 
Andric if (!Preferred.MI) 5070b57cec5SDimitry Andric return false; 5080b57cec5SDimitry Andric // It should be impossible to chose an extend without selecting a different 5090b57cec5SDimitry Andric // type since by definition the result of an extend is larger. 5100b57cec5SDimitry Andric assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); 5130b57cec5SDimitry Andric return true; 5140b57cec5SDimitry Andric } 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, 5170b57cec5SDimitry Andric PreferredTuple &Preferred) { 5180b57cec5SDimitry Andric // Rewrite the load to the chosen extending load. 5190b57cec5SDimitry Andric Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric // Inserter to insert a truncate back to the original type at a given point 5220b57cec5SDimitry Andric // with some basic CSE to limit truncate duplication to one per BB. 
5230b57cec5SDimitry Andric DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns; 5240b57cec5SDimitry Andric auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, 5250b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore, 5260b57cec5SDimitry Andric MachineOperand &UseMO) { 5270b57cec5SDimitry Andric MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); 5280b57cec5SDimitry Andric if (PreviouslyEmitted) { 5290b57cec5SDimitry Andric Observer.changingInstr(*UseMO.getParent()); 5300b57cec5SDimitry Andric UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); 5310b57cec5SDimitry Andric Observer.changedInstr(*UseMO.getParent()); 5320b57cec5SDimitry Andric return; 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric Builder.setInsertPt(*InsertIntoBB, InsertBefore); 5360b57cec5SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); 5370b57cec5SDimitry Andric MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); 5380b57cec5SDimitry Andric EmittedInsns[InsertIntoBB] = NewMI; 5390b57cec5SDimitry Andric replaceRegOpWith(MRI, UseMO, NewDstReg); 5400b57cec5SDimitry Andric }; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric Observer.changingInstr(MI); 5430b57cec5SDimitry Andric MI.setDesc( 5440b57cec5SDimitry Andric Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT 5450b57cec5SDimitry Andric ? TargetOpcode::G_SEXTLOAD 5460b57cec5SDimitry Andric : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT 5470b57cec5SDimitry Andric ? TargetOpcode::G_ZEXTLOAD 5480b57cec5SDimitry Andric : TargetOpcode::G_LOAD)); 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric // Rewrite all the uses to fix up the types. 
5510b57cec5SDimitry Andric auto &LoadValue = MI.getOperand(0); 5520b57cec5SDimitry Andric SmallVector<MachineOperand *, 4> Uses; 5530b57cec5SDimitry Andric for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) 5540b57cec5SDimitry Andric Uses.push_back(&UseMO); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric for (auto *UseMO : Uses) { 5570b57cec5SDimitry Andric MachineInstr *UseMI = UseMO->getParent(); 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric // If the extend is compatible with the preferred extend then we should fix 5600b57cec5SDimitry Andric // up the type and extend so that it uses the preferred use. 5610b57cec5SDimitry Andric if (UseMI->getOpcode() == Preferred.ExtendOpcode || 5620b57cec5SDimitry Andric UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { 5638bcb0991SDimitry Andric Register UseDstReg = UseMI->getOperand(0).getReg(); 5640b57cec5SDimitry Andric MachineOperand &UseSrcMO = UseMI->getOperand(1); 5655ffd83dbSDimitry Andric const LLT UseDstTy = MRI.getType(UseDstReg); 5660b57cec5SDimitry Andric if (UseDstReg != ChosenDstReg) { 5670b57cec5SDimitry Andric if (Preferred.Ty == UseDstTy) { 5680b57cec5SDimitry Andric // If the use has the same type as the preferred use, then merge 5690b57cec5SDimitry Andric // the vregs and erase the extend. For example: 5700b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5710b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 5720b57cec5SDimitry Andric // %3:_(s32) = G_ANYEXT %1(s8) 5730b57cec5SDimitry Andric // ... = ... %3(s32) 5740b57cec5SDimitry Andric // rewrites to: 5750b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 5760b57cec5SDimitry Andric // ... = ... 
%2(s32) 5770b57cec5SDimitry Andric replaceRegWith(MRI, UseDstReg, ChosenDstReg); 5780b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 5790b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 5800b57cec5SDimitry Andric } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { 5810b57cec5SDimitry Andric // If the preferred size is smaller, then keep the extend but extend 5820b57cec5SDimitry Andric // from the result of the extending load. For example: 5830b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5840b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 5850b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %1(s8) 5860b57cec5SDimitry Andric // ... = ... %3(s64) 5870b57cec5SDimitry Andric /// rewrites to: 5880b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 5890b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %2:_(s32) 5900b57cec5SDimitry Andric // ... = ... %3(s64) 5910b57cec5SDimitry Andric replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); 5920b57cec5SDimitry Andric } else { 5930b57cec5SDimitry Andric // If the preferred size is large, then insert a truncate. For 5940b57cec5SDimitry Andric // example: 5950b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 5960b57cec5SDimitry Andric // %2:_(s64) = G_SEXT %1(s8) 5970b57cec5SDimitry Andric // %3:_(s32) = G_ZEXT %1(s8) 5980b57cec5SDimitry Andric // ... = ... %3(s32) 5990b57cec5SDimitry Andric /// rewrites to: 6000b57cec5SDimitry Andric // %2:_(s64) = G_SEXTLOAD ... 6010b57cec5SDimitry Andric // %4:_(s8) = G_TRUNC %2:_(s32) 6020b57cec5SDimitry Andric // %3:_(s64) = G_ZEXT %2:_(s8) 6030b57cec5SDimitry Andric // ... = ... %3(s64) 6040b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, 6050b57cec5SDimitry Andric InsertTruncAt); 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric continue; 6080b57cec5SDimitry Andric } 6090b57cec5SDimitry Andric // The use is (one of) the uses of the preferred use we chose earlier. 
6100b57cec5SDimitry Andric // We're going to update the load to def this value later so just erase 6110b57cec5SDimitry Andric // the old extend. 6120b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 6130b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 6140b57cec5SDimitry Andric continue; 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric // The use isn't an extend. Truncate back to the type we originally loaded. 6180b57cec5SDimitry Andric // This is free on many targets. 6190b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt); 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric MI.getOperand(0).setReg(ChosenDstReg); 6230b57cec5SDimitry Andric Observer.changedInstr(MI); 6240b57cec5SDimitry Andric } 6250b57cec5SDimitry Andric 6265ffd83dbSDimitry Andric bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, 6275ffd83dbSDimitry Andric const MachineInstr &UseMI) { 6285ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 6295ffd83dbSDimitry Andric "shouldn't consider debug uses"); 6308bcb0991SDimitry Andric assert(DefMI.getParent() == UseMI.getParent()); 6318bcb0991SDimitry Andric if (&DefMI == &UseMI) 6328bcb0991SDimitry Andric return false; 633e8d8bef9SDimitry Andric const MachineBasicBlock &MBB = *DefMI.getParent(); 634e8d8bef9SDimitry Andric auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { 635e8d8bef9SDimitry Andric return &MI == &DefMI || &MI == &UseMI; 636e8d8bef9SDimitry Andric }); 637e8d8bef9SDimitry Andric if (DefOrUse == MBB.end()) 638e8d8bef9SDimitry Andric llvm_unreachable("Block must contain both DefMI and UseMI!"); 639e8d8bef9SDimitry Andric return &*DefOrUse == &DefMI; 6408bcb0991SDimitry Andric } 6418bcb0991SDimitry Andric 6425ffd83dbSDimitry Andric bool CombinerHelper::dominates(const MachineInstr &DefMI, 6435ffd83dbSDimitry Andric const MachineInstr &UseMI) { 
6445ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 6455ffd83dbSDimitry Andric "shouldn't consider debug uses"); 6468bcb0991SDimitry Andric if (MDT) 6478bcb0991SDimitry Andric return MDT->dominates(&DefMI, &UseMI); 6488bcb0991SDimitry Andric else if (DefMI.getParent() != UseMI.getParent()) 6498bcb0991SDimitry Andric return false; 6508bcb0991SDimitry Andric 6518bcb0991SDimitry Andric return isPredecessor(DefMI, UseMI); 6528bcb0991SDimitry Andric } 6538bcb0991SDimitry Andric 654e8d8bef9SDimitry Andric bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { 6555ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 6565ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 657e8d8bef9SDimitry Andric Register LoadUser = SrcReg; 658e8d8bef9SDimitry Andric 659e8d8bef9SDimitry Andric if (MRI.getType(SrcReg).isVector()) 660e8d8bef9SDimitry Andric return false; 661e8d8bef9SDimitry Andric 662e8d8bef9SDimitry Andric Register TruncSrc; 663e8d8bef9SDimitry Andric if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) 664e8d8bef9SDimitry Andric LoadUser = TruncSrc; 665e8d8bef9SDimitry Andric 666e8d8bef9SDimitry Andric uint64_t SizeInBits = MI.getOperand(2).getImm(); 667e8d8bef9SDimitry Andric // If the source is a G_SEXTLOAD from the same bit width, then we don't 668e8d8bef9SDimitry Andric // need any extend at all, just a truncate. 669*fe6060f1SDimitry Andric if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) { 670e8d8bef9SDimitry Andric // If truncating more than the original extended value, abort. 
671*fe6060f1SDimitry Andric auto LoadSizeBits = LoadMI->getMemSizeInBits(); 672*fe6060f1SDimitry Andric if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits) 673e8d8bef9SDimitry Andric return false; 674*fe6060f1SDimitry Andric if (LoadSizeBits == SizeInBits) 675e8d8bef9SDimitry Andric return true; 676e8d8bef9SDimitry Andric } 677e8d8bef9SDimitry Andric return false; 6785ffd83dbSDimitry Andric } 6795ffd83dbSDimitry Andric 680*fe6060f1SDimitry Andric void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { 6815ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 682e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 683e8d8bef9SDimitry Andric Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 684e8d8bef9SDimitry Andric MI.eraseFromParent(); 685e8d8bef9SDimitry Andric } 686e8d8bef9SDimitry Andric 687e8d8bef9SDimitry Andric bool CombinerHelper::matchSextInRegOfLoad( 688e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 689e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 690e8d8bef9SDimitry Andric 691e8d8bef9SDimitry Andric // Only supports scalars for now. 692e8d8bef9SDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 693e8d8bef9SDimitry Andric return false; 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 696*fe6060f1SDimitry Andric auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI); 697*fe6060f1SDimitry Andric if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) || 698*fe6060f1SDimitry Andric !LoadDef->isSimple()) 699e8d8bef9SDimitry Andric return false; 700e8d8bef9SDimitry Andric 701e8d8bef9SDimitry Andric // If the sign extend extends from a narrower width than the load's width, 702e8d8bef9SDimitry Andric // then we can narrow the load width when we combine to a G_SEXTLOAD. 703e8d8bef9SDimitry Andric // Avoid widening the load at all. 
704*fe6060f1SDimitry Andric unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), 705*fe6060f1SDimitry Andric LoadDef->getMemSizeInBits()); 706e8d8bef9SDimitry Andric 707e8d8bef9SDimitry Andric // Don't generate G_SEXTLOADs with a < 1 byte width. 708e8d8bef9SDimitry Andric if (NewSizeBits < 8) 709e8d8bef9SDimitry Andric return false; 710e8d8bef9SDimitry Andric // Don't bother creating a non-power-2 sextload, it will likely be broken up 711e8d8bef9SDimitry Andric // anyway for most targets. 712e8d8bef9SDimitry Andric if (!isPowerOf2_32(NewSizeBits)) 713e8d8bef9SDimitry Andric return false; 714*fe6060f1SDimitry Andric MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits); 715e8d8bef9SDimitry Andric return true; 716e8d8bef9SDimitry Andric } 717e8d8bef9SDimitry Andric 718*fe6060f1SDimitry Andric void CombinerHelper::applySextInRegOfLoad( 719e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 720e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 721e8d8bef9SDimitry Andric Register LoadReg; 722e8d8bef9SDimitry Andric unsigned ScalarSizeBits; 723e8d8bef9SDimitry Andric std::tie(LoadReg, ScalarSizeBits) = MatchInfo; 724*fe6060f1SDimitry Andric GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg)); 725e8d8bef9SDimitry Andric 726e8d8bef9SDimitry Andric // If we have the following: 727e8d8bef9SDimitry Andric // %ld = G_LOAD %ptr, (load 2) 728e8d8bef9SDimitry Andric // %ext = G_SEXT_INREG %ld, 8 729e8d8bef9SDimitry Andric // ==> 730e8d8bef9SDimitry Andric // %ld = G_SEXTLOAD %ptr (load 1) 731e8d8bef9SDimitry Andric 732*fe6060f1SDimitry Andric auto &MMO = LoadDef->getMMO(); 733*fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(*LoadDef); 734e8d8bef9SDimitry Andric auto &MF = Builder.getMF(); 735e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 736e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); 737e8d8bef9SDimitry Andric 
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), 738*fe6060f1SDimitry Andric LoadDef->getPointerReg(), *NewMMO); 7395ffd83dbSDimitry Andric MI.eraseFromParent(); 7405ffd83dbSDimitry Andric } 7415ffd83dbSDimitry Andric 7428bcb0991SDimitry Andric bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, 7438bcb0991SDimitry Andric Register &Base, Register &Offset) { 7448bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 7458bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 7468bcb0991SDimitry Andric 7478bcb0991SDimitry Andric #ifndef NDEBUG 7488bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 7498bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 7508bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 7518bcb0991SDimitry Andric #endif 7528bcb0991SDimitry Andric 7538bcb0991SDimitry Andric Base = MI.getOperand(1).getReg(); 7548bcb0991SDimitry Andric MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); 7558bcb0991SDimitry Andric if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 7568bcb0991SDimitry Andric return false; 7578bcb0991SDimitry Andric 7588bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); 759e8d8bef9SDimitry Andric // FIXME: The following use traversal needs a bail out for patholigical cases. 
7605ffd83dbSDimitry Andric for (auto &Use : MRI.use_nodbg_instructions(Base)) { 761480093f4SDimitry Andric if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) 7628bcb0991SDimitry Andric continue; 7638bcb0991SDimitry Andric 7648bcb0991SDimitry Andric Offset = Use.getOperand(2).getReg(); 7658bcb0991SDimitry Andric if (!ForceLegalIndexing && 7668bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { 7678bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " 7688bcb0991SDimitry Andric << Use); 7698bcb0991SDimitry Andric continue; 7708bcb0991SDimitry Andric } 7718bcb0991SDimitry Andric 7728bcb0991SDimitry Andric // Make sure the offset calculation is before the potentially indexed op. 7738bcb0991SDimitry Andric // FIXME: we really care about dependency here. The offset calculation might 7748bcb0991SDimitry Andric // be movable. 7758bcb0991SDimitry Andric MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); 7768bcb0991SDimitry Andric if (!OffsetDef || !dominates(*OffsetDef, MI)) { 7778bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " 7788bcb0991SDimitry Andric << Use); 7798bcb0991SDimitry Andric continue; 7808bcb0991SDimitry Andric } 7818bcb0991SDimitry Andric 7828bcb0991SDimitry Andric // FIXME: check whether all uses of Base are load/store with foldable 7838bcb0991SDimitry Andric // addressing modes. If so, using the normal addr-modes is better than 7848bcb0991SDimitry Andric // forming an indexed one. 
7858bcb0991SDimitry Andric 7868bcb0991SDimitry Andric bool MemOpDominatesAddrUses = true; 7875ffd83dbSDimitry Andric for (auto &PtrAddUse : 7885ffd83dbSDimitry Andric MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) { 789480093f4SDimitry Andric if (!dominates(MI, PtrAddUse)) { 7908bcb0991SDimitry Andric MemOpDominatesAddrUses = false; 7918bcb0991SDimitry Andric break; 7928bcb0991SDimitry Andric } 7938bcb0991SDimitry Andric } 7948bcb0991SDimitry Andric 7958bcb0991SDimitry Andric if (!MemOpDominatesAddrUses) { 7968bcb0991SDimitry Andric LLVM_DEBUG( 7978bcb0991SDimitry Andric dbgs() << " Ignoring candidate as memop does not dominate uses: " 7988bcb0991SDimitry Andric << Use); 7998bcb0991SDimitry Andric continue; 8008bcb0991SDimitry Andric } 8018bcb0991SDimitry Andric 8028bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Found match: " << Use); 8038bcb0991SDimitry Andric Addr = Use.getOperand(0).getReg(); 8048bcb0991SDimitry Andric return true; 8058bcb0991SDimitry Andric } 8068bcb0991SDimitry Andric 8078bcb0991SDimitry Andric return false; 8088bcb0991SDimitry Andric } 8098bcb0991SDimitry Andric 8108bcb0991SDimitry Andric bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, 8118bcb0991SDimitry Andric Register &Base, Register &Offset) { 8128bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 8138bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 8148bcb0991SDimitry Andric 8158bcb0991SDimitry Andric #ifndef NDEBUG 8168bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 8178bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 8188bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 8198bcb0991SDimitry Andric #endif 8208bcb0991SDimitry Andric 8218bcb0991SDimitry Andric Addr = MI.getOperand(1).getReg(); 822480093f4SDimitry Andric MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); 
8235ffd83dbSDimitry Andric if (!AddrDef || MRI.hasOneNonDBGUse(Addr)) 8248bcb0991SDimitry Andric return false; 8258bcb0991SDimitry Andric 8268bcb0991SDimitry Andric Base = AddrDef->getOperand(1).getReg(); 8278bcb0991SDimitry Andric Offset = AddrDef->getOperand(2).getReg(); 8288bcb0991SDimitry Andric 8298bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); 8308bcb0991SDimitry Andric 8318bcb0991SDimitry Andric if (!ForceLegalIndexing && 8328bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { 8338bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); 8348bcb0991SDimitry Andric return false; 8358bcb0991SDimitry Andric } 8368bcb0991SDimitry Andric 8378bcb0991SDimitry Andric MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); 8388bcb0991SDimitry Andric if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 8398bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); 8408bcb0991SDimitry Andric return false; 8418bcb0991SDimitry Andric } 8428bcb0991SDimitry Andric 8438bcb0991SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_STORE) { 8448bcb0991SDimitry Andric // Would require a copy. 8458bcb0991SDimitry Andric if (Base == MI.getOperand(0).getReg()) { 8468bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); 8478bcb0991SDimitry Andric return false; 8488bcb0991SDimitry Andric } 8498bcb0991SDimitry Andric 8508bcb0991SDimitry Andric // We're expecting one use of Addr in MI, but it could also be the 8518bcb0991SDimitry Andric // value stored, which isn't actually dominated by the instruction. 
8528bcb0991SDimitry Andric if (MI.getOperand(0).getReg() == Addr) { 8538bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); 8548bcb0991SDimitry Andric return false; 8558bcb0991SDimitry Andric } 8568bcb0991SDimitry Andric } 8578bcb0991SDimitry Andric 858480093f4SDimitry Andric // FIXME: check whether all uses of the base pointer are constant PtrAdds. 859480093f4SDimitry Andric // That might allow us to end base's liveness here by adjusting the constant. 8608bcb0991SDimitry Andric 8615ffd83dbSDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) { 8628bcb0991SDimitry Andric if (!dominates(MI, UseMI)) { 8638bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); 8648bcb0991SDimitry Andric return false; 8658bcb0991SDimitry Andric } 8668bcb0991SDimitry Andric } 8678bcb0991SDimitry Andric 8688bcb0991SDimitry Andric return true; 8698bcb0991SDimitry Andric } 8708bcb0991SDimitry Andric 8718bcb0991SDimitry Andric bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { 872480093f4SDimitry Andric IndexedLoadStoreMatchInfo MatchInfo; 873480093f4SDimitry Andric if (matchCombineIndexedLoadStore(MI, MatchInfo)) { 874480093f4SDimitry Andric applyCombineIndexedLoadStore(MI, MatchInfo); 875480093f4SDimitry Andric return true; 876480093f4SDimitry Andric } 877480093f4SDimitry Andric return false; 878480093f4SDimitry Andric } 879480093f4SDimitry Andric 880480093f4SDimitry Andric bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 8818bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 8828bcb0991SDimitry Andric if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && 8838bcb0991SDimitry Andric Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) 8848bcb0991SDimitry Andric return false; 8858bcb0991SDimitry Andric 886e8d8bef9SDimitry Andric // For now, no targets actually support these opcodes 
so don't waste time 887e8d8bef9SDimitry Andric // running these unless we're forced to for testing. 888e8d8bef9SDimitry Andric if (!ForceLegalIndexing) 889e8d8bef9SDimitry Andric return false; 890e8d8bef9SDimitry Andric 891480093f4SDimitry Andric MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 892480093f4SDimitry Andric MatchInfo.Offset); 893480093f4SDimitry Andric if (!MatchInfo.IsPre && 894480093f4SDimitry Andric !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 895480093f4SDimitry Andric MatchInfo.Offset)) 8968bcb0991SDimitry Andric return false; 8978bcb0991SDimitry Andric 898480093f4SDimitry Andric return true; 899480093f4SDimitry Andric } 9008bcb0991SDimitry Andric 901480093f4SDimitry Andric void CombinerHelper::applyCombineIndexedLoadStore( 902480093f4SDimitry Andric MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 903480093f4SDimitry Andric MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); 904480093f4SDimitry Andric MachineIRBuilder MIRBuilder(MI); 905480093f4SDimitry Andric unsigned Opcode = MI.getOpcode(); 906480093f4SDimitry Andric bool IsStore = Opcode == TargetOpcode::G_STORE; 9078bcb0991SDimitry Andric unsigned NewOpcode; 9088bcb0991SDimitry Andric switch (Opcode) { 9098bcb0991SDimitry Andric case TargetOpcode::G_LOAD: 9108bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_LOAD; 9118bcb0991SDimitry Andric break; 9128bcb0991SDimitry Andric case TargetOpcode::G_SEXTLOAD: 9138bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; 9148bcb0991SDimitry Andric break; 9158bcb0991SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 9168bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; 9178bcb0991SDimitry Andric break; 9188bcb0991SDimitry Andric case TargetOpcode::G_STORE: 9198bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_STORE; 9208bcb0991SDimitry Andric break; 9218bcb0991SDimitry Andric default: 9228bcb0991SDimitry Andric llvm_unreachable("Unknown 
load/store opcode"); 9238bcb0991SDimitry Andric } 9248bcb0991SDimitry Andric 9258bcb0991SDimitry Andric auto MIB = MIRBuilder.buildInstr(NewOpcode); 9268bcb0991SDimitry Andric if (IsStore) { 927480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 9288bcb0991SDimitry Andric MIB.addUse(MI.getOperand(0).getReg()); 9298bcb0991SDimitry Andric } else { 9308bcb0991SDimitry Andric MIB.addDef(MI.getOperand(0).getReg()); 931480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 9328bcb0991SDimitry Andric } 9338bcb0991SDimitry Andric 934480093f4SDimitry Andric MIB.addUse(MatchInfo.Base); 935480093f4SDimitry Andric MIB.addUse(MatchInfo.Offset); 936480093f4SDimitry Andric MIB.addImm(MatchInfo.IsPre); 9378bcb0991SDimitry Andric MI.eraseFromParent(); 9388bcb0991SDimitry Andric AddrDef.eraseFromParent(); 9398bcb0991SDimitry Andric 9408bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); 9418bcb0991SDimitry Andric } 9428bcb0991SDimitry Andric 943*fe6060f1SDimitry Andric bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, 944*fe6060f1SDimitry Andric MachineInstr *&OtherMI) { 945*fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 946*fe6060f1SDimitry Andric bool IsDiv, IsSigned; 947*fe6060f1SDimitry Andric 948*fe6060f1SDimitry Andric switch (Opcode) { 949*fe6060f1SDimitry Andric default: 950*fe6060f1SDimitry Andric llvm_unreachable("Unexpected opcode!"); 951*fe6060f1SDimitry Andric case TargetOpcode::G_SDIV: 952*fe6060f1SDimitry Andric case TargetOpcode::G_UDIV: { 953*fe6060f1SDimitry Andric IsDiv = true; 954*fe6060f1SDimitry Andric IsSigned = Opcode == TargetOpcode::G_SDIV; 955*fe6060f1SDimitry Andric break; 956*fe6060f1SDimitry Andric } 957*fe6060f1SDimitry Andric case TargetOpcode::G_SREM: 958*fe6060f1SDimitry Andric case TargetOpcode::G_UREM: { 959*fe6060f1SDimitry Andric IsDiv = false; 960*fe6060f1SDimitry Andric IsSigned = Opcode == TargetOpcode::G_SREM; 961*fe6060f1SDimitry Andric break; 962*fe6060f1SDimitry Andric } 963*fe6060f1SDimitry 
Andric } 964*fe6060f1SDimitry Andric 965*fe6060f1SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 966*fe6060f1SDimitry Andric unsigned DivOpcode, RemOpcode, DivremOpcode; 967*fe6060f1SDimitry Andric if (IsSigned) { 968*fe6060f1SDimitry Andric DivOpcode = TargetOpcode::G_SDIV; 969*fe6060f1SDimitry Andric RemOpcode = TargetOpcode::G_SREM; 970*fe6060f1SDimitry Andric DivremOpcode = TargetOpcode::G_SDIVREM; 971*fe6060f1SDimitry Andric } else { 972*fe6060f1SDimitry Andric DivOpcode = TargetOpcode::G_UDIV; 973*fe6060f1SDimitry Andric RemOpcode = TargetOpcode::G_UREM; 974*fe6060f1SDimitry Andric DivremOpcode = TargetOpcode::G_UDIVREM; 975*fe6060f1SDimitry Andric } 976*fe6060f1SDimitry Andric 977*fe6060f1SDimitry Andric if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}})) 9788bcb0991SDimitry Andric return false; 9798bcb0991SDimitry Andric 980*fe6060f1SDimitry Andric // Combine: 981*fe6060f1SDimitry Andric // %div:_ = G_[SU]DIV %src1:_, %src2:_ 982*fe6060f1SDimitry Andric // %rem:_ = G_[SU]REM %src1:_, %src2:_ 983*fe6060f1SDimitry Andric // into: 984*fe6060f1SDimitry Andric // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ 985*fe6060f1SDimitry Andric 986*fe6060f1SDimitry Andric // Combine: 987*fe6060f1SDimitry Andric // %rem:_ = G_[SU]REM %src1:_, %src2:_ 988*fe6060f1SDimitry Andric // %div:_ = G_[SU]DIV %src1:_, %src2:_ 989*fe6060f1SDimitry Andric // into: 990*fe6060f1SDimitry Andric // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ 991*fe6060f1SDimitry Andric 992*fe6060f1SDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) { 993*fe6060f1SDimitry Andric if (MI.getParent() == UseMI.getParent() && 994*fe6060f1SDimitry Andric ((IsDiv && UseMI.getOpcode() == RemOpcode) || 995*fe6060f1SDimitry Andric (!IsDiv && UseMI.getOpcode() == DivOpcode)) && 996*fe6060f1SDimitry Andric matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) { 997*fe6060f1SDimitry Andric OtherMI = &UseMI; 998*fe6060f1SDimitry Andric return true; 999*fe6060f1SDimitry 
Andric } 1000*fe6060f1SDimitry Andric } 1001*fe6060f1SDimitry Andric 1002*fe6060f1SDimitry Andric return false; 1003*fe6060f1SDimitry Andric } 1004*fe6060f1SDimitry Andric 1005*fe6060f1SDimitry Andric void CombinerHelper::applyCombineDivRem(MachineInstr &MI, 1006*fe6060f1SDimitry Andric MachineInstr *&OtherMI) { 1007*fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 1008*fe6060f1SDimitry Andric assert(OtherMI && "OtherMI shouldn't be empty."); 1009*fe6060f1SDimitry Andric 1010*fe6060f1SDimitry Andric Register DestDivReg, DestRemReg; 1011*fe6060f1SDimitry Andric if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) { 1012*fe6060f1SDimitry Andric DestDivReg = MI.getOperand(0).getReg(); 1013*fe6060f1SDimitry Andric DestRemReg = OtherMI->getOperand(0).getReg(); 1014*fe6060f1SDimitry Andric } else { 1015*fe6060f1SDimitry Andric DestDivReg = OtherMI->getOperand(0).getReg(); 1016*fe6060f1SDimitry Andric DestRemReg = MI.getOperand(0).getReg(); 1017*fe6060f1SDimitry Andric } 1018*fe6060f1SDimitry Andric 1019*fe6060f1SDimitry Andric bool IsSigned = 1020*fe6060f1SDimitry Andric Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM; 1021*fe6060f1SDimitry Andric 1022*fe6060f1SDimitry Andric // Check which instruction is first in the block so we don't break def-use 1023*fe6060f1SDimitry Andric // deps by "moving" the instruction incorrectly. 1024*fe6060f1SDimitry Andric if (dominates(MI, *OtherMI)) 1025*fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1026*fe6060f1SDimitry Andric else 1027*fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(*OtherMI); 1028*fe6060f1SDimitry Andric 1029*fe6060f1SDimitry Andric Builder.buildInstr(IsSigned ? 
TargetOpcode::G_SDIVREM 1030*fe6060f1SDimitry Andric : TargetOpcode::G_UDIVREM, 1031*fe6060f1SDimitry Andric {DestDivReg, DestRemReg}, 1032*fe6060f1SDimitry Andric {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); 1033*fe6060f1SDimitry Andric MI.eraseFromParent(); 1034*fe6060f1SDimitry Andric OtherMI->eraseFromParent(); 1035*fe6060f1SDimitry Andric } 1036*fe6060f1SDimitry Andric 1037*fe6060f1SDimitry Andric bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI, 1038*fe6060f1SDimitry Andric MachineInstr *&BrCond) { 1039*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_BR); 1040*fe6060f1SDimitry Andric 10410b57cec5SDimitry Andric // Try to match the following: 10420b57cec5SDimitry Andric // bb1: 10430b57cec5SDimitry Andric // G_BRCOND %c1, %bb2 10440b57cec5SDimitry Andric // G_BR %bb3 10450b57cec5SDimitry Andric // bb2: 10460b57cec5SDimitry Andric // ... 10470b57cec5SDimitry Andric // bb3: 10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric // The above pattern does not have a fall through to the successor bb2, always 10500b57cec5SDimitry Andric // resulting in a branch no matter which path is taken. Here we try to find 10510b57cec5SDimitry Andric // and replace that pattern with conditional branch to bb3 and otherwise 1052e8d8bef9SDimitry Andric // fallthrough to bb2. This is generally better for branch predictors. 
10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 10550b57cec5SDimitry Andric MachineBasicBlock::iterator BrIt(MI); 10560b57cec5SDimitry Andric if (BrIt == MBB->begin()) 10570b57cec5SDimitry Andric return false; 10580b57cec5SDimitry Andric assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator"); 10590b57cec5SDimitry Andric 1060*fe6060f1SDimitry Andric BrCond = &*std::prev(BrIt); 10610b57cec5SDimitry Andric if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) 10620b57cec5SDimitry Andric return false; 10630b57cec5SDimitry Andric 1064d409305fSDimitry Andric // Check that the next block is the conditional branch target. Also make sure 1065d409305fSDimitry Andric // that it isn't the same as the G_BR's target (otherwise, this will loop.) 1066d409305fSDimitry Andric MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); 1067d409305fSDimitry Andric return BrCondTarget != MI.getOperand(0).getMBB() && 1068d409305fSDimitry Andric MBB->isLayoutSuccessor(BrCondTarget); 10690b57cec5SDimitry Andric } 10700b57cec5SDimitry Andric 1071*fe6060f1SDimitry Andric void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, 1072*fe6060f1SDimitry Andric MachineInstr *&BrCond) { 10730b57cec5SDimitry Andric MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); 1074e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(*BrCond); 1075e8d8bef9SDimitry Andric LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); 1076e8d8bef9SDimitry Andric // FIXME: Does int/fp matter for this? If so, we might need to restrict 1077e8d8bef9SDimitry Andric // this to i1 only since we might not know for sure what kind of 1078e8d8bef9SDimitry Andric // compare generated the condition value. 
1079e8d8bef9SDimitry Andric auto True = Builder.buildConstant( 1080e8d8bef9SDimitry Andric Ty, getICmpTrueVal(getTargetLowering(), false, false)); 1081e8d8bef9SDimitry Andric auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); 10820b57cec5SDimitry Andric 1083e8d8bef9SDimitry Andric auto *FallthroughBB = BrCond->getOperand(1).getMBB(); 1084e8d8bef9SDimitry Andric Observer.changingInstr(MI); 1085e8d8bef9SDimitry Andric MI.getOperand(0).setMBB(FallthroughBB); 1086e8d8bef9SDimitry Andric Observer.changedInstr(MI); 10870b57cec5SDimitry Andric 1088e8d8bef9SDimitry Andric // Change the conditional branch to use the inverted condition and 1089e8d8bef9SDimitry Andric // new target block. 10900b57cec5SDimitry Andric Observer.changingInstr(*BrCond); 1091e8d8bef9SDimitry Andric BrCond->getOperand(0).setReg(Xor.getReg(0)); 10920b57cec5SDimitry Andric BrCond->getOperand(1).setMBB(BrTarget); 10930b57cec5SDimitry Andric Observer.changedInstr(*BrCond); 10948bcb0991SDimitry Andric } 10958bcb0991SDimitry Andric 10968bcb0991SDimitry Andric static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { 10978bcb0991SDimitry Andric // On Darwin, -Os means optimize for size without hurting performance, so 10988bcb0991SDimitry Andric // only really optimize for size when -Oz (MinSize) is used. 10998bcb0991SDimitry Andric if (MF.getTarget().getTargetTriple().isOSDarwin()) 11008bcb0991SDimitry Andric return MF.getFunction().hasMinSize(); 11018bcb0991SDimitry Andric return MF.getFunction().hasOptSize(); 11028bcb0991SDimitry Andric } 11038bcb0991SDimitry Andric 11048bcb0991SDimitry Andric // Returns a list of types to use for memory op lowering in MemOps. A partial 11058bcb0991SDimitry Andric // port of findOptimalMemOpLowering in TargetLowering. 
11065ffd83dbSDimitry Andric static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps, 11075ffd83dbSDimitry Andric unsigned Limit, const MemOp &Op, 11085ffd83dbSDimitry Andric unsigned DstAS, unsigned SrcAS, 11095ffd83dbSDimitry Andric const AttributeList &FuncAttributes, 11105ffd83dbSDimitry Andric const TargetLowering &TLI) { 11115ffd83dbSDimitry Andric if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) 11128bcb0991SDimitry Andric return false; 11138bcb0991SDimitry Andric 11145ffd83dbSDimitry Andric LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes); 11158bcb0991SDimitry Andric 11168bcb0991SDimitry Andric if (Ty == LLT()) { 11178bcb0991SDimitry Andric // Use the largest scalar type whose alignment constraints are satisfied. 11188bcb0991SDimitry Andric // We only need to check DstAlign here as SrcAlign is always greater or 11198bcb0991SDimitry Andric // equal to DstAlign (or zero). 11208bcb0991SDimitry Andric Ty = LLT::scalar(64); 11215ffd83dbSDimitry Andric if (Op.isFixedDstAlign()) 11225ffd83dbSDimitry Andric while (Op.getDstAlign() < Ty.getSizeInBytes() && 11235ffd83dbSDimitry Andric !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign())) 11248bcb0991SDimitry Andric Ty = LLT::scalar(Ty.getSizeInBytes()); 11258bcb0991SDimitry Andric assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); 11268bcb0991SDimitry Andric // FIXME: check for the largest legal type we can load/store to. 11278bcb0991SDimitry Andric } 11288bcb0991SDimitry Andric 11298bcb0991SDimitry Andric unsigned NumMemOps = 0; 11305ffd83dbSDimitry Andric uint64_t Size = Op.size(); 11315ffd83dbSDimitry Andric while (Size) { 11328bcb0991SDimitry Andric unsigned TySize = Ty.getSizeInBytes(); 11338bcb0991SDimitry Andric while (TySize > Size) { 11348bcb0991SDimitry Andric // For now, only use non-vector load / store's for the left-over pieces. 
11358bcb0991SDimitry Andric LLT NewTy = Ty; 11368bcb0991SDimitry Andric // FIXME: check for mem op safety and legality of the types. Not all of 11378bcb0991SDimitry Andric // SDAGisms map cleanly to GISel concepts. 11388bcb0991SDimitry Andric if (NewTy.isVector()) 11398bcb0991SDimitry Andric NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); 11408bcb0991SDimitry Andric NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); 11418bcb0991SDimitry Andric unsigned NewTySize = NewTy.getSizeInBytes(); 11428bcb0991SDimitry Andric assert(NewTySize > 0 && "Could not find appropriate type"); 11438bcb0991SDimitry Andric 11448bcb0991SDimitry Andric // If the new LLT cannot cover all of the remaining bits, then consider 11458bcb0991SDimitry Andric // issuing a (or a pair of) unaligned and overlapping load / store. 11468bcb0991SDimitry Andric bool Fast; 11478bcb0991SDimitry Andric // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). 11488bcb0991SDimitry Andric MVT VT = getMVTForLLT(Ty); 11495ffd83dbSDimitry Andric if (NumMemOps && Op.allowOverlap() && NewTySize < Size && 11508bcb0991SDimitry Andric TLI.allowsMisalignedMemoryAccesses( 1151*fe6060f1SDimitry Andric VT, DstAS, Op.isFixedDstAlign() ? 
Op.getDstAlign() : Align(1), 11525ffd83dbSDimitry Andric MachineMemOperand::MONone, &Fast) && 11538bcb0991SDimitry Andric Fast) 11548bcb0991SDimitry Andric TySize = Size; 11558bcb0991SDimitry Andric else { 11568bcb0991SDimitry Andric Ty = NewTy; 11578bcb0991SDimitry Andric TySize = NewTySize; 11588bcb0991SDimitry Andric } 11598bcb0991SDimitry Andric } 11608bcb0991SDimitry Andric 11618bcb0991SDimitry Andric if (++NumMemOps > Limit) 11628bcb0991SDimitry Andric return false; 11638bcb0991SDimitry Andric 11648bcb0991SDimitry Andric MemOps.push_back(Ty); 11658bcb0991SDimitry Andric Size -= TySize; 11668bcb0991SDimitry Andric } 11678bcb0991SDimitry Andric 11680b57cec5SDimitry Andric return true; 11690b57cec5SDimitry Andric } 11700b57cec5SDimitry Andric 11718bcb0991SDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { 11728bcb0991SDimitry Andric if (Ty.isVector()) 11735ffd83dbSDimitry Andric return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), 11748bcb0991SDimitry Andric Ty.getNumElements()); 11758bcb0991SDimitry Andric return IntegerType::get(C, Ty.getSizeInBits()); 11768bcb0991SDimitry Andric } 11778bcb0991SDimitry Andric 11788bcb0991SDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition. 
11798bcb0991SDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { 11808bcb0991SDimitry Andric MachineRegisterInfo &MRI = *MIB.getMRI(); 11818bcb0991SDimitry Andric unsigned NumBits = Ty.getScalarSizeInBits(); 11828bcb0991SDimitry Andric auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); 11838bcb0991SDimitry Andric if (!Ty.isVector() && ValVRegAndVal) { 1184e8d8bef9SDimitry Andric APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); 11858bcb0991SDimitry Andric APInt SplatVal = APInt::getSplat(NumBits, Scalar); 11868bcb0991SDimitry Andric return MIB.buildConstant(Ty, SplatVal).getReg(0); 11878bcb0991SDimitry Andric } 11888bcb0991SDimitry Andric 11898bcb0991SDimitry Andric // Extend the byte value to the larger type, and then multiply by a magic 11908bcb0991SDimitry Andric // value 0x010101... in order to replicate it across every byte. 11915ffd83dbSDimitry Andric // Unless it's zero, in which case just emit a larger G_CONSTANT 0. 11925ffd83dbSDimitry Andric if (ValVRegAndVal && ValVRegAndVal->Value == 0) { 11935ffd83dbSDimitry Andric return MIB.buildConstant(Ty, 0).getReg(0); 11945ffd83dbSDimitry Andric } 11955ffd83dbSDimitry Andric 11968bcb0991SDimitry Andric LLT ExtType = Ty.getScalarType(); 11978bcb0991SDimitry Andric auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); 11988bcb0991SDimitry Andric if (NumBits > 8) { 11998bcb0991SDimitry Andric APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); 12008bcb0991SDimitry Andric auto MagicMI = MIB.buildConstant(ExtType, Magic); 12018bcb0991SDimitry Andric Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); 12028bcb0991SDimitry Andric } 12038bcb0991SDimitry Andric 12045ffd83dbSDimitry Andric // For vector types create a G_BUILD_VECTOR. 
12055ffd83dbSDimitry Andric if (Ty.isVector()) 12065ffd83dbSDimitry Andric Val = MIB.buildSplatVector(Ty, Val).getReg(0); 12075ffd83dbSDimitry Andric 12088bcb0991SDimitry Andric return Val; 12098bcb0991SDimitry Andric } 12108bcb0991SDimitry Andric 12115ffd83dbSDimitry Andric bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, 1212*fe6060f1SDimitry Andric Register Val, uint64_t KnownLen, 12135ffd83dbSDimitry Andric Align Alignment, bool IsVolatile) { 12148bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 12158bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 12168bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 12178bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 12188bcb0991SDimitry Andric 12198bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memset length!"); 12208bcb0991SDimitry Andric 12218bcb0991SDimitry Andric bool DstAlignCanChange = false; 12228bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 12238bcb0991SDimitry Andric bool OptSize = shouldLowerMemFuncForSize(MF); 12248bcb0991SDimitry Andric 12258bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 12268bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 12278bcb0991SDimitry Andric DstAlignCanChange = true; 12288bcb0991SDimitry Andric 12298bcb0991SDimitry Andric unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); 12308bcb0991SDimitry Andric std::vector<LLT> MemOps; 12318bcb0991SDimitry Andric 12328bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 12338bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 12348bcb0991SDimitry Andric 12358bcb0991SDimitry Andric auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); 12368bcb0991SDimitry Andric bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; 12378bcb0991SDimitry Andric 
12385ffd83dbSDimitry Andric if (!findGISelOptimalMemOpLowering(MemOps, Limit, 12395ffd83dbSDimitry Andric MemOp::Set(KnownLen, DstAlignCanChange, 12405ffd83dbSDimitry Andric Alignment, 12415ffd83dbSDimitry Andric /*IsZeroMemset=*/IsZeroVal, 12425ffd83dbSDimitry Andric /*IsVolatile=*/IsVolatile), 12435ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), ~0u, 12448bcb0991SDimitry Andric MF.getFunction().getAttributes(), TLI)) 12458bcb0991SDimitry Andric return false; 12468bcb0991SDimitry Andric 12478bcb0991SDimitry Andric if (DstAlignCanChange) { 12488bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 12498bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 12505ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 12515ffd83dbSDimitry Andric if (NewAlign > Alignment) { 12525ffd83dbSDimitry Andric Alignment = NewAlign; 12538bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 12548bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 12555ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 12565ffd83dbSDimitry Andric MFI.setObjectAlignment(FI, Alignment); 12578bcb0991SDimitry Andric } 12588bcb0991SDimitry Andric } 12598bcb0991SDimitry Andric 12608bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 12618bcb0991SDimitry Andric // Find the largest store and generate the bit pattern for it. 12628bcb0991SDimitry Andric LLT LargestTy = MemOps[0]; 12638bcb0991SDimitry Andric for (unsigned i = 1; i < MemOps.size(); i++) 12648bcb0991SDimitry Andric if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) 12658bcb0991SDimitry Andric LargestTy = MemOps[i]; 12668bcb0991SDimitry Andric 12678bcb0991SDimitry Andric // The memset stored value is always defined as an s8, so in order to make it 12688bcb0991SDimitry Andric // work with larger store types we need to repeat the bit pattern across the 12698bcb0991SDimitry Andric // wider type. 
12708bcb0991SDimitry Andric Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); 12718bcb0991SDimitry Andric 12728bcb0991SDimitry Andric if (!MemSetValue) 12738bcb0991SDimitry Andric return false; 12748bcb0991SDimitry Andric 12758bcb0991SDimitry Andric // Generate the stores. For each store type in the list, we generate the 12768bcb0991SDimitry Andric // matching store of that type to the destination address. 12778bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Dst); 12788bcb0991SDimitry Andric unsigned DstOff = 0; 12798bcb0991SDimitry Andric unsigned Size = KnownLen; 12808bcb0991SDimitry Andric for (unsigned I = 0; I < MemOps.size(); I++) { 12818bcb0991SDimitry Andric LLT Ty = MemOps[I]; 12828bcb0991SDimitry Andric unsigned TySize = Ty.getSizeInBytes(); 12838bcb0991SDimitry Andric if (TySize > Size) { 12848bcb0991SDimitry Andric // Issuing an unaligned load / store pair that overlaps with the previous 12858bcb0991SDimitry Andric // pair. Adjust the offset accordingly. 12868bcb0991SDimitry Andric assert(I == MemOps.size() - 1 && I != 0); 12878bcb0991SDimitry Andric DstOff -= TySize - Size; 12888bcb0991SDimitry Andric } 12898bcb0991SDimitry Andric 12908bcb0991SDimitry Andric // If this store is smaller than the largest store see whether we can get 12918bcb0991SDimitry Andric // the smaller value for free with a truncate. 
12928bcb0991SDimitry Andric Register Value = MemSetValue; 12938bcb0991SDimitry Andric if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { 12948bcb0991SDimitry Andric MVT VT = getMVTForLLT(Ty); 12958bcb0991SDimitry Andric MVT LargestVT = getMVTForLLT(LargestTy); 12968bcb0991SDimitry Andric if (!LargestTy.isVector() && !Ty.isVector() && 12978bcb0991SDimitry Andric TLI.isTruncateFree(LargestVT, VT)) 12988bcb0991SDimitry Andric Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); 12998bcb0991SDimitry Andric else 13008bcb0991SDimitry Andric Value = getMemsetValue(Val, Ty, MIB); 13018bcb0991SDimitry Andric if (!Value) 13028bcb0991SDimitry Andric return false; 13038bcb0991SDimitry Andric } 13048bcb0991SDimitry Andric 13058bcb0991SDimitry Andric auto *StoreMMO = 1306*fe6060f1SDimitry Andric MF.getMachineMemOperand(&DstMMO, DstOff, Ty); 13078bcb0991SDimitry Andric 13088bcb0991SDimitry Andric Register Ptr = Dst; 13098bcb0991SDimitry Andric if (DstOff != 0) { 13108bcb0991SDimitry Andric auto Offset = 13118bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); 1312480093f4SDimitry Andric Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 13138bcb0991SDimitry Andric } 13148bcb0991SDimitry Andric 13158bcb0991SDimitry Andric MIB.buildStore(Value, Ptr, *StoreMMO); 13168bcb0991SDimitry Andric DstOff += Ty.getSizeInBytes(); 13178bcb0991SDimitry Andric Size -= TySize; 13188bcb0991SDimitry Andric } 13198bcb0991SDimitry Andric 13208bcb0991SDimitry Andric MI.eraseFromParent(); 13218bcb0991SDimitry Andric return true; 13228bcb0991SDimitry Andric } 13238bcb0991SDimitry Andric 1324*fe6060f1SDimitry Andric bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { 1325*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); 1326*fe6060f1SDimitry Andric 1327*fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 1328*fe6060f1SDimitry Andric Register Src = MI.getOperand(1).getReg(); 1329*fe6060f1SDimitry Andric Register 
Len = MI.getOperand(2).getReg(); 1330*fe6060f1SDimitry Andric 1331*fe6060f1SDimitry Andric const auto *MMOIt = MI.memoperands_begin(); 1332*fe6060f1SDimitry Andric const MachineMemOperand *MemOp = *MMOIt; 1333*fe6060f1SDimitry Andric bool IsVolatile = MemOp->isVolatile(); 1334*fe6060f1SDimitry Andric 1335*fe6060f1SDimitry Andric // See if this is a constant length copy 1336*fe6060f1SDimitry Andric auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); 1337*fe6060f1SDimitry Andric // FIXME: support dynamically sized G_MEMCPY_INLINE 1338*fe6060f1SDimitry Andric assert(LenVRegAndVal.hasValue() && 1339*fe6060f1SDimitry Andric "inline memcpy with dynamic size is not yet supported"); 1340*fe6060f1SDimitry Andric uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); 1341*fe6060f1SDimitry Andric if (KnownLen == 0) { 1342*fe6060f1SDimitry Andric MI.eraseFromParent(); 1343*fe6060f1SDimitry Andric return true; 1344*fe6060f1SDimitry Andric } 1345*fe6060f1SDimitry Andric 1346*fe6060f1SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 1347*fe6060f1SDimitry Andric const auto &SrcMMO = **std::next(MI.memoperands_begin()); 1348*fe6060f1SDimitry Andric Align DstAlign = DstMMO.getBaseAlign(); 1349*fe6060f1SDimitry Andric Align SrcAlign = SrcMMO.getBaseAlign(); 1350*fe6060f1SDimitry Andric 1351*fe6060f1SDimitry Andric return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, 1352*fe6060f1SDimitry Andric IsVolatile); 1353*fe6060f1SDimitry Andric } 1354*fe6060f1SDimitry Andric 1355*fe6060f1SDimitry Andric bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, 1356*fe6060f1SDimitry Andric Register Src, uint64_t KnownLen, 13575ffd83dbSDimitry Andric Align DstAlign, Align SrcAlign, 13588bcb0991SDimitry Andric bool IsVolatile) { 1359*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); 1360*fe6060f1SDimitry Andric return optimizeMemcpy(MI, Dst, Src, KnownLen, 1361*fe6060f1SDimitry Andric 
std::numeric_limits<uint64_t>::max(), DstAlign, 1362*fe6060f1SDimitry Andric SrcAlign, IsVolatile); 1363*fe6060f1SDimitry Andric } 1364*fe6060f1SDimitry Andric 1365*fe6060f1SDimitry Andric bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, 1366*fe6060f1SDimitry Andric Register Src, uint64_t KnownLen, 1367*fe6060f1SDimitry Andric uint64_t Limit, Align DstAlign, 1368*fe6060f1SDimitry Andric Align SrcAlign, bool IsVolatile) { 13698bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 13708bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 13718bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 13728bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 13738bcb0991SDimitry Andric 13748bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memcpy length!"); 13758bcb0991SDimitry Andric 13768bcb0991SDimitry Andric bool DstAlignCanChange = false; 13778bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 13785ffd83dbSDimitry Andric Align Alignment = commonAlignment(DstAlign, SrcAlign); 13798bcb0991SDimitry Andric 13808bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 13818bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 13828bcb0991SDimitry Andric DstAlignCanChange = true; 13838bcb0991SDimitry Andric 13848bcb0991SDimitry Andric // FIXME: infer better src pointer alignment like SelectionDAG does here. 13858bcb0991SDimitry Andric // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining 13868bcb0991SDimitry Andric // if the memcpy is in a tail call position. 
13878bcb0991SDimitry Andric 13888bcb0991SDimitry Andric std::vector<LLT> MemOps; 13898bcb0991SDimitry Andric 13908bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 13918bcb0991SDimitry Andric const auto &SrcMMO = **std::next(MI.memoperands_begin()); 13928bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 13938bcb0991SDimitry Andric MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); 13948bcb0991SDimitry Andric 13958bcb0991SDimitry Andric if (!findGISelOptimalMemOpLowering( 13965ffd83dbSDimitry Andric MemOps, Limit, 13975ffd83dbSDimitry Andric MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, 13985ffd83dbSDimitry Andric IsVolatile), 13995ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 14005ffd83dbSDimitry Andric MF.getFunction().getAttributes(), TLI)) 14018bcb0991SDimitry Andric return false; 14028bcb0991SDimitry Andric 14038bcb0991SDimitry Andric if (DstAlignCanChange) { 14048bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 14058bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 14065ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 14078bcb0991SDimitry Andric 14088bcb0991SDimitry Andric // Don't promote to an alignment that would require dynamic stack 14098bcb0991SDimitry Andric // realignment. 14108bcb0991SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1411*fe6060f1SDimitry Andric if (!TRI->hasStackRealignment(MF)) 14125ffd83dbSDimitry Andric while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) 14135ffd83dbSDimitry Andric NewAlign = NewAlign / 2; 14148bcb0991SDimitry Andric 14158bcb0991SDimitry Andric if (NewAlign > Alignment) { 14168bcb0991SDimitry Andric Alignment = NewAlign; 14178bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 14188bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 
14195ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 14208bcb0991SDimitry Andric MFI.setObjectAlignment(FI, Alignment); 14218bcb0991SDimitry Andric } 14228bcb0991SDimitry Andric } 14238bcb0991SDimitry Andric 14248bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); 14258bcb0991SDimitry Andric 14268bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 14278bcb0991SDimitry Andric // Now we need to emit a pair of load and stores for each of the types we've 14288bcb0991SDimitry Andric // collected. I.e. for each type, generate a load from the source pointer of 14298bcb0991SDimitry Andric // that type width, and then generate a corresponding store to the dest buffer 14308bcb0991SDimitry Andric // of that value loaded. This can result in a sequence of loads and stores 14318bcb0991SDimitry Andric // mixed types, depending on what the target specifies as good types to use. 14328bcb0991SDimitry Andric unsigned CurrOffset = 0; 14338bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Src); 14348bcb0991SDimitry Andric unsigned Size = KnownLen; 14358bcb0991SDimitry Andric for (auto CopyTy : MemOps) { 14368bcb0991SDimitry Andric // Issuing an unaligned load / store pair that overlaps with the previous 14378bcb0991SDimitry Andric // pair. Adjust the offset accordingly. 14388bcb0991SDimitry Andric if (CopyTy.getSizeInBytes() > Size) 14398bcb0991SDimitry Andric CurrOffset -= CopyTy.getSizeInBytes() - Size; 14408bcb0991SDimitry Andric 14418bcb0991SDimitry Andric // Construct MMOs for the accesses. 14428bcb0991SDimitry Andric auto *LoadMMO = 14438bcb0991SDimitry Andric MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); 14448bcb0991SDimitry Andric auto *StoreMMO = 14458bcb0991SDimitry Andric MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); 14468bcb0991SDimitry Andric 14478bcb0991SDimitry Andric // Create the load. 
14488bcb0991SDimitry Andric Register LoadPtr = Src; 14498bcb0991SDimitry Andric Register Offset; 14508bcb0991SDimitry Andric if (CurrOffset != 0) { 14518bcb0991SDimitry Andric Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) 14528bcb0991SDimitry Andric .getReg(0); 1453480093f4SDimitry Andric LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); 14548bcb0991SDimitry Andric } 14558bcb0991SDimitry Andric auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); 14568bcb0991SDimitry Andric 14578bcb0991SDimitry Andric // Create the store. 14588bcb0991SDimitry Andric Register StorePtr = 1459480093f4SDimitry Andric CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 14608bcb0991SDimitry Andric MIB.buildStore(LdVal, StorePtr, *StoreMMO); 14618bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 14628bcb0991SDimitry Andric Size -= CopyTy.getSizeInBytes(); 14638bcb0991SDimitry Andric } 14648bcb0991SDimitry Andric 14658bcb0991SDimitry Andric MI.eraseFromParent(); 14668bcb0991SDimitry Andric return true; 14678bcb0991SDimitry Andric } 14688bcb0991SDimitry Andric 14698bcb0991SDimitry Andric bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, 1470*fe6060f1SDimitry Andric Register Src, uint64_t KnownLen, 14715ffd83dbSDimitry Andric Align DstAlign, Align SrcAlign, 14728bcb0991SDimitry Andric bool IsVolatile) { 14738bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 14748bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 14758bcb0991SDimitry Andric auto &DL = MF.getDataLayout(); 14768bcb0991SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 14778bcb0991SDimitry Andric 14788bcb0991SDimitry Andric assert(KnownLen != 0 && "Have a zero length memmove length!"); 14798bcb0991SDimitry Andric 14808bcb0991SDimitry Andric bool DstAlignCanChange = false; 14818bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 14828bcb0991SDimitry Andric bool OptSize = 
shouldLowerMemFuncForSize(MF); 14835ffd83dbSDimitry Andric Align Alignment = commonAlignment(DstAlign, SrcAlign); 14848bcb0991SDimitry Andric 14858bcb0991SDimitry Andric MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); 14868bcb0991SDimitry Andric if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) 14878bcb0991SDimitry Andric DstAlignCanChange = true; 14888bcb0991SDimitry Andric 14898bcb0991SDimitry Andric unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); 14908bcb0991SDimitry Andric std::vector<LLT> MemOps; 14918bcb0991SDimitry Andric 14928bcb0991SDimitry Andric const auto &DstMMO = **MI.memoperands_begin(); 14938bcb0991SDimitry Andric const auto &SrcMMO = **std::next(MI.memoperands_begin()); 14948bcb0991SDimitry Andric MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); 14958bcb0991SDimitry Andric MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); 14968bcb0991SDimitry Andric 14978bcb0991SDimitry Andric // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due 14988bcb0991SDimitry Andric // to a bug in it's findOptimalMemOpLowering implementation. For now do the 14998bcb0991SDimitry Andric // same thing here. 15008bcb0991SDimitry Andric if (!findGISelOptimalMemOpLowering( 15015ffd83dbSDimitry Andric MemOps, Limit, 15025ffd83dbSDimitry Andric MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, 15035ffd83dbSDimitry Andric /*IsVolatile*/ true), 15045ffd83dbSDimitry Andric DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 15055ffd83dbSDimitry Andric MF.getFunction().getAttributes(), TLI)) 15068bcb0991SDimitry Andric return false; 15078bcb0991SDimitry Andric 15088bcb0991SDimitry Andric if (DstAlignCanChange) { 15098bcb0991SDimitry Andric // Get an estimate of the type from the LLT. 
15108bcb0991SDimitry Andric Type *IRTy = getTypeForLLT(MemOps[0], C); 15115ffd83dbSDimitry Andric Align NewAlign = DL.getABITypeAlign(IRTy); 15128bcb0991SDimitry Andric 15138bcb0991SDimitry Andric // Don't promote to an alignment that would require dynamic stack 15148bcb0991SDimitry Andric // realignment. 15158bcb0991SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1516*fe6060f1SDimitry Andric if (!TRI->hasStackRealignment(MF)) 15175ffd83dbSDimitry Andric while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) 15185ffd83dbSDimitry Andric NewAlign = NewAlign / 2; 15198bcb0991SDimitry Andric 15208bcb0991SDimitry Andric if (NewAlign > Alignment) { 15218bcb0991SDimitry Andric Alignment = NewAlign; 15228bcb0991SDimitry Andric unsigned FI = FIDef->getOperand(1).getIndex(); 15238bcb0991SDimitry Andric // Give the stack frame object a larger alignment if needed. 15245ffd83dbSDimitry Andric if (MFI.getObjectAlign(FI) < Alignment) 15258bcb0991SDimitry Andric MFI.setObjectAlignment(FI, Alignment); 15268bcb0991SDimitry Andric } 15278bcb0991SDimitry Andric } 15288bcb0991SDimitry Andric 15298bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); 15308bcb0991SDimitry Andric 15318bcb0991SDimitry Andric MachineIRBuilder MIB(MI); 15328bcb0991SDimitry Andric // Memmove requires that we perform the loads first before issuing the stores. 15338bcb0991SDimitry Andric // Apart from that, this loop is pretty much doing the same thing as the 15348bcb0991SDimitry Andric // memcpy codegen function. 15358bcb0991SDimitry Andric unsigned CurrOffset = 0; 15368bcb0991SDimitry Andric LLT PtrTy = MRI.getType(Src); 15378bcb0991SDimitry Andric SmallVector<Register, 16> LoadVals; 15388bcb0991SDimitry Andric for (auto CopyTy : MemOps) { 15398bcb0991SDimitry Andric // Construct MMO for the load. 
15408bcb0991SDimitry Andric auto *LoadMMO = 15418bcb0991SDimitry Andric MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); 15428bcb0991SDimitry Andric 15438bcb0991SDimitry Andric // Create the load. 15448bcb0991SDimitry Andric Register LoadPtr = Src; 15458bcb0991SDimitry Andric if (CurrOffset != 0) { 15468bcb0991SDimitry Andric auto Offset = 15478bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); 1548480093f4SDimitry Andric LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); 15498bcb0991SDimitry Andric } 15508bcb0991SDimitry Andric LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); 15518bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 15528bcb0991SDimitry Andric } 15538bcb0991SDimitry Andric 15548bcb0991SDimitry Andric CurrOffset = 0; 15558bcb0991SDimitry Andric for (unsigned I = 0; I < MemOps.size(); ++I) { 15568bcb0991SDimitry Andric LLT CopyTy = MemOps[I]; 15578bcb0991SDimitry Andric // Now store the values loaded. 
15588bcb0991SDimitry Andric auto *StoreMMO = 15598bcb0991SDimitry Andric MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); 15608bcb0991SDimitry Andric 15618bcb0991SDimitry Andric Register StorePtr = Dst; 15628bcb0991SDimitry Andric if (CurrOffset != 0) { 15638bcb0991SDimitry Andric auto Offset = 15648bcb0991SDimitry Andric MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); 1565480093f4SDimitry Andric StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); 15668bcb0991SDimitry Andric } 15678bcb0991SDimitry Andric MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); 15688bcb0991SDimitry Andric CurrOffset += CopyTy.getSizeInBytes(); 15698bcb0991SDimitry Andric } 15708bcb0991SDimitry Andric MI.eraseFromParent(); 15718bcb0991SDimitry Andric return true; 15728bcb0991SDimitry Andric } 15738bcb0991SDimitry Andric 15748bcb0991SDimitry Andric bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { 1575e8d8bef9SDimitry Andric const unsigned Opc = MI.getOpcode(); 15768bcb0991SDimitry Andric // This combine is fairly complex so it's not written with a separate 15778bcb0991SDimitry Andric // matcher function. 
1578e8d8bef9SDimitry Andric assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE || 1579e8d8bef9SDimitry Andric Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction"); 15808bcb0991SDimitry Andric 15818bcb0991SDimitry Andric auto MMOIt = MI.memoperands_begin(); 15828bcb0991SDimitry Andric const MachineMemOperand *MemOp = *MMOIt; 15838bcb0991SDimitry Andric 15845ffd83dbSDimitry Andric Align DstAlign = MemOp->getBaseAlign(); 15855ffd83dbSDimitry Andric Align SrcAlign; 1586e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 1587e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 1588e8d8bef9SDimitry Andric Register Len = MI.getOperand(2).getReg(); 15898bcb0991SDimitry Andric 1590e8d8bef9SDimitry Andric if (Opc != TargetOpcode::G_MEMSET) { 15918bcb0991SDimitry Andric assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); 15928bcb0991SDimitry Andric MemOp = *(++MMOIt); 15935ffd83dbSDimitry Andric SrcAlign = MemOp->getBaseAlign(); 15948bcb0991SDimitry Andric } 15958bcb0991SDimitry Andric 15968bcb0991SDimitry Andric // See if this is a constant length copy 15978bcb0991SDimitry Andric auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); 15988bcb0991SDimitry Andric if (!LenVRegAndVal) 15998bcb0991SDimitry Andric return false; // Leave it to the legalizer to lower it to a libcall. 
1600*fe6060f1SDimitry Andric uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); 16018bcb0991SDimitry Andric 16028bcb0991SDimitry Andric if (KnownLen == 0) { 16038bcb0991SDimitry Andric MI.eraseFromParent(); 16048bcb0991SDimitry Andric return true; 16058bcb0991SDimitry Andric } 16068bcb0991SDimitry Andric 1607*fe6060f1SDimitry Andric bool IsVolatile = MemOp->isVolatile(); 1608*fe6060f1SDimitry Andric if (Opc == TargetOpcode::G_MEMCPY_INLINE) 1609*fe6060f1SDimitry Andric return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, 1610*fe6060f1SDimitry Andric IsVolatile); 1611*fe6060f1SDimitry Andric 1612*fe6060f1SDimitry Andric // Don't try to optimize volatile. 1613*fe6060f1SDimitry Andric if (IsVolatile) 1614*fe6060f1SDimitry Andric return false; 1615*fe6060f1SDimitry Andric 16168bcb0991SDimitry Andric if (MaxLen && KnownLen > MaxLen) 16178bcb0991SDimitry Andric return false; 16188bcb0991SDimitry Andric 1619*fe6060f1SDimitry Andric if (Opc == TargetOpcode::G_MEMCPY) { 1620*fe6060f1SDimitry Andric auto &MF = *MI.getParent()->getParent(); 1621*fe6060f1SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 1622*fe6060f1SDimitry Andric bool OptSize = shouldLowerMemFuncForSize(MF); 1623*fe6060f1SDimitry Andric uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); 1624*fe6060f1SDimitry Andric return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, 1625*fe6060f1SDimitry Andric IsVolatile); 1626*fe6060f1SDimitry Andric } 1627e8d8bef9SDimitry Andric if (Opc == TargetOpcode::G_MEMMOVE) 16288bcb0991SDimitry Andric return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); 1629e8d8bef9SDimitry Andric if (Opc == TargetOpcode::G_MEMSET) 16308bcb0991SDimitry Andric return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); 16318bcb0991SDimitry Andric return false; 16328bcb0991SDimitry Andric } 16338bcb0991SDimitry Andric 1634e8d8bef9SDimitry Andric static Optional<APFloat> 
// (Continuation of the helper whose `static Optional<APFloat>` return type
// appears on the preceding line.)
//
// Constant-fold a unary floating-point generic opcode whose operand \p Op is
// defined by a G_FCONSTANT. Returns None when the operand is not a constant.
constantFoldFpUnary(unsigned Opcode, LLT DstTy,
                    const Register Op,
                    const MachineRegisterInfo &MRI) {
  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
  if (!MaybeCst)
    return None;

  APFloat V = MaybeCst->getValueAPF();
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_FNEG: {
    // Negation and abs are exact sign-bit operations; return immediately in
    // the source's own semantics.
    V.changeSign();
    return V;
  }
  case TargetOpcode::G_FABS: {
    V.clearSign();
    return V;
  }
  case TargetOpcode::G_FPTRUNC:
    // Fall through to the final conversion to DstTy's semantics below.
    break;
  case TargetOpcode::G_FSQRT: {
    // Evaluate via host double; the result is re-converted to DstTy below.
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(sqrt(V.convertToDouble()));
    break;
  }
  case TargetOpcode::G_FLOG2: {
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(log2(V.convertToDouble()));
    break;
  }
  }
  // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
  // and `G_FLOG2` reach here.
  bool Unused;
  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
  return V;
}

/// Match a unary FP op whose operand is a floating-point constant.
/// On success \p Cst holds the folded value for the apply step.
bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
  return Cst.hasValue();
}

/// Replace the matched unary FP op with a G_FCONSTANT of the folded value.
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
  Builder.setInstrAndDebugLoc(MI);
  MachineFunction &MF = Builder.getMF();
  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
  Register DstReg = MI.getOperand(0).getReg();
  Builder.buildFConstant(DstReg, *FPVal);
  MI.eraseFromParent();
}

/// Match a chain of two G_PTR_ADDs with constant offsets so they can be
/// collapsed into a single G_PTR_ADD from the common base (body continues
/// past this block).
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  // %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
  // (continuation of matchPtrAddImmedChain)
  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  // Look through copies/extensions for a constant second operand.
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  // Don't do this combine if there multiple uses of the first PTR_ADD,
  // since we may be able to compute the second PTR_ADD as an immediate
  // offset anyway. Folding the first offset into the second may cause us
  // to go beyond the bounds of our legal addressing modes.
  if (!MRI.hasOneNonDBGUse(Add2))
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Base = Base;
  return true;
}

/// Rewrite the root G_PTR_ADD in place to add the combined immediate to the
/// common base computed by matchPtrAddImmedChain.
void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  // Mutate the existing instruction between changing/changed notifications so
  // the observer sees the update.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
}

/// Match two chained shifts (of the same opcode) by constants so they can be
/// merged into one shift by the summed amount (body continues past this
/// block).
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  // %t1 = SHIFT %base, G_CONSTANT imm1
  // %root = SHIFT %t1, G_CONSTANT imm2
  // -->
  // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)

  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
  // (continuation of matchShiftImmedChain)

  Register Shl2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  // The inner shift must use the exact same shift opcode as the root.
  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  if (Shl2Def->getOpcode() != Opcode)
    return false;

  Register Base = Shl2Def->getOperand(1).getReg();
  Register Imm2 = Shl2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm =
      (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Reg = Base;

  // There is no simple replacement for a saturating unsigned left shift that
  // exceeds the scalar size.
  if (Opcode == TargetOpcode::G_USHLSAT &&
      MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
    return false;

  return true;
}

/// Rewrite the root shift to use the combined shift amount from
/// matchShiftImmedChain, handling amounts that exceed the scalar width.
void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Builder.setInstrAndDebugLoc(MI);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  auto Imm = MatchInfo.Imm;

  if (Imm >= ScalarSizeInBits) {
    // Any logical shift that exceeds scalar size will produce zero.
    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
      Builder.buildConstant(MI.getOperand(0), 0);
      MI.eraseFromParent();
      return;
    }
    // Arithmetic shift and saturating signed left shift have no effect beyond
    // scalar size.
    Imm = ScalarSizeInBits - 1;
  }

  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Reg);
  MI.getOperand(2).setReg(NewImm);
  Observer.changedInstr(MI);
}

/// Match (shift (logic (shift X, C0), Y), C1) so both shifts can be pushed
/// through the bitwise logic op (body continues past this block).
bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  // with any of G_AND/G_OR/G_XOR logic instructions.
  // %t1 = SHIFT %X, G_CONSTANT C0
  // %t2 = LOGIC %t1, %Y
  // %root = SHIFT %t2, G_CONSTANT C1
  // -->
  // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  // %t4 = SHIFT %Y, G_CONSTANT C1
  // %root = LOGIC %t3, %t4
  unsigned ShiftOpcode = MI.getOpcode();
  assert((ShiftOpcode == TargetOpcode::G_SHL ||
          ShiftOpcode == TargetOpcode::G_ASHR ||
          ShiftOpcode == TargetOpcode::G_LSHR ||
          ShiftOpcode == TargetOpcode::G_USHLSAT ||
          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  // Match a one-use bitwise logic op.
  // (continuation of matchShiftOfShiftedLogic)
  Register LogicDest = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LogicDest))
    return false;

  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  unsigned LogicOpcode = LogicMI->getOpcode();
  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
      LogicOpcode != TargetOpcode::G_XOR)
    return false;

  // Find a matching one-use shift by constant.
  const Register C1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
  if (!MaybeImmVal)
    return false;

  const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();

  // Checks that the candidate operand is a one-use shift of the same opcode
  // by a constant, and extracts that constant into ShiftVal.
  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // Shift should match previous one and should be a one-use.
    if (MI->getOpcode() != ShiftOpcode ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    // Must be a constant.
    auto MaybeImmVal =
        getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
    if (!MaybeImmVal)
      return false;

    ShiftVal = MaybeImmVal->Value.getSExtValue();
    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  uint64_t C0Val;

  if (matchFirstShift(LogicMIOp1, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg2;
    MatchInfo.Shift2 = LogicMIOp1;
  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg1;
    MatchInfo.Shift2 = LogicMIOp2;
  } else
    return false;

  MatchInfo.ValSum = C0Val + C1Val;

  // The fold is not valid if the sum of the shift values exceeds bitwidth.
  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
    return false;

  MatchInfo.Logic = LogicMI;
  return true;
}

/// Rebuild the matched shift-of-shifted-logic pattern as two parallel shifts
/// feeding one logic op (body continues past this block).
void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
          Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  Builder.setInstrAndDebugLoc(MI);

  // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);

  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  Register Shift1 =
      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);

  // %t4 = SHIFT %Y, G_CONSTANT C1
  Register Shift2Const = MI.getOperand(2).getReg();
  Register Shift2 = Builder
                        .buildInstr(Opcode, {DestType},
                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
                        .getReg(0);

  Register Dest = MI.getOperand(0).getReg();
  // (continuation of applyShiftOfShiftedLogic)
  // %root = LOGIC %t3, %t4
  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});

  // These were one use so it's safe to remove them.
  MatchInfo.Shift2->eraseFromParent();
  MatchInfo.Logic->eraseFromParent();

  MI.eraseFromParent();
}

/// Match a G_MUL by a power-of-two constant; on success \p ShiftVal is the
/// log2 of the multiplier.
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  // exactLogBase2 yields -1 when the constant is not a power of two.
  ShiftVal = MaybeImmVal->Value.exactLogBase2();
  return (static_cast<int32_t>(ShiftVal) != -1);
}

/// Mutate the matched G_MUL into G_SHL by the computed shift amount.
void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
}

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                             RegisterImmPair &MatchData) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);

  Register LHS = MI.getOperand(1).getReg();

  // Accept any of the three extension flavors feeding the shift.
  Register ExtSrc;
  if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
    return false;

  // TODO: Should handle vector splat.
  Register RHS = MI.getOperand(2).getReg();
  auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!MaybeShiftAmtVal)
    return false;

  if (LI) {
    LLT SrcTy = MRI.getType(ExtSrc);

    // We only really care about the legality with the shifted value. We can
    // pick any type the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
    // (continuation of matchCombineShlOfExtend)
    LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
      return false;
  }

  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
  MatchData.Reg = ExtSrc;
  MatchData.Imm = ShiftAmt;

  // countLeadingOnes on the known-zero mask == number of known leading zero
  // bits of the value; the narrow shift is safe only if no set bit can be
  // shifted out of the narrow type.
  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
  return MinLeadingZeros >= ShiftAmt;
}

/// Rebuild the matched pattern as a narrow shift followed by a zero-extend.
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
                                             const RegisterImmPair &MatchData) {
  Register ExtSrcReg = MatchData.Reg;
  int64_t ShiftAmtVal = MatchData.Imm;

  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  Builder.setInstrAndDebugLoc(MI);
  auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  auto NarrowShift =
      Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  Builder.buildZExt(MI.getOperand(0), NarrowShift);
  MI.eraseFromParent();
}

/// Match merge(unmerge(x)) where every merged value is the corresponding
/// unmerge def; on success \p MatchInfo is the original source register x.
bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
                                              Register &MatchInfo) {
  GMerge &Merge = cast<GMerge>(MI);
  SmallVector<Register, 16> MergedValues;
  for (unsigned I = 0; I < Merge.getNumSources(); ++I)
    MergedValues.emplace_back(Merge.getSourceReg(I));

  auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
  if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
    return false;

  // Every source must be the unmerge's def at the same index.
  for (unsigned I = 0; I < MergedValues.size(); ++I)
    if (MergedValues[I] != Unmerge->getReg(I))
      return false;

  MatchInfo = Unmerge->getSourceReg();
  return true;
}

/// Walk through any chain of G_BITCASTs and return the ultimate source
/// register.
static Register peekThroughBitcast(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
    ;

  return Reg;
}

/// Match an unmerge fed (possibly through bitcasts) by a merge-like
/// instruction whose pieces can replace the unmerge defs directly (body
/// continues past this block).
bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  Register SrcReg =
      peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);

  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
      SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
      SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
    return false;
  // (continuation of matchCombineUnmergeMergeToPlainValues)
  // Check the source type of the merge.
  LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
  if (SrcMergeTy != Dst0Ty && !SameSize)
    return false;
  // They are the same now (modulo a bitcast).
  // We can collect all the src registers.
  for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
       ++Idx)
    Operands.push_back(SrcInstr->getOperand(Idx).getReg());
  return true;
}

/// Replace each unmerge def with the corresponding merge source, inserting a
/// cast when the types differ only in representation.
void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Operands.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;

  LLT SrcTy = MRI.getType(Operands[0]);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  bool CanReuseInputDirectly = DstTy == SrcTy;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Register SrcReg = Operands[Idx];
    if (CanReuseInputDirectly)
      replaceRegWith(MRI, DstReg, SrcReg);
    else
      Builder.buildCast(DstReg, SrcReg);
  }
  MI.eraseFromParent();
}

/// Match an unmerge of a G_CONSTANT/G_FCONSTANT and split the constant into
/// one APInt per destination (body continues past this block).
bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
      SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
    return false;
  // Break down the big constant in smaller ones.
  const MachineOperand &CstVal = SrcInstr->getOperand(1);
  APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
                  ? CstVal.getCImm()->getValue()
                  : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();

  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
  // Unmerge a constant.
2111e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { 2112e8d8bef9SDimitry Andric Csts.emplace_back(Val.trunc(ShiftAmt)); 2113e8d8bef9SDimitry Andric Val = Val.lshr(ShiftAmt); 2114e8d8bef9SDimitry Andric } 2115e8d8bef9SDimitry Andric 2116e8d8bef9SDimitry Andric return true; 2117e8d8bef9SDimitry Andric } 2118e8d8bef9SDimitry Andric 2119*fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, 2120e8d8bef9SDimitry Andric SmallVectorImpl<APInt> &Csts) { 2121e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 2122e8d8bef9SDimitry Andric "Expected an unmerge"); 2123e8d8bef9SDimitry Andric assert((MI.getNumOperands() - 1 == Csts.size()) && 2124e8d8bef9SDimitry Andric "Not enough operands to replace all defs"); 2125e8d8bef9SDimitry Andric unsigned NumElems = MI.getNumOperands() - 1; 2126e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2127e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) { 2128e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(Idx).getReg(); 2129e8d8bef9SDimitry Andric Builder.buildConstant(DstReg, Csts[Idx]); 2130e8d8bef9SDimitry Andric } 2131e8d8bef9SDimitry Andric 2132e8d8bef9SDimitry Andric MI.eraseFromParent(); 2133e8d8bef9SDimitry Andric } 2134e8d8bef9SDimitry Andric 2135e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 2136e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 2137e8d8bef9SDimitry Andric "Expected an unmerge"); 2138e8d8bef9SDimitry Andric // Check that all the lanes are dead except the first one. 
2139e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { 2140e8d8bef9SDimitry Andric if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) 2141e8d8bef9SDimitry Andric return false; 2142e8d8bef9SDimitry Andric } 2143e8d8bef9SDimitry Andric return true; 2144e8d8bef9SDimitry Andric } 2145e8d8bef9SDimitry Andric 2146*fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 2147e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2148e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); 2149e8d8bef9SDimitry Andric // Truncating a vector is going to truncate every single lane, 2150e8d8bef9SDimitry Andric // whereas we want the full lowbits. 2151e8d8bef9SDimitry Andric // Do the operation on a scalar instead. 2152e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2153e8d8bef9SDimitry Andric if (SrcTy.isVector()) 2154e8d8bef9SDimitry Andric SrcReg = 2155e8d8bef9SDimitry Andric Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); 2156e8d8bef9SDimitry Andric 2157e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 2158e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 2159e8d8bef9SDimitry Andric if (Dst0Ty.isVector()) { 2160e8d8bef9SDimitry Andric auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); 2161e8d8bef9SDimitry Andric Builder.buildCast(Dst0Reg, MIB); 2162e8d8bef9SDimitry Andric } else 2163e8d8bef9SDimitry Andric Builder.buildTrunc(Dst0Reg, SrcReg); 2164e8d8bef9SDimitry Andric MI.eraseFromParent(); 2165e8d8bef9SDimitry Andric } 2166e8d8bef9SDimitry Andric 2167e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { 2168e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 2169e8d8bef9SDimitry Andric "Expected an unmerge"); 2170e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 
// Continuation of matchCombineUnmergeZExtToZExt.
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
  if (Dst0Ty.isVector())
    return false;
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return false;

  Register ZExtSrcReg;
  if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
    return false;

  // Finally we can replace the first definition with
  // a zext of the source if the definition is big enough to hold
  // all of ZExtSrc bits.
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
}

// Rewrite unmerge(zext x): def0 becomes zext(x) (or x itself when the sizes
// already match) and every other def is replaced by the constant zero.
void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");

  Register Dst0Reg = MI.getOperand(0).getReg();

  MachineInstr *ZExtInstr =
      MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
  assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
         "Expecting a G_ZEXT");

  Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);

  Builder.setInstrAndDebugLoc(MI);

  if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
    Builder.buildZExt(Dst0Reg, ZExtSrcReg);
  } else {
    // Equal sizes were the only other case accepted by the match.
    assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
           "ZExt src doesn't fit in destination");
    replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
  }

  Register ZeroReg;
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    // Materialize the zero lazily; all high lanes share one constant.
    if (!ZeroReg)
      ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
    replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
  }
  MI.eraseFromParent();
}

// Match a scalar shift (G_SHL/G_LSHR/G_ASHR) wider than TargetShiftSize
// whose constant amount is in [Size/2, Size); such shifts can be performed
// on the half-width pieces produced by G_UNMERGE_VALUES.
// ShiftVal receives the constant shift amount on success.
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.getSExtValue();
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

// Rewrite a wide shift-by-large-constant as unmerge + half-width shift +
// merge (see the per-opcode expansions in the body).
void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
// Continuation of applyCombineShiftToUnmerge.
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  // Amount left to shift once we operate on the half that matters.
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    // dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    // dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    // Hi is the sign-fill: the high half shifted by HalfSize - 1.
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C) ->, for C >= 32
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
}

// Convenience wrapper: run the match and, on success, the apply.
bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

// inttoptr(ptrtoint x) -> x, provided x already has the destination pointer
// type. Reg receives the original pointer.
bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI,
                  m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
}

// Replace the round-tripped pointer with a plain copy of the original.
void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildCopy(DstReg, Reg);
  MI.eraseFromParent();
}

// ptrtoint(inttoptr x) -> x. Reg receives the original integer; the apply
// adapts the width with zext/trunc.
bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
}
// Replace ptrtoint(inttoptr x) with x, zero-extending or truncating to the
// destination integer width.
void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildZExtOrTrunc(DstReg, Reg);
  MI.eraseFromParent();
}

// Match G_ADD where one operand is ptrtoint(p) of exactly the add's width.
// PtrReg.first receives the pointer; PtrReg.second is true when the pointer
// was found on the RHS (i.e. the operands must be commuted).
bool CombinerHelper::matchCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD);
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT IntTy = MRI.getType(LHS);

  // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
  // instruction.
  PtrReg.second = false;
  for (Register SrcReg : {LHS, RHS}) {
    if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
      // Don't handle cases where the integer is implicitly converted to the
      // pointer width.
      LLT PtrTy = MRI.getType(PtrReg.first);
      if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
        return true;
    }

    // First iteration failed: if the second matches, commutation is needed.
    PtrReg.second = true;
  }

  return false;
}

// Rewrite add(ptrtoint p, x) as ptrtoint(ptr_add p, x).
void CombinerHelper::applyCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  const bool DoCommute = PtrReg.second;
  if (DoCommute)
    std::swap(LHS, RHS);
  LHS = PtrReg.first;

  LLT PtrTy = MRI.getType(LHS);

  Builder.setInstrAndDebugLoc(MI);
  auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
  Builder.buildPtrToInt(Dst, PtrAdd);
  MI.eraseFromParent();
}

// ptr_add(inttoptr C1, C2) -> single constant C1 + C2 (NewCst).
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  int64_t &NewCst) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  // NOTE: shadows the member MRI; per the constructor both are
  // Builder.getMF().getRegInfo(), so they refer to the same object.
  MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();

// Continuation of matchCombineConstPtrAddToI2P: fold only when both the RHS
// offset and the inttoptr source are known constants.
  if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
    int64_t Cst;
    if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
      NewCst = Cst + *RHSCst;
      return true;
    }
  }

  return false;
}

// Replace the constant ptr_add with a single G_CONSTANT of NewCst.
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  int64_t &NewCst) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
  Register Dst = MI.getOperand(0).getReg();

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildConstant(Dst, NewCst);
  MI.eraseFromParent();
}

// anyext(trunc x) -> x when x already has the destination type.
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return mi_match(SrcReg, MRI,
                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}

// zext(trunc x) -> x when known-bits proves the bits the trunc discarded
// were already zero in x.
bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (mi_match(SrcReg, MRI,
               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
    unsigned DstSize = DstTy.getScalarSizeInBits();
    unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
    // x must have at least DstSize - SrcSize leading zeros.
    return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
  }
  return false;
}

// Match ext(ext x) combinations that can be collapsed; MatchInfo records
// the innermost source register and the inner extension opcode.
bool CombinerHelper::matchCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
  unsigned Opc = MI.getOpcode();
  unsigned SrcOpc = SrcMI->getOpcode();
  if (Opc == SrcOpc ||
      (Opc == TargetOpcode::G_ANYEXT &&
       (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
      (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

// Collapse ext(ext x): same opcodes fold into one ext of x in place;
// anyext([sz]ext) / sext(zext) are rebuilt as the inner extension.
void CombinerHelper::applyCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");

  Register Reg = std::get<0>(MatchInfo);
  unsigned SrcExtOp = std::get<1>(MatchInfo);

  // Combine exts with the same opcode.
  if (MI.getOpcode() == SrcExtOp) {
    // In-place operand rewrite: bracket with Observer notifications.
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Reg);
    Observer.changedInstr(MI);
    return;
  }

  // Combine:
  // - anyext([sz]ext x) to [sz]ext x
  // - sext(zext x) to zext x
  if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
      (MI.getOpcode() == TargetOpcode::G_SEXT &&
       SrcExtOp == TargetOpcode::G_ZEXT)) {
    Register DstReg = MI.getOperand(0).getReg();
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
    MI.eraseFromParent();
  }
}

// mul x, -1 -> sub 0, x (keeping MI's flags).
void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
                   MI.getFlags());
  MI.eraseFromParent();
}

// fneg(fneg x) -> x. Reg receives x.
bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2528e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2529e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); 2530e8d8bef9SDimitry Andric } 2531e8d8bef9SDimitry Andric 2532e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { 2533e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); 2534e8d8bef9SDimitry Andric Src = MI.getOperand(1).getReg(); 2535e8d8bef9SDimitry Andric Register AbsSrc; 2536e8d8bef9SDimitry Andric return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); 2537e8d8bef9SDimitry Andric } 2538e8d8bef9SDimitry Andric 2539e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfExt( 2540e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2541e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2542e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2543e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2544e8d8bef9SDimitry Andric unsigned SrcOpc = SrcMI->getOpcode(); 2545e8d8bef9SDimitry Andric if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || 2546e8d8bef9SDimitry Andric SrcOpc == TargetOpcode::G_ZEXT) { 2547e8d8bef9SDimitry Andric MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); 2548e8d8bef9SDimitry Andric return true; 2549e8d8bef9SDimitry Andric } 2550e8d8bef9SDimitry Andric return false; 2551e8d8bef9SDimitry Andric } 2552e8d8bef9SDimitry Andric 2553*fe6060f1SDimitry Andric void CombinerHelper::applyCombineTruncOfExt( 2554e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2555e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2556e8d8bef9SDimitry Andric Register SrcReg = MatchInfo.first; 2557e8d8bef9SDimitry Andric unsigned SrcExtOp = MatchInfo.second; 2558e8d8bef9SDimitry Andric Register 
DstReg = MI.getOperand(0).getReg(); 2559e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2560e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2561e8d8bef9SDimitry Andric if (SrcTy == DstTy) { 2562e8d8bef9SDimitry Andric MI.eraseFromParent(); 2563e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 2564*fe6060f1SDimitry Andric return; 2565e8d8bef9SDimitry Andric } 2566e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2567e8d8bef9SDimitry Andric if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) 2568e8d8bef9SDimitry Andric Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); 2569e8d8bef9SDimitry Andric else 2570e8d8bef9SDimitry Andric Builder.buildTrunc(DstReg, SrcReg); 2571e8d8bef9SDimitry Andric MI.eraseFromParent(); 2572e8d8bef9SDimitry Andric } 2573e8d8bef9SDimitry Andric 2574e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfShl( 2575e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2576e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2577e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2578e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2579e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2580e8d8bef9SDimitry Andric Register ShiftSrc; 2581e8d8bef9SDimitry Andric Register ShiftAmt; 2582e8d8bef9SDimitry Andric 2583e8d8bef9SDimitry Andric if (MRI.hasOneNonDBGUse(SrcReg) && 2584e8d8bef9SDimitry Andric mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && 2585e8d8bef9SDimitry Andric isLegalOrBeforeLegalizer( 2586e8d8bef9SDimitry Andric {TargetOpcode::G_SHL, 2587e8d8bef9SDimitry Andric {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { 2588e8d8bef9SDimitry Andric KnownBits Known = KB->getKnownBits(ShiftAmt); 2589e8d8bef9SDimitry Andric unsigned Size = DstTy.getSizeInBits(); 2590e8d8bef9SDimitry Andric if (Known.getBitWidth() - Known.countMinLeadingZeros() <= 
Log2_32(Size)) { 2591e8d8bef9SDimitry Andric MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); 2592e8d8bef9SDimitry Andric return true; 2593e8d8bef9SDimitry Andric } 2594e8d8bef9SDimitry Andric } 2595e8d8bef9SDimitry Andric return false; 2596e8d8bef9SDimitry Andric } 2597e8d8bef9SDimitry Andric 2598*fe6060f1SDimitry Andric void CombinerHelper::applyCombineTruncOfShl( 2599e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2600e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2601e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2602e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2603e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2604e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2605e8d8bef9SDimitry Andric 2606e8d8bef9SDimitry Andric Register ShiftSrc = MatchInfo.first; 2607e8d8bef9SDimitry Andric Register ShiftAmt = MatchInfo.second; 2608e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2609e8d8bef9SDimitry Andric auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); 2610e8d8bef9SDimitry Andric Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); 2611e8d8bef9SDimitry Andric MI.eraseFromParent(); 2612e8d8bef9SDimitry Andric } 2613e8d8bef9SDimitry Andric 26145ffd83dbSDimitry Andric bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { 26155ffd83dbSDimitry Andric return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 26165ffd83dbSDimitry Andric return MO.isReg() && 26175ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 26185ffd83dbSDimitry Andric }); 26195ffd83dbSDimitry Andric } 26205ffd83dbSDimitry Andric 26215ffd83dbSDimitry Andric bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) { 26225ffd83dbSDimitry Andric return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 26235ffd83dbSDimitry Andric 
return !MO.isReg() || 26245ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 26255ffd83dbSDimitry Andric }); 26265ffd83dbSDimitry Andric } 26275ffd83dbSDimitry Andric 26285ffd83dbSDimitry Andric bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) { 26295ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); 26305ffd83dbSDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 26315ffd83dbSDimitry Andric return all_of(Mask, [](int Elt) { return Elt < 0; }); 26325ffd83dbSDimitry Andric } 26335ffd83dbSDimitry Andric 26345ffd83dbSDimitry Andric bool CombinerHelper::matchUndefStore(MachineInstr &MI) { 26355ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_STORE); 26365ffd83dbSDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), 26375ffd83dbSDimitry Andric MRI); 26385ffd83dbSDimitry Andric } 26395ffd83dbSDimitry Andric 2640e8d8bef9SDimitry Andric bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { 2641e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 2642e8d8bef9SDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), 2643e8d8bef9SDimitry Andric MRI); 2644e8d8bef9SDimitry Andric } 2645e8d8bef9SDimitry Andric 2646e8d8bef9SDimitry Andric bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { 2647e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 2648e8d8bef9SDimitry Andric if (auto MaybeCstCmp = 2649e8d8bef9SDimitry Andric getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) { 2650e8d8bef9SDimitry Andric OpIdx = MaybeCstCmp->Value.isNullValue() ? 
3 : 2; 2651e8d8bef9SDimitry Andric return true; 2652e8d8bef9SDimitry Andric } 2653e8d8bef9SDimitry Andric return false; 2654e8d8bef9SDimitry Andric } 2655e8d8bef9SDimitry Andric 26565ffd83dbSDimitry Andric bool CombinerHelper::eraseInst(MachineInstr &MI) { 26575ffd83dbSDimitry Andric MI.eraseFromParent(); 26585ffd83dbSDimitry Andric return true; 26595ffd83dbSDimitry Andric } 26605ffd83dbSDimitry Andric 26615ffd83dbSDimitry Andric bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, 26625ffd83dbSDimitry Andric const MachineOperand &MOP2) { 26635ffd83dbSDimitry Andric if (!MOP1.isReg() || !MOP2.isReg()) 26645ffd83dbSDimitry Andric return false; 26655ffd83dbSDimitry Andric MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI); 26665ffd83dbSDimitry Andric if (!I1) 26675ffd83dbSDimitry Andric return false; 26685ffd83dbSDimitry Andric MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI); 26695ffd83dbSDimitry Andric if (!I2) 26705ffd83dbSDimitry Andric return false; 26715ffd83dbSDimitry Andric 26725ffd83dbSDimitry Andric // Handle a case like this: 26735ffd83dbSDimitry Andric // 26745ffd83dbSDimitry Andric // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>) 26755ffd83dbSDimitry Andric // 26765ffd83dbSDimitry Andric // Even though %0 and %1 are produced by the same instruction they are not 26775ffd83dbSDimitry Andric // the same values. 26785ffd83dbSDimitry Andric if (I1 == I2) 26795ffd83dbSDimitry Andric return MOP1.getReg() == MOP2.getReg(); 26805ffd83dbSDimitry Andric 26815ffd83dbSDimitry Andric // If we have an instruction which loads or stores, we can't guarantee that 26825ffd83dbSDimitry Andric // it is identical. 26835ffd83dbSDimitry Andric // 26845ffd83dbSDimitry Andric // For example, we may have 26855ffd83dbSDimitry Andric // 26865ffd83dbSDimitry Andric // %x1 = G_LOAD %addr (load N from @somewhere) 26875ffd83dbSDimitry Andric // ... 26885ffd83dbSDimitry Andric // call @foo 26895ffd83dbSDimitry Andric // ... 
  //   %x2 = G_LOAD %addr (load N from @somewhere)
  //   ...
  //   %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  //   %a = COPY $physreg
  //   ...
  //   SOMETHING implicit-def $physreg
  //   ...
  //   %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    //   %a = COPY $physreg
    //   %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}

/// Return true if \p MOP is a register whose (look-through) defining
/// instruction is a constant equal to \p C.
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  // MIPatternMatch doesn't let us look through G_ZEXT etc.
  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}

/// Delete \p MI and replace all uses of its single def with its source
/// operand \p OpIdx.
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

/// Delete \p MI and replace all uses of its single def with \p Replacement.
bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

/// Match a G_SELECT whose two value operands provably hold the same value,
/// making the condition irrelevant.
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

/// Match a binary op whose two source operands provably hold the same value
/// (e.g. x & x, x | x).
bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

/// Match an instruction whose operand \p OpIdx is the constant zero.
bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

/// Match an instruction whose operand \p OpIdx is (a look-through copy of) a
/// G_IMPLICIT_DEF.
bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return MO.isReg() &&
         getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
}

/// Match an instruction whose operand \p OpIdx is known (via known-bits
/// analysis) to be a power of two.
bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
                                                        unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}

/// Replace \p MI's single def with a fresh G_FCONSTANT of value \p C.
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

/// Replace \p MI's single def with a fresh G_CONSTANT of value \p C.
bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

/// Replace \p MI's single def with a fresh G_CONSTANT of arbitrary-precision
/// value \p C.
bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

/// Replace \p MI's single def with a fresh G_IMPLICIT_DEF.
bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
28195ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 28205ffd83dbSDimitry Andric Builder.setInstr(MI); 28215ffd83dbSDimitry Andric Builder.buildUndef(MI.getOperand(0)); 28225ffd83dbSDimitry Andric MI.eraseFromParent(); 28235ffd83dbSDimitry Andric return true; 28245ffd83dbSDimitry Andric } 28255ffd83dbSDimitry Andric 28265ffd83dbSDimitry Andric bool CombinerHelper::matchSimplifyAddToSub( 28275ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 28285ffd83dbSDimitry Andric Register LHS = MI.getOperand(1).getReg(); 28295ffd83dbSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 28305ffd83dbSDimitry Andric Register &NewLHS = std::get<0>(MatchInfo); 28315ffd83dbSDimitry Andric Register &NewRHS = std::get<1>(MatchInfo); 28325ffd83dbSDimitry Andric 28335ffd83dbSDimitry Andric // Helper lambda to check for opportunities for 28345ffd83dbSDimitry Andric // ((0-A) + B) -> B - A 28355ffd83dbSDimitry Andric // (A + (0-B)) -> A - B 28365ffd83dbSDimitry Andric auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { 2837e8d8bef9SDimitry Andric if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) 28385ffd83dbSDimitry Andric return false; 28395ffd83dbSDimitry Andric NewLHS = MaybeNewLHS; 28405ffd83dbSDimitry Andric return true; 28415ffd83dbSDimitry Andric }; 28425ffd83dbSDimitry Andric 28435ffd83dbSDimitry Andric return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); 28445ffd83dbSDimitry Andric } 28455ffd83dbSDimitry Andric 2846e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineInsertVecElts( 2847e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2848e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && 2849e8d8bef9SDimitry Andric "Invalid opcode"); 2850e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2851e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2852e8d8bef9SDimitry Andric assert(DstTy.isVector() && 
"Invalid G_INSERT_VECTOR_ELT?"); 2853e8d8bef9SDimitry Andric unsigned NumElts = DstTy.getNumElements(); 2854e8d8bef9SDimitry Andric // If this MI is part of a sequence of insert_vec_elts, then 2855e8d8bef9SDimitry Andric // don't do the combine in the middle of the sequence. 2856e8d8bef9SDimitry Andric if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == 2857e8d8bef9SDimitry Andric TargetOpcode::G_INSERT_VECTOR_ELT) 2858e8d8bef9SDimitry Andric return false; 2859e8d8bef9SDimitry Andric MachineInstr *CurrInst = &MI; 2860e8d8bef9SDimitry Andric MachineInstr *TmpInst; 2861e8d8bef9SDimitry Andric int64_t IntImm; 2862e8d8bef9SDimitry Andric Register TmpReg; 2863e8d8bef9SDimitry Andric MatchInfo.resize(NumElts); 2864e8d8bef9SDimitry Andric while (mi_match( 2865e8d8bef9SDimitry Andric CurrInst->getOperand(0).getReg(), MRI, 2866e8d8bef9SDimitry Andric m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { 2867e8d8bef9SDimitry Andric if (IntImm >= NumElts) 2868e8d8bef9SDimitry Andric return false; 2869e8d8bef9SDimitry Andric if (!MatchInfo[IntImm]) 2870e8d8bef9SDimitry Andric MatchInfo[IntImm] = TmpReg; 2871e8d8bef9SDimitry Andric CurrInst = TmpInst; 2872e8d8bef9SDimitry Andric } 2873e8d8bef9SDimitry Andric // Variable index. 2874e8d8bef9SDimitry Andric if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) 2875e8d8bef9SDimitry Andric return false; 2876e8d8bef9SDimitry Andric if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { 2877e8d8bef9SDimitry Andric for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { 2878e8d8bef9SDimitry Andric if (!MatchInfo[I - 1].isValid()) 2879e8d8bef9SDimitry Andric MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); 2880e8d8bef9SDimitry Andric } 2881e8d8bef9SDimitry Andric return true; 2882e8d8bef9SDimitry Andric } 2883e8d8bef9SDimitry Andric // If we didn't end in a G_IMPLICIT_DEF, bail out. 
2884e8d8bef9SDimitry Andric return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; 2885e8d8bef9SDimitry Andric } 2886e8d8bef9SDimitry Andric 2887*fe6060f1SDimitry Andric void CombinerHelper::applyCombineInsertVecElts( 2888e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2889e8d8bef9SDimitry Andric Builder.setInstr(MI); 2890e8d8bef9SDimitry Andric Register UndefReg; 2891e8d8bef9SDimitry Andric auto GetUndef = [&]() { 2892e8d8bef9SDimitry Andric if (UndefReg) 2893e8d8bef9SDimitry Andric return UndefReg; 2894e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 2895e8d8bef9SDimitry Andric UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); 2896e8d8bef9SDimitry Andric return UndefReg; 2897e8d8bef9SDimitry Andric }; 2898e8d8bef9SDimitry Andric for (unsigned I = 0; I < MatchInfo.size(); ++I) { 2899e8d8bef9SDimitry Andric if (!MatchInfo[I]) 2900e8d8bef9SDimitry Andric MatchInfo[I] = GetUndef(); 2901e8d8bef9SDimitry Andric } 2902e8d8bef9SDimitry Andric Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); 2903e8d8bef9SDimitry Andric MI.eraseFromParent(); 2904e8d8bef9SDimitry Andric } 2905e8d8bef9SDimitry Andric 2906*fe6060f1SDimitry Andric void CombinerHelper::applySimplifyAddToSub( 29075ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 29085ffd83dbSDimitry Andric Builder.setInstr(MI); 29095ffd83dbSDimitry Andric Register SubLHS, SubRHS; 29105ffd83dbSDimitry Andric std::tie(SubLHS, SubRHS) = MatchInfo; 29115ffd83dbSDimitry Andric Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); 29125ffd83dbSDimitry Andric MI.eraseFromParent(); 29135ffd83dbSDimitry Andric } 29145ffd83dbSDimitry Andric 2915e8d8bef9SDimitry Andric bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( 2916e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2917e8d8bef9SDimitry Andric // Matches: logic (hand x, ...), (hand y, ...) 
-> hand (logic x, y), ...
  //
  // Creates the new hand + logic instruction (but does not insert them.)
  //
  // On success, MatchInfo is populated with the new instructions. These are
  // inserted in applyHoistLogicOpWithSameOpcodeHands.
  unsigned LogicOpcode = MI.getOpcode();
  assert(LogicOpcode == TargetOpcode::G_AND ||
         LogicOpcode == TargetOpcode::G_OR ||
         LogicOpcode == TargetOpcode::G_XOR);
  // NOTE(review): this MIB appears unused — the build-step lambdas below take
  // their own MachineInstrBuilder parameter. Looks like a removal candidate;
  // confirm before deleting.
  MachineIRBuilder MIB(MI);
  Register Dst = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  // Don't recompute anything.
  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
    return false;

  // Make sure we have (hand x, ...), (hand y, ...)
  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  if (!LeftHandInst || !RightHandInst)
    return false;
  unsigned HandOpcode = LeftHandInst->getOpcode();
  if (HandOpcode != RightHandInst->getOpcode())
    return false;
  if (!LeftHandInst->getOperand(1).isReg() ||
      !RightHandInst->getOperand(1).isReg())
    return false;

  // Make sure the types match up, and if we're doing this post-legalization,
  // we end up with legal types.
  Register X = LeftHandInst->getOperand(1).getReg();
  Register Y = RightHandInst->getOperand(1).getReg();
  LLT XTy = MRI.getType(X);
  LLT YTy = MRI.getType(Y);
  if (XTy != YTy)
    return false;
  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
    return false;

  // Optional extra source register.
  Register ExtraHandOpSrcReg;
  switch (HandOpcode) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT: {
    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
    break;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
    // Both hands must share the exact same extra operand z.
    MachineOperand &ZOp = LeftHandInst->getOperand(2);
    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
      return false;
    ExtraHandOpSrcReg = ZOp.getReg();
    break;
  }
  }

  // Record the steps to build the new instructions.
  //
  // Steps to build (logic x, y)
  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  OperandBuildSteps LogicBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);

  // Steps to build hand (logic x, y), ...z
  OperandBuildSteps HandBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  if (ExtraHandOpSrcReg.isValid())
    HandBuildSteps.push_back(
        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);

  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  return true;
}

/// Materialize the instructions recorded in \p MatchInfo at \p MI's position,
/// then delete \p MI.
void CombinerHelper::applyBuildInstructionSteps(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  assert(MatchInfo.InstrsToBuild.size() &&
         "Expected at least one instr to build?");
  Builder.setInstr(MI);
  for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
    assert(InstrToBuild.Opcode && "Expected a valid opcode?");
assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); 3014e8d8bef9SDimitry Andric MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); 3015e8d8bef9SDimitry Andric for (auto &OperandFn : InstrToBuild.OperandFns) 3016e8d8bef9SDimitry Andric OperandFn(Instr); 3017e8d8bef9SDimitry Andric } 3018e8d8bef9SDimitry Andric MI.eraseFromParent(); 3019e8d8bef9SDimitry Andric } 3020e8d8bef9SDimitry Andric 3021e8d8bef9SDimitry Andric bool CombinerHelper::matchAshrShlToSextInreg( 3022e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 3023e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 3024e8d8bef9SDimitry Andric int64_t ShlCst, AshrCst; 3025e8d8bef9SDimitry Andric Register Src; 3026e8d8bef9SDimitry Andric // FIXME: detect splat constant vectors. 3027e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 3028e8d8bef9SDimitry Andric m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) 3029e8d8bef9SDimitry Andric return false; 3030e8d8bef9SDimitry Andric if (ShlCst != AshrCst) 3031e8d8bef9SDimitry Andric return false; 3032e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 3033e8d8bef9SDimitry Andric {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) 3034e8d8bef9SDimitry Andric return false; 3035e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(Src, ShlCst); 3036e8d8bef9SDimitry Andric return true; 3037e8d8bef9SDimitry Andric } 3038*fe6060f1SDimitry Andric 3039*fe6060f1SDimitry Andric void CombinerHelper::applyAshShlToSextInreg( 3040e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 3041e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 3042e8d8bef9SDimitry Andric Register Src; 3043e8d8bef9SDimitry Andric int64_t ShiftAmt; 3044e8d8bef9SDimitry Andric std::tie(Src, ShiftAmt) = MatchInfo; 3045e8d8bef9SDimitry Andric unsigned Size = MRI.getType(Src).getScalarSizeInBits(); 3046e8d8bef9SDimitry Andric 
Builder.setInstrAndDebugLoc(MI); 3047e8d8bef9SDimitry Andric Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); 3048e8d8bef9SDimitry Andric MI.eraseFromParent(); 3049*fe6060f1SDimitry Andric } 3050*fe6060f1SDimitry Andric 3051*fe6060f1SDimitry Andric /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 3052*fe6060f1SDimitry Andric bool CombinerHelper::matchOverlappingAnd( 3053*fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3054*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 3055*fe6060f1SDimitry Andric 3056*fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3057*fe6060f1SDimitry Andric LLT Ty = MRI.getType(Dst); 3058*fe6060f1SDimitry Andric 3059*fe6060f1SDimitry Andric Register R; 3060*fe6060f1SDimitry Andric int64_t C1; 3061*fe6060f1SDimitry Andric int64_t C2; 3062*fe6060f1SDimitry Andric if (!mi_match( 3063*fe6060f1SDimitry Andric Dst, MRI, 3064*fe6060f1SDimitry Andric m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2)))) 3065*fe6060f1SDimitry Andric return false; 3066*fe6060f1SDimitry Andric 3067*fe6060f1SDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 3068*fe6060f1SDimitry Andric if (C1 & C2) { 3069*fe6060f1SDimitry Andric B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2)); 3070*fe6060f1SDimitry Andric return; 3071*fe6060f1SDimitry Andric } 3072*fe6060f1SDimitry Andric auto Zero = B.buildConstant(Ty, 0); 3073*fe6060f1SDimitry Andric replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg()); 3074*fe6060f1SDimitry Andric }; 3075e8d8bef9SDimitry Andric return true; 3076e8d8bef9SDimitry Andric } 3077e8d8bef9SDimitry Andric 3078e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, 3079e8d8bef9SDimitry Andric Register &Replacement) { 3080e8d8bef9SDimitry Andric // Given 3081e8d8bef9SDimitry Andric // 3082e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 3083e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 3084e8d8bef9SDimitry Andric 
  //   %res:_(sN) = G_AND %x, %y
  //
  // Eliminate the G_AND when it is known that x & y == x or x & y == y.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  //   %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  //   %one:_(s32) = G_CONSTANT i32 1
  //   %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  // Known-bits analysis is required for this combine.
  if (!KB)
    return false;

  Register AndDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(AndDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x & Mask == x.
  // x & 1 == x, always
  // x & 0 == x, only if x is also 0
  // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
  //
  // Check if we can replace AndDst with the LHS of the G_AND
  if (canReplaceReg(AndDst, LHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace AndDst with the RHS of the G_AND
  if (canReplaceReg(AndDst, RHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
  // Given
  //
  //   %y:_(sN) = G_SOMETHING
  //   %x:_(sN) = G_SOMETHING
  //   %res:_(sN) = G_OR %x, %y
  //
  // Eliminate the G_OR when it is known that x | y == x or x | y == y.
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  // Known-bits analysis is required for this combine.
  if (!KB)
    return false;

  Register OrDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(OrDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x | Mask == x.
  // x | 0 == x, always
  // x | 1 == x, only if x is also 1
  // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
  //
  // Check if we can replace OrDst with the LHS of the G_OR
  if (canReplaceReg(OrDst, LHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace OrDst with the RHS of the G_OR
  if (canReplaceReg(OrDst, RHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
  // If the input is already sign extended, just drop the extension.
3181e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3182e8d8bef9SDimitry Andric unsigned ExtBits = MI.getOperand(2).getImm(); 3183e8d8bef9SDimitry Andric unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); 3184e8d8bef9SDimitry Andric return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); 3185e8d8bef9SDimitry Andric } 3186e8d8bef9SDimitry Andric 3187e8d8bef9SDimitry Andric static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, 3188e8d8bef9SDimitry Andric int64_t Cst, bool IsVector, bool IsFP) { 3189e8d8bef9SDimitry Andric // For i1, Cst will always be -1 regardless of boolean contents. 3190e8d8bef9SDimitry Andric return (ScalarSizeBits == 1 && Cst == -1) || 3191e8d8bef9SDimitry Andric isConstTrueVal(TLI, Cst, IsVector, IsFP); 3192e8d8bef9SDimitry Andric } 3193e8d8bef9SDimitry Andric 3194e8d8bef9SDimitry Andric bool CombinerHelper::matchNotCmp(MachineInstr &MI, 3195e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 3196e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3197e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 3198e8d8bef9SDimitry Andric const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); 3199e8d8bef9SDimitry Andric Register XorSrc; 3200e8d8bef9SDimitry Andric Register CstReg; 3201e8d8bef9SDimitry Andric // We match xor(src, true) here. 3202e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 3203e8d8bef9SDimitry Andric m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) 3204e8d8bef9SDimitry Andric return false; 3205e8d8bef9SDimitry Andric 3206e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(XorSrc)) 3207e8d8bef9SDimitry Andric return false; 3208e8d8bef9SDimitry Andric 3209e8d8bef9SDimitry Andric // Check that XorSrc is the root of a tree of comparisons combined with ANDs 3210e8d8bef9SDimitry Andric // and ORs. 
The suffix of RegsToNegate starting from index I is used as a work
  // list of tree nodes to visit.
  RegsToNegate.push_back(XorSrc);
  // Remember whether the comparisons are all integer or all floating point.
  bool IsInt = false;
  bool IsFP = false;
  // Note: RegsToNegate grows during iteration; size() is re-read each pass.
  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
    Register Reg = RegsToNegate[I];
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    switch (Def->getOpcode()) {
    default:
      // Don't match if the tree contains anything other than ANDs, ORs and
      // comparisons.
      return false;
    case TargetOpcode::G_ICMP:
      if (IsFP)
        return false;
      IsInt = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_FCMP:
      if (IsInt)
        return false;
      IsFP = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_OR:
      // Implement De Morgan's laws:
      // ~(x & y) -> ~x | ~y
      // ~(x | y) -> ~x & ~y
      // When we apply the combine we will change the opcode and recursively
      // negate the operands.
      RegsToNegate.push_back(Def->getOperand(1).getReg());
      RegsToNegate.push_back(Def->getOperand(2).getReg());
      break;
    }
  }

  // Now we know whether the comparisons are integer or floating point, check
  // the constant in the xor.
  int64_t Cst;
  if (Ty.isVector()) {
    MachineInstr *CstDef = MRI.getVRegDef(CstReg);
    auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
    if (!MaybeCst)
      return false;
    if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
      return false;
  } else {
    if (!mi_match(CstReg, MRI, m_ICst(Cst)))
      return false;
    if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
      return false;
  }

  return true;
}

/// Invert every node recorded by matchNotCmp (comparison predicates are
/// inverted, G_AND <-> G_OR swapped per De Morgan), then forward the xor's
/// input to its users.
void CombinerHelper::applyNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  for (Register Reg : RegsToNegate) {
    MachineInstr *Def = MRI.getVRegDef(Reg);
    Observer.changingInstr(*Def);
    // For each comparison, invert the opcode. For each AND and OR, change the
    // opcode.
3278e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 3279e8d8bef9SDimitry Andric default: 3280e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode"); 3281e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 3282e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: { 3283e8d8bef9SDimitry Andric MachineOperand &PredOp = Def->getOperand(1); 3284e8d8bef9SDimitry Andric CmpInst::Predicate NewP = CmpInst::getInversePredicate( 3285e8d8bef9SDimitry Andric (CmpInst::Predicate)PredOp.getPredicate()); 3286e8d8bef9SDimitry Andric PredOp.setPredicate(NewP); 3287e8d8bef9SDimitry Andric break; 3288e8d8bef9SDimitry Andric } 3289e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 3290e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); 3291e8d8bef9SDimitry Andric break; 3292e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 3293e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3294e8d8bef9SDimitry Andric break; 3295e8d8bef9SDimitry Andric } 3296e8d8bef9SDimitry Andric Observer.changedInstr(*Def); 3297e8d8bef9SDimitry Andric } 3298e8d8bef9SDimitry Andric 3299e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 3300e8d8bef9SDimitry Andric MI.eraseFromParent(); 3301e8d8bef9SDimitry Andric } 3302e8d8bef9SDimitry Andric 3303e8d8bef9SDimitry Andric bool CombinerHelper::matchXorOfAndWithSameReg( 3304e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3305e8d8bef9SDimitry Andric // Match (xor (and x, y), y) (or any of its commuted cases) 3306e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 3307e8d8bef9SDimitry Andric Register &X = MatchInfo.first; 3308e8d8bef9SDimitry Andric Register &Y = MatchInfo.second; 3309e8d8bef9SDimitry Andric Register AndReg = MI.getOperand(1).getReg(); 3310e8d8bef9SDimitry Andric Register SharedReg = MI.getOperand(2).getReg(); 3311e8d8bef9SDimitry Andric 3312e8d8bef9SDimitry Andric // Find a G_AND on 
either side of the G_XOR. 3313e8d8bef9SDimitry Andric // Look for one of 3314e8d8bef9SDimitry Andric // 3315e8d8bef9SDimitry Andric // (xor (and x, y), SharedReg) 3316e8d8bef9SDimitry Andric // (xor SharedReg, (and x, y)) 3317e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { 3318e8d8bef9SDimitry Andric std::swap(AndReg, SharedReg); 3319e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) 3320e8d8bef9SDimitry Andric return false; 3321e8d8bef9SDimitry Andric } 3322e8d8bef9SDimitry Andric 3323e8d8bef9SDimitry Andric // Only do this if we'll eliminate the G_AND. 3324e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(AndReg)) 3325e8d8bef9SDimitry Andric return false; 3326e8d8bef9SDimitry Andric 3327e8d8bef9SDimitry Andric // We can combine if SharedReg is the same as either the LHS or RHS of the 3328e8d8bef9SDimitry Andric // G_AND. 3329e8d8bef9SDimitry Andric if (Y != SharedReg) 3330e8d8bef9SDimitry Andric std::swap(X, Y); 3331e8d8bef9SDimitry Andric return Y == SharedReg; 3332e8d8bef9SDimitry Andric } 3333e8d8bef9SDimitry Andric 3334*fe6060f1SDimitry Andric void CombinerHelper::applyXorOfAndWithSameReg( 3335e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 3336e8d8bef9SDimitry Andric // Fold (xor (and x, y), y) -> (and (not x), y) 3337e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3338e8d8bef9SDimitry Andric Register X, Y; 3339e8d8bef9SDimitry Andric std::tie(X, Y) = MatchInfo; 3340e8d8bef9SDimitry Andric auto Not = Builder.buildNot(MRI.getType(X), X); 3341e8d8bef9SDimitry Andric Observer.changingInstr(MI); 3342e8d8bef9SDimitry Andric MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 3343e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Not->getOperand(0).getReg()); 3344e8d8bef9SDimitry Andric MI.getOperand(2).setReg(Y); 3345e8d8bef9SDimitry Andric Observer.changedInstr(MI); 3346e8d8bef9SDimitry Andric } 3347e8d8bef9SDimitry Andric 3348e8d8bef9SDimitry Andric bool 
CombinerHelper::matchPtrAddZero(MachineInstr &MI) { 3349e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3350e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3351e8d8bef9SDimitry Andric const DataLayout &DL = Builder.getMF().getDataLayout(); 3352e8d8bef9SDimitry Andric 3353e8d8bef9SDimitry Andric if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) 3354e8d8bef9SDimitry Andric return false; 3355e8d8bef9SDimitry Andric 3356e8d8bef9SDimitry Andric if (Ty.isPointer()) { 3357e8d8bef9SDimitry Andric auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI); 3358e8d8bef9SDimitry Andric return ConstVal && *ConstVal == 0; 3359e8d8bef9SDimitry Andric } 3360e8d8bef9SDimitry Andric 3361e8d8bef9SDimitry Andric assert(Ty.isVector() && "Expecting a vector type"); 3362e8d8bef9SDimitry Andric const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg()); 3363e8d8bef9SDimitry Andric return isBuildVectorAllZeros(*VecMI, MRI); 3364e8d8bef9SDimitry Andric } 3365e8d8bef9SDimitry Andric 3366*fe6060f1SDimitry Andric void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { 3367e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); 3368e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3369e8d8bef9SDimitry Andric Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2)); 3370e8d8bef9SDimitry Andric MI.eraseFromParent(); 3371e8d8bef9SDimitry Andric } 3372e8d8bef9SDimitry Andric 3373e8d8bef9SDimitry Andric /// The second source operand is known to be a power of 2. 
3374*fe6060f1SDimitry Andric void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { 3375e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3376e8d8bef9SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3377e8d8bef9SDimitry Andric Register Pow2Src1 = MI.getOperand(2).getReg(); 3378e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3379e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3380e8d8bef9SDimitry Andric 3381e8d8bef9SDimitry Andric // Fold (urem x, pow2) -> (and x, pow2-1) 3382e8d8bef9SDimitry Andric auto NegOne = Builder.buildConstant(Ty, -1); 3383e8d8bef9SDimitry Andric auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); 3384e8d8bef9SDimitry Andric Builder.buildAnd(DstReg, Src0, Add); 3385e8d8bef9SDimitry Andric MI.eraseFromParent(); 3386e8d8bef9SDimitry Andric } 3387e8d8bef9SDimitry Andric 3388e8d8bef9SDimitry Andric Optional<SmallVector<Register, 8>> 3389e8d8bef9SDimitry Andric CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { 3390e8d8bef9SDimitry Andric assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); 3391e8d8bef9SDimitry Andric // We want to detect if Root is part of a tree which represents a bunch 3392e8d8bef9SDimitry Andric // of loads being merged into a larger load. We'll try to recognize patterns 3393e8d8bef9SDimitry Andric // like, for example: 3394e8d8bef9SDimitry Andric // 3395e8d8bef9SDimitry Andric // Reg Reg 3396e8d8bef9SDimitry Andric // \ / 3397e8d8bef9SDimitry Andric // OR_1 Reg 3398e8d8bef9SDimitry Andric // \ / 3399e8d8bef9SDimitry Andric // OR_2 3400e8d8bef9SDimitry Andric // \ Reg 3401e8d8bef9SDimitry Andric // .. / 3402e8d8bef9SDimitry Andric // Root 3403e8d8bef9SDimitry Andric // 3404e8d8bef9SDimitry Andric // Reg Reg Reg Reg 3405e8d8bef9SDimitry Andric // \ / \ / 3406e8d8bef9SDimitry Andric // OR_1 OR_2 3407e8d8bef9SDimitry Andric // \ / 3408e8d8bef9SDimitry Andric // \ / 3409e8d8bef9SDimitry Andric // ... 
3410e8d8bef9SDimitry Andric // Root 3411e8d8bef9SDimitry Andric // 3412e8d8bef9SDimitry Andric // Each "Reg" may have been produced by a load + some arithmetic. This 3413e8d8bef9SDimitry Andric // function will save each of them. 3414e8d8bef9SDimitry Andric SmallVector<Register, 8> RegsToVisit; 3415e8d8bef9SDimitry Andric SmallVector<const MachineInstr *, 7> Ors = {Root}; 3416e8d8bef9SDimitry Andric 3417e8d8bef9SDimitry Andric // In the "worst" case, we're dealing with a load for each byte. So, there 3418e8d8bef9SDimitry Andric // are at most #bytes - 1 ORs. 3419e8d8bef9SDimitry Andric const unsigned MaxIter = 3420e8d8bef9SDimitry Andric MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; 3421e8d8bef9SDimitry Andric for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { 3422e8d8bef9SDimitry Andric if (Ors.empty()) 3423e8d8bef9SDimitry Andric break; 3424e8d8bef9SDimitry Andric const MachineInstr *Curr = Ors.pop_back_val(); 3425e8d8bef9SDimitry Andric Register OrLHS = Curr->getOperand(1).getReg(); 3426e8d8bef9SDimitry Andric Register OrRHS = Curr->getOperand(2).getReg(); 3427e8d8bef9SDimitry Andric 3428e8d8bef9SDimitry Andric // In the combine, we want to elimate the entire tree. 3429e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) 3430e8d8bef9SDimitry Andric return None; 3431e8d8bef9SDimitry Andric 3432e8d8bef9SDimitry Andric // If it's a G_OR, save it and continue to walk. If it's not, then it's 3433e8d8bef9SDimitry Andric // something that may be a load + arithmetic. 
3434e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) 3435e8d8bef9SDimitry Andric Ors.push_back(Or); 3436e8d8bef9SDimitry Andric else 3437e8d8bef9SDimitry Andric RegsToVisit.push_back(OrLHS); 3438e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) 3439e8d8bef9SDimitry Andric Ors.push_back(Or); 3440e8d8bef9SDimitry Andric else 3441e8d8bef9SDimitry Andric RegsToVisit.push_back(OrRHS); 3442e8d8bef9SDimitry Andric } 3443e8d8bef9SDimitry Andric 3444e8d8bef9SDimitry Andric // We're going to try and merge each register into a wider power-of-2 type, 3445e8d8bef9SDimitry Andric // so we ought to have an even number of registers. 3446e8d8bef9SDimitry Andric if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) 3447e8d8bef9SDimitry Andric return None; 3448e8d8bef9SDimitry Andric return RegsToVisit; 3449e8d8bef9SDimitry Andric } 3450e8d8bef9SDimitry Andric 3451e8d8bef9SDimitry Andric /// Helper function for findLoadOffsetsForLoadOrCombine. 3452e8d8bef9SDimitry Andric /// 3453e8d8bef9SDimitry Andric /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, 3454e8d8bef9SDimitry Andric /// and then moving that value into a specific byte offset. 3455e8d8bef9SDimitry Andric /// 3456e8d8bef9SDimitry Andric /// e.g. x[i] << 24 3457e8d8bef9SDimitry Andric /// 3458e8d8bef9SDimitry Andric /// \returns The load instruction and the byte offset it is moved into. 
3459*fe6060f1SDimitry Andric static Optional<std::pair<GZExtLoad *, int64_t>> 3460e8d8bef9SDimitry Andric matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, 3461e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 3462e8d8bef9SDimitry Andric assert(MRI.hasOneNonDBGUse(Reg) && 3463e8d8bef9SDimitry Andric "Expected Reg to only have one non-debug use?"); 3464e8d8bef9SDimitry Andric Register MaybeLoad; 3465e8d8bef9SDimitry Andric int64_t Shift; 3466e8d8bef9SDimitry Andric if (!mi_match(Reg, MRI, 3467e8d8bef9SDimitry Andric m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { 3468e8d8bef9SDimitry Andric Shift = 0; 3469e8d8bef9SDimitry Andric MaybeLoad = Reg; 3470e8d8bef9SDimitry Andric } 3471e8d8bef9SDimitry Andric 3472e8d8bef9SDimitry Andric if (Shift % MemSizeInBits != 0) 3473e8d8bef9SDimitry Andric return None; 3474e8d8bef9SDimitry Andric 3475e8d8bef9SDimitry Andric // TODO: Handle other types of loads. 3476*fe6060f1SDimitry Andric auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI); 3477e8d8bef9SDimitry Andric if (!Load) 3478e8d8bef9SDimitry Andric return None; 3479e8d8bef9SDimitry Andric 3480*fe6060f1SDimitry Andric if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits) 3481e8d8bef9SDimitry Andric return None; 3482e8d8bef9SDimitry Andric 3483e8d8bef9SDimitry Andric return std::make_pair(Load, Shift / MemSizeInBits); 3484e8d8bef9SDimitry Andric } 3485e8d8bef9SDimitry Andric 3486*fe6060f1SDimitry Andric Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> 3487e8d8bef9SDimitry Andric CombinerHelper::findLoadOffsetsForLoadOrCombine( 3488e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 3489e8d8bef9SDimitry Andric const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { 3490e8d8bef9SDimitry Andric 3491e8d8bef9SDimitry Andric // Each load found for the pattern. There should be one for each RegsToVisit. 
3492e8d8bef9SDimitry Andric SmallSetVector<const MachineInstr *, 8> Loads; 3493e8d8bef9SDimitry Andric 3494e8d8bef9SDimitry Andric // The lowest index used in any load. (The lowest "i" for each x[i].) 3495e8d8bef9SDimitry Andric int64_t LowestIdx = INT64_MAX; 3496e8d8bef9SDimitry Andric 3497e8d8bef9SDimitry Andric // The load which uses the lowest index. 3498*fe6060f1SDimitry Andric GZExtLoad *LowestIdxLoad = nullptr; 3499e8d8bef9SDimitry Andric 3500e8d8bef9SDimitry Andric // Keeps track of the load indices we see. We shouldn't see any indices twice. 3501e8d8bef9SDimitry Andric SmallSet<int64_t, 8> SeenIdx; 3502e8d8bef9SDimitry Andric 3503e8d8bef9SDimitry Andric // Ensure each load is in the same MBB. 3504e8d8bef9SDimitry Andric // TODO: Support multiple MachineBasicBlocks. 3505e8d8bef9SDimitry Andric MachineBasicBlock *MBB = nullptr; 3506e8d8bef9SDimitry Andric const MachineMemOperand *MMO = nullptr; 3507e8d8bef9SDimitry Andric 3508e8d8bef9SDimitry Andric // Earliest instruction-order load in the pattern. 3509*fe6060f1SDimitry Andric GZExtLoad *EarliestLoad = nullptr; 3510e8d8bef9SDimitry Andric 3511e8d8bef9SDimitry Andric // Latest instruction-order load in the pattern. 3512*fe6060f1SDimitry Andric GZExtLoad *LatestLoad = nullptr; 3513e8d8bef9SDimitry Andric 3514e8d8bef9SDimitry Andric // Base pointer which every load should share. 3515e8d8bef9SDimitry Andric Register BasePtr; 3516e8d8bef9SDimitry Andric 3517e8d8bef9SDimitry Andric // We want to find a load for each register. Each load should have some 3518e8d8bef9SDimitry Andric // appropriate bit twiddling arithmetic. During this loop, we will also keep 3519e8d8bef9SDimitry Andric // track of the load which uses the lowest index. Later, we will check if we 3520e8d8bef9SDimitry Andric // can use its pointer in the final, combined load. 3521e8d8bef9SDimitry Andric for (auto Reg : RegsToVisit) { 3522e8d8bef9SDimitry Andric // Find the load, and find the position that it will end up in (e.g. 
a 3523e8d8bef9SDimitry Andric // shifted) value. 3524e8d8bef9SDimitry Andric auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); 3525e8d8bef9SDimitry Andric if (!LoadAndPos) 3526e8d8bef9SDimitry Andric return None; 3527*fe6060f1SDimitry Andric GZExtLoad *Load; 3528e8d8bef9SDimitry Andric int64_t DstPos; 3529e8d8bef9SDimitry Andric std::tie(Load, DstPos) = *LoadAndPos; 3530e8d8bef9SDimitry Andric 3531e8d8bef9SDimitry Andric // TODO: Handle multiple MachineBasicBlocks. Currently not handled because 3532e8d8bef9SDimitry Andric // it is difficult to check for stores/calls/etc between loads. 3533e8d8bef9SDimitry Andric MachineBasicBlock *LoadMBB = Load->getParent(); 3534e8d8bef9SDimitry Andric if (!MBB) 3535e8d8bef9SDimitry Andric MBB = LoadMBB; 3536e8d8bef9SDimitry Andric if (LoadMBB != MBB) 3537e8d8bef9SDimitry Andric return None; 3538e8d8bef9SDimitry Andric 3539e8d8bef9SDimitry Andric // Make sure that the MachineMemOperands of every seen load are compatible. 3540*fe6060f1SDimitry Andric auto &LoadMMO = Load->getMMO(); 3541e8d8bef9SDimitry Andric if (!MMO) 3542*fe6060f1SDimitry Andric MMO = &LoadMMO; 3543*fe6060f1SDimitry Andric if (MMO->getAddrSpace() != LoadMMO.getAddrSpace()) 3544e8d8bef9SDimitry Andric return None; 3545e8d8bef9SDimitry Andric 3546e8d8bef9SDimitry Andric // Find out what the base pointer and index for the load is. 3547e8d8bef9SDimitry Andric Register LoadPtr; 3548e8d8bef9SDimitry Andric int64_t Idx; 3549e8d8bef9SDimitry Andric if (!mi_match(Load->getOperand(1).getReg(), MRI, 3550e8d8bef9SDimitry Andric m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { 3551e8d8bef9SDimitry Andric LoadPtr = Load->getOperand(1).getReg(); 3552e8d8bef9SDimitry Andric Idx = 0; 3553e8d8bef9SDimitry Andric } 3554e8d8bef9SDimitry Andric 3555e8d8bef9SDimitry Andric // Don't combine things like a[i], a[i] -> a bigger load. 
3556e8d8bef9SDimitry Andric if (!SeenIdx.insert(Idx).second) 3557e8d8bef9SDimitry Andric return None; 3558e8d8bef9SDimitry Andric 3559e8d8bef9SDimitry Andric // Every load must share the same base pointer; don't combine things like: 3560e8d8bef9SDimitry Andric // 3561e8d8bef9SDimitry Andric // a[i], b[i + 1] -> a bigger load. 3562e8d8bef9SDimitry Andric if (!BasePtr.isValid()) 3563e8d8bef9SDimitry Andric BasePtr = LoadPtr; 3564e8d8bef9SDimitry Andric if (BasePtr != LoadPtr) 3565e8d8bef9SDimitry Andric return None; 3566e8d8bef9SDimitry Andric 3567e8d8bef9SDimitry Andric if (Idx < LowestIdx) { 3568e8d8bef9SDimitry Andric LowestIdx = Idx; 3569e8d8bef9SDimitry Andric LowestIdxLoad = Load; 3570e8d8bef9SDimitry Andric } 3571e8d8bef9SDimitry Andric 3572e8d8bef9SDimitry Andric // Keep track of the byte offset that this load ends up at. If we have seen 3573e8d8bef9SDimitry Andric // the byte offset, then stop here. We do not want to combine: 3574e8d8bef9SDimitry Andric // 3575e8d8bef9SDimitry Andric // a[i] << 16, a[i + k] << 16 -> a bigger load. 3576e8d8bef9SDimitry Andric if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) 3577e8d8bef9SDimitry Andric return None; 3578e8d8bef9SDimitry Andric Loads.insert(Load); 3579e8d8bef9SDimitry Andric 3580e8d8bef9SDimitry Andric // Keep track of the position of the earliest/latest loads in the pattern. 3581e8d8bef9SDimitry Andric // We will check that there are no load fold barriers between them later 3582e8d8bef9SDimitry Andric // on. 3583e8d8bef9SDimitry Andric // 3584e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3585e8d8bef9SDimitry Andric if (!EarliestLoad || dominates(*Load, *EarliestLoad)) 3586e8d8bef9SDimitry Andric EarliestLoad = Load; 3587e8d8bef9SDimitry Andric if (!LatestLoad || dominates(*LatestLoad, *Load)) 3588e8d8bef9SDimitry Andric LatestLoad = Load; 3589e8d8bef9SDimitry Andric } 3590e8d8bef9SDimitry Andric 3591e8d8bef9SDimitry Andric // We found a load for each register. 
Let's check if each load satisfies the 3592e8d8bef9SDimitry Andric // pattern. 3593e8d8bef9SDimitry Andric assert(Loads.size() == RegsToVisit.size() && 3594e8d8bef9SDimitry Andric "Expected to find a load for each register?"); 3595e8d8bef9SDimitry Andric assert(EarliestLoad != LatestLoad && EarliestLoad && 3596e8d8bef9SDimitry Andric LatestLoad && "Expected at least two loads?"); 3597e8d8bef9SDimitry Andric 3598e8d8bef9SDimitry Andric // Check if there are any stores, calls, etc. between any of the loads. If 3599e8d8bef9SDimitry Andric // there are, then we can't safely perform the combine. 3600e8d8bef9SDimitry Andric // 3601e8d8bef9SDimitry Andric // MaxIter is chosen based off the (worst case) number of iterations it 3602e8d8bef9SDimitry Andric // typically takes to succeed in the LLVM test suite plus some padding. 3603e8d8bef9SDimitry Andric // 3604e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3605e8d8bef9SDimitry Andric const unsigned MaxIter = 20; 3606e8d8bef9SDimitry Andric unsigned Iter = 0; 3607e8d8bef9SDimitry Andric for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), 3608e8d8bef9SDimitry Andric LatestLoad->getIterator())) { 3609e8d8bef9SDimitry Andric if (Loads.count(&MI)) 3610e8d8bef9SDimitry Andric continue; 3611e8d8bef9SDimitry Andric if (MI.isLoadFoldBarrier()) 3612e8d8bef9SDimitry Andric return None; 3613e8d8bef9SDimitry Andric if (Iter++ == MaxIter) 3614e8d8bef9SDimitry Andric return None; 3615e8d8bef9SDimitry Andric } 3616e8d8bef9SDimitry Andric 3617*fe6060f1SDimitry Andric return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad); 3618e8d8bef9SDimitry Andric } 3619e8d8bef9SDimitry Andric 3620e8d8bef9SDimitry Andric bool CombinerHelper::matchLoadOrCombine( 3621e8d8bef9SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3622e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 3623e8d8bef9SDimitry Andric MachineFunction &MF = 
*MI.getMF(); 3624e8d8bef9SDimitry Andric // Assuming a little-endian target, transform: 3625e8d8bef9SDimitry Andric // s8 *a = ... 3626e8d8bef9SDimitry Andric // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) 3627e8d8bef9SDimitry Andric // => 3628e8d8bef9SDimitry Andric // s32 val = *((i32)a) 3629e8d8bef9SDimitry Andric // 3630e8d8bef9SDimitry Andric // s8 *a = ... 3631e8d8bef9SDimitry Andric // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] 3632e8d8bef9SDimitry Andric // => 3633e8d8bef9SDimitry Andric // s32 val = BSWAP(*((s32)a)) 3634e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3635e8d8bef9SDimitry Andric LLT Ty = MRI.getType(Dst); 3636e8d8bef9SDimitry Andric if (Ty.isVector()) 3637e8d8bef9SDimitry Andric return false; 3638e8d8bef9SDimitry Andric 3639e8d8bef9SDimitry Andric // We need to combine at least two loads into this type. Since the smallest 3640e8d8bef9SDimitry Andric // possible load is into a byte, we need at least a 16-bit wide type. 3641e8d8bef9SDimitry Andric const unsigned WideMemSizeInBits = Ty.getSizeInBits(); 3642e8d8bef9SDimitry Andric if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) 3643e8d8bef9SDimitry Andric return false; 3644e8d8bef9SDimitry Andric 3645e8d8bef9SDimitry Andric // Match a collection of non-OR instructions in the pattern. 3646e8d8bef9SDimitry Andric auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); 3647e8d8bef9SDimitry Andric if (!RegsToVisit) 3648e8d8bef9SDimitry Andric return false; 3649e8d8bef9SDimitry Andric 3650e8d8bef9SDimitry Andric // We have a collection of non-OR instructions. Figure out how wide each of 3651e8d8bef9SDimitry Andric // the small loads should be based off of the number of potential loads we 3652e8d8bef9SDimitry Andric // found. 
3653e8d8bef9SDimitry Andric const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); 3654e8d8bef9SDimitry Andric if (NarrowMemSizeInBits % 8 != 0) 3655e8d8bef9SDimitry Andric return false; 3656e8d8bef9SDimitry Andric 3657e8d8bef9SDimitry Andric // Check if each register feeding into each OR is a load from the same 3658e8d8bef9SDimitry Andric // base pointer + some arithmetic. 3659e8d8bef9SDimitry Andric // 3660e8d8bef9SDimitry Andric // e.g. a[0], a[1] << 8, a[2] << 16, etc. 3661e8d8bef9SDimitry Andric // 3662e8d8bef9SDimitry Andric // Also verify that each of these ends up putting a[i] into the same memory 3663e8d8bef9SDimitry Andric // offset as a load into a wide type would. 3664e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; 3665*fe6060f1SDimitry Andric GZExtLoad *LowestIdxLoad, *LatestLoad; 3666e8d8bef9SDimitry Andric int64_t LowestIdx; 3667e8d8bef9SDimitry Andric auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( 3668e8d8bef9SDimitry Andric MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); 3669e8d8bef9SDimitry Andric if (!MaybeLoadInfo) 3670e8d8bef9SDimitry Andric return false; 3671*fe6060f1SDimitry Andric std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo; 3672e8d8bef9SDimitry Andric 3673e8d8bef9SDimitry Andric // We have a bunch of loads being OR'd together. Using the addresses + offsets 3674e8d8bef9SDimitry Andric // we found before, check if this corresponds to a big or little endian byte 3675e8d8bef9SDimitry Andric // pattern. If it does, then we can represent it using a load + possibly a 3676e8d8bef9SDimitry Andric // BSWAP. 
3677e8d8bef9SDimitry Andric bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); 3678e8d8bef9SDimitry Andric Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); 3679e8d8bef9SDimitry Andric if (!IsBigEndian.hasValue()) 3680e8d8bef9SDimitry Andric return false; 3681e8d8bef9SDimitry Andric bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; 3682e8d8bef9SDimitry Andric if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) 3683e8d8bef9SDimitry Andric return false; 3684e8d8bef9SDimitry Andric 3685e8d8bef9SDimitry Andric // Make sure that the load from the lowest index produces offset 0 in the 3686e8d8bef9SDimitry Andric // final value. 3687e8d8bef9SDimitry Andric // 3688e8d8bef9SDimitry Andric // This ensures that we won't combine something like this: 3689e8d8bef9SDimitry Andric // 3690e8d8bef9SDimitry Andric // load x[i] -> byte 2 3691e8d8bef9SDimitry Andric // load x[i+1] -> byte 0 ---> wide_load x[i] 3692e8d8bef9SDimitry Andric // load x[i+2] -> byte 1 3693e8d8bef9SDimitry Andric const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; 3694e8d8bef9SDimitry Andric const unsigned ZeroByteOffset = 3695e8d8bef9SDimitry Andric *IsBigEndian 3696e8d8bef9SDimitry Andric ? bigEndianByteAt(NumLoadsInTy, 0) 3697e8d8bef9SDimitry Andric : littleEndianByteAt(NumLoadsInTy, 0); 3698e8d8bef9SDimitry Andric auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); 3699e8d8bef9SDimitry Andric if (ZeroOffsetIdx == MemOffset2Idx.end() || 3700e8d8bef9SDimitry Andric ZeroOffsetIdx->second != LowestIdx) 3701e8d8bef9SDimitry Andric return false; 3702e8d8bef9SDimitry Andric 3703e8d8bef9SDimitry Andric // We wil reuse the pointer from the load which ends up at byte offset 0. It 3704e8d8bef9SDimitry Andric // may not use index 0. 
3705*fe6060f1SDimitry Andric Register Ptr = LowestIdxLoad->getPointerReg(); 3706*fe6060f1SDimitry Andric const MachineMemOperand &MMO = LowestIdxLoad->getMMO(); 3707e8d8bef9SDimitry Andric LegalityQuery::MemDesc MMDesc; 3708*fe6060f1SDimitry Andric MMDesc.MemoryTy = Ty; 3709e8d8bef9SDimitry Andric MMDesc.AlignInBits = MMO.getAlign().value() * 8; 3710*fe6060f1SDimitry Andric MMDesc.Ordering = MMO.getSuccessOrdering(); 3711e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 3712e8d8bef9SDimitry Andric {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) 3713e8d8bef9SDimitry Andric return false; 3714e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 3715e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); 3716e8d8bef9SDimitry Andric 3717e8d8bef9SDimitry Andric // Load must be allowed and fast on the target. 3718e8d8bef9SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 3719e8d8bef9SDimitry Andric auto &DL = MF.getDataLayout(); 3720e8d8bef9SDimitry Andric bool Fast = false; 3721e8d8bef9SDimitry Andric if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || 3722e8d8bef9SDimitry Andric !Fast) 3723e8d8bef9SDimitry Andric return false; 3724e8d8bef9SDimitry Andric 3725e8d8bef9SDimitry Andric MatchInfo = [=](MachineIRBuilder &MIB) { 3726*fe6060f1SDimitry Andric MIB.setInstrAndDebugLoc(*LatestLoad); 3727e8d8bef9SDimitry Andric Register LoadDst = NeedsBSwap ? 
MRI.cloneVirtualRegister(Dst) : Dst; 3728e8d8bef9SDimitry Andric MIB.buildLoad(LoadDst, Ptr, *NewMMO); 3729e8d8bef9SDimitry Andric if (NeedsBSwap) 3730e8d8bef9SDimitry Andric MIB.buildBSwap(Dst, LoadDst); 3731e8d8bef9SDimitry Andric }; 3732e8d8bef9SDimitry Andric return true; 3733e8d8bef9SDimitry Andric } 3734e8d8bef9SDimitry Andric 3735*fe6060f1SDimitry Andric bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, 3736*fe6060f1SDimitry Andric MachineInstr *&ExtMI) { 3737*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PHI); 3738*fe6060f1SDimitry Andric 3739*fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3740*fe6060f1SDimitry Andric 3741*fe6060f1SDimitry Andric // TODO: Extending a vector may be expensive, don't do this until heuristics 3742*fe6060f1SDimitry Andric // are better. 3743*fe6060f1SDimitry Andric if (MRI.getType(DstReg).isVector()) 3744*fe6060f1SDimitry Andric return false; 3745*fe6060f1SDimitry Andric 3746*fe6060f1SDimitry Andric // Try to match a phi, whose only use is an extend. 3747*fe6060f1SDimitry Andric if (!MRI.hasOneNonDBGUse(DstReg)) 3748*fe6060f1SDimitry Andric return false; 3749*fe6060f1SDimitry Andric ExtMI = &*MRI.use_instr_nodbg_begin(DstReg); 3750*fe6060f1SDimitry Andric switch (ExtMI->getOpcode()) { 3751*fe6060f1SDimitry Andric case TargetOpcode::G_ANYEXT: 3752*fe6060f1SDimitry Andric return true; // G_ANYEXT is usually free. 3753*fe6060f1SDimitry Andric case TargetOpcode::G_ZEXT: 3754*fe6060f1SDimitry Andric case TargetOpcode::G_SEXT: 3755*fe6060f1SDimitry Andric break; 3756*fe6060f1SDimitry Andric default: 3757*fe6060f1SDimitry Andric return false; 3758*fe6060f1SDimitry Andric } 3759*fe6060f1SDimitry Andric 3760*fe6060f1SDimitry Andric // If the target is likely to fold this extend away, don't propagate. 
  // --- tail of matchExtendThroughPhis; the function head lies above this
  // chunk. ---
  // If the target reports the extend is likely to be folded into one of its
  // users anyway, propagating it through the PHI would lose that folding.
  if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
    return false;

  // We don't want to propagate the extends unless there's a good chance that
  // they'll be optimized in some way.
  // Collect the unique incoming values.
  SmallPtrSet<MachineInstr *, 4> InSrcs;
  // G_PHI operands are (reg, pred-MBB) pairs starting at operand 1, hence
  // the stride of 2.
  for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
    auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
    switch (DefMI->getOpcode()) {
    case TargetOpcode::G_LOAD:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_CONSTANT:
      InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
      // Don't try to propagate if there are too many places to create new
      // extends, chances are it'll increase code size.
      if (InSrcs.size() > 2)
        return false;
      break;
    default:
      // Any other producer is not worth extending through.
      return false;
    }
  }
  return true;
}

/// Rewrite ext(G_PHI(...)) by building a copy of the extend \p ExtMI after
/// each incoming def, then creating a new G_PHI over the extended values
/// whose def is \p ExtMI's result register. \p ExtMI is erased.
/// NOTE(review): the original narrow G_PHI \p MI is left in place —
/// presumably cleaned up as dead code later; confirm with the combiner's
/// DCE behavior.
void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
                                            MachineInstr *&ExtMI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI);
  Register DstReg = ExtMI->getOperand(0).getReg();
  LLT ExtTy = MRI.getType(DstReg);

  // Propagate the extension into the block of each incoming reg's block.
  // Use a SetVector here because PHIs can have duplicate edges, and we want
  // deterministic iteration order.
  SmallSetVector<MachineInstr *, 8> SrcMIs;
  SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
    auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
    // Only materialize one extend per unique incoming def.
    if (!SrcMIs.insert(SrcMI))
      continue;

    // Build an extend after each src inst.
    auto *MBB = SrcMI->getParent();
    MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
    // Never insert between PHIs; fall through to the first non-PHI position.
    if (InsertPt != MBB->end() && InsertPt->isPHI())
      InsertPt = MBB->getFirstNonPHI();

    Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
    Builder.setDebugLoc(MI.getDebugLoc());
    auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
                                          SrcMI->getOperand(0).getReg());
    OldToNewSrcMap[SrcMI] = NewExt;
  }

  // Create a new phi with the extended inputs.
  Builder.setInstrAndDebugLoc(MI);
  auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
  NewPhi.addDef(DstReg);
  // Walk every operand (stride 1 here): registers are remapped to the new
  // extended values, MBB operands are copied through unchanged.
  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
    auto &MO = MI.getOperand(SrcIdx);
    if (!MO.isReg()) {
      NewPhi.addMBB(MO.getMBB());
      continue;
    }
    auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
    NewPhi.addUse(NewSrc->getOperand(0).getReg());
  }
  Builder.insertInstr(NewPhi);
  ExtMI->eraseFromParent();
}

/// Match G_EXTRACT_VECTOR_ELT(build_vector, constant index) and report the
/// scalar source register that the index maps to in \p Reg.
bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
  // If we have a constant index, look for a G_BUILD_VECTOR source
  // and find the source register that the index maps to.
  Register SrcVec = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcVec);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
    return false;

  // The index must be a known constant that is in range for the vector.
  auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
    return false;

  unsigned VecIdx = Cst->Value.getZExtValue();
  MachineInstr *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
  if (!BuildVecMI) {
    // Fall back to G_BUILD_VECTOR_TRUNC; its scalar sources are wider than
    // the vector element, which the apply step handles with a trunc.
    BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
    if (!BuildVecMI)
      return false;
    LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
      return false;
  }

  // If the build_vector has other users, only proceed when the target says
  // it aggressively prefers build_vector sources.
  EVT Ty(getMVTForLLT(SrcTy));
  if (!MRI.hasOneNonDBGUse(SrcVec) &&
      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
    return false;

  Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
  return true;
}

/// Replace the extract with \p Reg (found by matchExtractVecEltBuildVec),
/// truncating first if \p Reg came from a G_BUILD_VECTOR_TRUNC and is wider
/// than the extract's result type.
void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  // Check the type of the register, since it may have come from a
  // G_BUILD_VECTOR_TRUNC.
  LLT ScalarTy = MRI.getType(Reg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  if (ScalarTy != DstTy) {
    assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
    Builder.buildTrunc(DstReg, Reg);
    MI.eraseFromParent();
    return;
  }
  replaceSingleDefInstWithReg(MI, Reg);
}

/// Match a G_BUILD_VECTOR whose every non-debug use is a constant-index
/// G_EXTRACT_VECTOR_ELT covering all elements; fill \p SrcDstPairs with
/// (scalar source, extract instruction) pairs for the apply step.
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // This combine tries to find build_vector's which have every source element
  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
  // the masked load scalarization is run late in the pipeline. There's already
  // a combine for a similar pattern starting from the extract, but that
  // doesn't attempt to do it if there are multiple uses of the build_vector,
  // which in this case is true. Starting the combine from the build_vector
  // feels more natural than trying to find sibling nodes of extracts.
  // E.g.
  //  %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  //  %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  //  %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  //  %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  //  %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  // ==>
  // replace ext{1,2,3,4} with %s{1,2,3,4}

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  unsigned NumElts = DstTy.getNumElements();

  SmallBitVector ExtractedElts(NumElts);
  for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
                             MRI.use_instr_nodbg_end())) {
    // Every use must be an in-range, constant-index extract.
    if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
      return false;
    auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    unsigned Idx = Cst.getValue().getZExtValue();
    if (Idx >= NumElts)
      return false; // Out of range.
    ExtractedElts.set(Idx);
    SrcDstPairs.emplace_back(
        std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
  }
  // Match if every element was extracted.
  return ExtractedElts.all();
}

/// Replace each recorded extract with its scalar source, then erase the
/// now-unused G_BUILD_VECTOR.
void CombinerHelper::applyExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  for (auto &Pair : SrcDstPairs) {
    auto *ExtMI = Pair.second;
    replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
    ExtMI->eraseFromParent();
  }
  MI.eraseFromParent();
}

/// Run a deferred build function at \p MI's position, then erase \p MI.
void CombinerHelper::applyBuildFn(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  MI.eraseFromParent();
}

/// Same as applyBuildFn but leaves \p MI in place for the caller to manage.
void CombinerHelper::applyBuildFnNoErase(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
}

/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
/// A funnel shift whose two value operands are the same register is a
/// rotate; match when the corresponding rotate opcode is legal (or we are
/// before the legalizer).
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  if (X != Y)
    return false;
  unsigned RotateOpc =
      Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
  return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
}

/// Mutate the G_FSHL/G_FSHR in place into G_ROTL/G_ROTR by dropping the
/// duplicated value operand; the shift amount (previously operand 3) slides
/// into operand 2.
void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  bool IsFSHL = Opc == TargetOpcode::G_FSHL;
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
                                         : TargetOpcode::G_ROTR));
  MI.RemoveOperand(2);
  Observer.changedInstr(MI);
}

// Fold (rot x, c) -> (rot x, c % BitSize)
/// Match when the rotate amount is a constant (scalar or per-element) with at
/// least one element >= the scalar bit width, i.e. the amount can be reduced
/// modulo the bit size.
bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Register AmtReg = MI.getOperand(2).getReg();
  bool OutOfRange = false;
  // The callback always returns true so every constant element is visited;
  // OutOfRange latches if any element is >= Bitsize.
  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
    if (auto *CI = dyn_cast<ConstantInt>(C))
      OutOfRange |= CI->getValue().uge(Bitsize);
    return true;
  };
  return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
}

/// Replace the rotate amount with (amount urem Bitsize), notifying the
/// observer of the in-place operand change.
void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
  Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(Amt);
  Observer.changedInstr(MI);
}

/// If known-bits analysis can prove the G_ICMP's result, set \p MatchInfo to
/// the target's "true" value (or 0 for false) and return true; return false
/// when the comparison cannot be decided from known bits.
bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
                                                   int64_t &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
  auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
  // Each KnownBits comparator yields None when the outcome is not provable.
  Optional<bool> KnownVal;
  switch (Pred) {
  default:
    llvm_unreachable("Unexpected G_ICMP predicate?");
  case CmpInst::ICMP_EQ:
    KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_NE:
    KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SGE:
    KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SGT:
    KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SLE:
    KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SLT:
    KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_UGE:
    KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_UGT:
    KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_ULE:
    KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_ULT:
    KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
    break;
  }
  if (!KnownVal)
    return false;
  // The "true" encoding is target-dependent (e.g. 1 vs all-ones for
  // vectors); false is always 0.
  MatchInfo =
      *KnownVal
          ? getICmpTrueVal(getTargetLowering(),
                           /*IsVector = */
                           MRI.getType(MI.getOperand(0).getReg()).isVector(),
                           /* IsFP = */ false)
          : 0;
  return true;
}

/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4068*fe6060f1SDimitry Andric bool CombinerHelper::matchBitfieldExtractFromSExtInReg( 4069*fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4070*fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 4071*fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4072*fe6060f1SDimitry Andric Register Src = MI.getOperand(1).getReg(); 4073*fe6060f1SDimitry Andric LLT Ty = MRI.getType(Src); 4074*fe6060f1SDimitry Andric LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4075*fe6060f1SDimitry Andric if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}})) 4076*fe6060f1SDimitry Andric return false; 4077*fe6060f1SDimitry Andric int64_t Width = MI.getOperand(2).getImm(); 4078*fe6060f1SDimitry Andric Register ShiftSrc; 4079*fe6060f1SDimitry Andric int64_t ShiftImm; 4080*fe6060f1SDimitry Andric if (!mi_match( 4081*fe6060f1SDimitry Andric Src, MRI, 4082*fe6060f1SDimitry Andric m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)), 4083*fe6060f1SDimitry Andric m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)))))) 4084*fe6060f1SDimitry Andric return false; 4085*fe6060f1SDimitry Andric if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits()) 4086*fe6060f1SDimitry Andric return false; 4087*fe6060f1SDimitry Andric 4088*fe6060f1SDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 4089*fe6060f1SDimitry Andric auto Cst1 = B.buildConstant(ExtractTy, ShiftImm); 4090*fe6060f1SDimitry Andric auto Cst2 = B.buildConstant(ExtractTy, Width); 4091*fe6060f1SDimitry Andric B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2); 4092*fe6060f1SDimitry Andric }; 4093*fe6060f1SDimitry Andric return true; 4094*fe6060f1SDimitry Andric } 4095*fe6060f1SDimitry Andric 4096*fe6060f1SDimitry Andric /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. 
/// Match "(a >> lsb) & mask" where lsb is constant and mask is a constant
/// low-bits mask, and build a deferred rewrite to
/// G_UBFX a, lsb, popcount(mask).
bool CombinerHelper::matchBitfieldExtractFromAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  // "Extact" (sic) matches the TargetLowering hook's spelling.
  if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
          TargetOpcode::G_UBFX, Ty, Ty))
    return false;

  int64_t AndImm, LSBImm;
  Register ShiftSrc;
  const unsigned Size = Ty.getScalarSizeInBits();
  // The shift must have a single non-debug use so the G_UBFX fully replaces
  // it; both shift amount and mask must be constants.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
                       m_ICst(AndImm))))
    return false;

  // The mask is a mask of the low bits iff imm & (imm+1) == 0.
  auto MaybeMask = static_cast<uint64_t>(AndImm);
  if (MaybeMask & (MaybeMask + 1))
    return false;

  // LSB must fit within the register.
  if (static_cast<uint64_t>(LSBImm) >= Size)
    return false;

  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  // Field width = number of set low bits in the mask.
  uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
  };
  return true;
}

/// Return true if folding (ptradd (ptradd base, C1), C2) into
/// (ptradd base, C1+C2) would break a currently-legal addressing mode of
/// some load/store that uses the inner ptradd.
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
    MachineInstr &PtrAdd) {
  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

  Register Src1Reg = PtrAdd.getOperand(1).getReg();
  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
  if (!Src1Def)
    return false;

  Register Src2Reg = PtrAdd.getOperand(2).getReg();

  // If this is the inner ptradd's only (non-debug) use, nothing else can be
  // pessimized by reassociating it.
  if (MRI.hasOneNonDBGUse(Src1Reg))
    return false;

  // Both offsets must be constants for the C1+C2 fold to apply at all.
  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
  if (!C1)
    return false;
  auto C2 = getConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  const APInt &C1APIntVal = *C1;
  const APInt &C2APIntVal = *C2;
  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
    // This combine may end up running before ptrtoint/inttoptr combines
    // manage to eliminate redundant conversions, so try to look through them.
    MachineInstr *ConvUseMI = &UseMI;
    unsigned ConvUseOpc = ConvUseMI->getOpcode();
    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
      Register DefReg = ConvUseMI->getOperand(0).getReg();
      if (!MRI.hasOneNonDBGUse(DefReg))
        break;
      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
      ConvUseOpc = ConvUseMI->getOpcode();
    }
    // Only memory users constrain addressing modes.
    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
                     ConvUseOpc == TargetOpcode::G_STORE;
    if (!LoadStore)
      continue;
    // Is x[offset2] already not a legal addressing mode? If so then
    // reassociating the constants breaks nothing (we test offset2 because
    // that's the one we hope to fold into the load or store).
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = C2APIntVal.getSExtValue();
    unsigned AS =
        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
    Type *AccessTy =
        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
                      PtrAdd.getMF()->getFunction().getContext());
    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      continue;

    // Would x[offset1+offset2] still be a legal addressing mode?
    AM.BaseOffs = CombinedValue;
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      return true;
  }

  return false;
}

/// Match re-association opportunities on a G_PTR_ADD and set \p MatchInfo to
/// a deferred rewrite. Returns false if the rewrite could break a legal
/// addressing mode (see reassociationCanBreakAddressingModePattern).
bool CombinerHelper::matchReassocPtrAdd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  // doesn't break a legal addressing mode.
  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);

  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
    // Try to match example 1).
    if (RHS->getOpcode() != TargetOpcode::G_ADD)
      return false;
    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
    if (!C2)
      return false;

    // NOTE(review): this lambda uses the member Builder/Observer (captured
    // via this) rather than the B parameter like the branch below — the
    // apply step passes the same builder, so behavior matches; confirm.
    MatchInfo = [=,&MI](MachineIRBuilder &B) {
      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());

      auto NewBase =
          Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(NewBase.getReg(0));
      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
      Observer.changedInstr(MI);
    };
  } else {
    // Try to match example 2.
    Register LHSSrc1 = LHS->getOperand(1).getReg();
    Register LHSSrc2 = LHS->getOperand(2).getReg();
    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
    if (!C1)
      return false;
    auto C2 = getConstantVRegVal(Src2Reg, MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(LHSSrc1);
      MI.getOperand(2).setReg(NewCst.getReg(0));
      Observer.changedInstr(MI);
    };
  }
  // Veto the rewrite if it would pessimize an addressing mode.
  return !reassociationCanBreakAddressingModePattern(MI);
}

/// Constant-fold a binary generic op with two constant operands; on success
/// the folded value is returned in \p MatchInfo.
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
  if (!MaybeCst)
    return false;
  MatchInfo = *MaybeCst;
  return true;
}

/// Driver entry point: attempt each standalone combine in turn, returning
/// true as soon as one fires.
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;

  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}