10b57cec5SDimitry Andric //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 9fe6060f1SDimitry Andric #include "llvm/ADT/SetVector.h" 10fe6060f1SDimitry Andric #include "llvm/ADT/SmallBitVector.h" 110b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 120b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 138bcb0991SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 14fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 15*349cc55cSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 165ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 175ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 19*349cc55cSDimitry Andric #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" 200b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h" 21fe6060f1SDimitry Andric #include "llvm/CodeGen/LowLevelType.h" 22fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 238bcb0991SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 248bcb0991SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 26e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 298bcb0991SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 30fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h" 31*349cc55cSDimitry Andric #include "llvm/IR/DataLayout.h" 32*349cc55cSDimitry Andric #include "llvm/Support/Casting.h" 33*349cc55cSDimitry Andric #include "llvm/Support/DivisionByConstantInfo.h" 345ffd83dbSDimitry Andric #include "llvm/Support/MathExtras.h" 35fe6060f1SDimitry Andric #include <tuple> 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric #define DEBUG_TYPE "gi-combiner" 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric using namespace llvm; 405ffd83dbSDimitry Andric using namespace MIPatternMatch; 410b57cec5SDimitry Andric 428bcb0991SDimitry Andric // Option to allow testing of the combiner while no targets know about indexed 438bcb0991SDimitry Andric // addressing. 448bcb0991SDimitry Andric static cl::opt<bool> 458bcb0991SDimitry Andric ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), 468bcb0991SDimitry Andric cl::desc("Force all indexed operations to be " 478bcb0991SDimitry Andric "legal for the GlobalISel combiner")); 488bcb0991SDimitry Andric 490b57cec5SDimitry Andric CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, 508bcb0991SDimitry Andric MachineIRBuilder &B, GISelKnownBits *KB, 515ffd83dbSDimitry Andric MachineDominatorTree *MDT, 525ffd83dbSDimitry Andric const LegalizerInfo *LI) 53*349cc55cSDimitry Andric : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB), 54*349cc55cSDimitry Andric MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()), 55*349cc55cSDimitry Andric TRI(Builder.getMF().getSubtarget().getRegisterInfo()) { 568bcb0991SDimitry Andric (void)this->KB; 578bcb0991SDimitry Andric } 580b57cec5SDimitry Andric 59e8d8bef9SDimitry Andric const TargetLowering &CombinerHelper::getTargetLowering() const { 60e8d8bef9SDimitry Andric return *Builder.getMF().getSubtarget().getTargetLowering(); 61e8d8bef9SDimitry Andric } 62e8d8bef9SDimitry Andric 63e8d8bef9SDimitry Andric /// \returns The little endian in-memory byte position of byte \p I in a 64e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 65e8d8bef9SDimitry Andric /// 66e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 0 67e8d8bef9SDimitry Andric static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) { 68e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 69e8d8bef9SDimitry Andric return I; 70e8d8bef9SDimitry Andric } 71e8d8bef9SDimitry Andric 72*349cc55cSDimitry Andric /// Determines the LogBase2 value for a non-null input value using the 73*349cc55cSDimitry Andric /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 74*349cc55cSDimitry Andric static Register buildLogBase2(Register V, MachineIRBuilder &MIB) { 75*349cc55cSDimitry Andric auto &MRI = *MIB.getMRI(); 76*349cc55cSDimitry Andric LLT Ty = MRI.getType(V); 77*349cc55cSDimitry Andric auto Ctlz = MIB.buildCTLZ(Ty, V); 78*349cc55cSDimitry Andric auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1); 79*349cc55cSDimitry Andric return MIB.buildSub(Ty, Base, Ctlz).getReg(0); 80*349cc55cSDimitry Andric } 81*349cc55cSDimitry Andric 82e8d8bef9SDimitry Andric /// \returns The big endian in-memory byte position of byte \p I in a 83e8d8bef9SDimitry Andric /// \p ByteWidth bytes wide type. 84e8d8bef9SDimitry Andric /// 85e8d8bef9SDimitry Andric /// E.g. Given a 4-byte type x, x[0] -> byte 3 86e8d8bef9SDimitry Andric static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) { 87e8d8bef9SDimitry Andric assert(I < ByteWidth && "I must be in [0, ByteWidth)"); 88e8d8bef9SDimitry Andric return ByteWidth - I - 1; 89e8d8bef9SDimitry Andric } 90e8d8bef9SDimitry Andric 91e8d8bef9SDimitry Andric /// Given a map from byte offsets in memory to indices in a load/store, 92e8d8bef9SDimitry Andric /// determine if that map corresponds to a little or big endian byte pattern. 93e8d8bef9SDimitry Andric /// 94e8d8bef9SDimitry Andric /// \param MemOffset2Idx maps memory offsets to address offsets. 95e8d8bef9SDimitry Andric /// \param LowestIdx is the lowest index in \p MemOffset2Idx. 96e8d8bef9SDimitry Andric /// 97e8d8bef9SDimitry Andric /// \returns true if the map corresponds to a big endian byte pattern, false 98e8d8bef9SDimitry Andric /// if it corresponds to a little endian byte pattern, and None otherwise. 99e8d8bef9SDimitry Andric /// 100e8d8bef9SDimitry Andric /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns 101e8d8bef9SDimitry Andric /// are as follows: 102e8d8bef9SDimitry Andric /// 103e8d8bef9SDimitry Andric /// AddrOffset Little endian Big endian 104e8d8bef9SDimitry Andric /// 0 0 3 105e8d8bef9SDimitry Andric /// 1 1 2 106e8d8bef9SDimitry Andric /// 2 2 1 107e8d8bef9SDimitry Andric /// 3 3 0 108e8d8bef9SDimitry Andric static Optional<bool> 109e8d8bef9SDimitry Andric isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 110e8d8bef9SDimitry Andric int64_t LowestIdx) { 111e8d8bef9SDimitry Andric // Need at least two byte positions to decide on endianness. 112e8d8bef9SDimitry Andric unsigned Width = MemOffset2Idx.size(); 113e8d8bef9SDimitry Andric if (Width < 2) 114e8d8bef9SDimitry Andric return None; 115e8d8bef9SDimitry Andric bool BigEndian = true, LittleEndian = true; 116e8d8bef9SDimitry Andric for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { 117e8d8bef9SDimitry Andric auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); 118e8d8bef9SDimitry Andric if (MemOffsetAndIdx == MemOffset2Idx.end()) 119e8d8bef9SDimitry Andric return None; 120e8d8bef9SDimitry Andric const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; 121e8d8bef9SDimitry Andric assert(Idx >= 0 && "Expected non-negative byte offset?"); 122e8d8bef9SDimitry Andric LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); 123e8d8bef9SDimitry Andric BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); 124e8d8bef9SDimitry Andric if (!BigEndian && !LittleEndian) 125e8d8bef9SDimitry Andric return None; 126e8d8bef9SDimitry Andric } 127e8d8bef9SDimitry Andric 128e8d8bef9SDimitry Andric assert((BigEndian != LittleEndian) && 129e8d8bef9SDimitry Andric "Pattern cannot be both big and little endian!"); 130e8d8bef9SDimitry Andric return BigEndian; 131e8d8bef9SDimitry Andric } 132e8d8bef9SDimitry Andric 133e8d8bef9SDimitry Andric bool CombinerHelper::isLegalOrBeforeLegalizer( 134e8d8bef9SDimitry Andric const LegalityQuery &Query) const { 135e8d8bef9SDimitry Andric return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; 136e8d8bef9SDimitry Andric } 137e8d8bef9SDimitry Andric 1380b57cec5SDimitry Andric void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, 1390b57cec5SDimitry Andric Register ToReg) const { 1400b57cec5SDimitry Andric Observer.changingAllUsesOfReg(MRI, FromReg); 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric if (MRI.constrainRegAttrs(ToReg, FromReg)) 1430b57cec5SDimitry Andric MRI.replaceRegWith(FromReg, ToReg); 1440b57cec5SDimitry Andric else 1450b57cec5SDimitry Andric Builder.buildCopy(ToReg, FromReg); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric Observer.finishedChangingAllUsesOfReg(); 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, 1510b57cec5SDimitry Andric MachineOperand &FromRegOp, 1520b57cec5SDimitry Andric Register ToReg) const { 1530b57cec5SDimitry Andric assert(FromRegOp.getParent() && "Expected an operand in an MI"); 1540b57cec5SDimitry Andric Observer.changingInstr(*FromRegOp.getParent()); 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric FromRegOp.setReg(ToReg); 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric Observer.changedInstr(*FromRegOp.getParent()); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 161*349cc55cSDimitry Andric void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI, 162*349cc55cSDimitry Andric unsigned ToOpcode) const { 163*349cc55cSDimitry Andric Observer.changingInstr(FromMI); 164*349cc55cSDimitry Andric 165*349cc55cSDimitry Andric FromMI.setDesc(Builder.getTII().get(ToOpcode)); 166*349cc55cSDimitry Andric 167*349cc55cSDimitry Andric Observer.changedInstr(FromMI); 168*349cc55cSDimitry Andric } 169*349cc55cSDimitry Andric 170*349cc55cSDimitry Andric const RegisterBank *CombinerHelper::getRegBank(Register Reg) const { 171*349cc55cSDimitry Andric return RBI->getRegBank(Reg, MRI, *TRI); 172*349cc55cSDimitry Andric } 173*349cc55cSDimitry Andric 174*349cc55cSDimitry Andric void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) { 175*349cc55cSDimitry Andric if (RegBank) 176*349cc55cSDimitry Andric MRI.setRegBank(Reg, *RegBank); 177*349cc55cSDimitry Andric } 178*349cc55cSDimitry Andric 1790b57cec5SDimitry Andric bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { 1800b57cec5SDimitry Andric if (matchCombineCopy(MI)) { 1810b57cec5SDimitry Andric applyCombineCopy(MI); 1820b57cec5SDimitry Andric return true; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric return false; 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { 1870b57cec5SDimitry Andric if (MI.getOpcode() != TargetOpcode::COPY) 1880b57cec5SDimitry Andric return false; 1898bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1908bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1915ffd83dbSDimitry Andric return canReplaceReg(DstReg, SrcReg, MRI); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric void CombinerHelper::applyCombineCopy(MachineInstr &MI) { 1948bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1958bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1960b57cec5SDimitry Andric MI.eraseFromParent(); 1970b57cec5SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2008bcb0991SDimitry Andric bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { 2018bcb0991SDimitry Andric bool IsUndef = false; 2028bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 2038bcb0991SDimitry Andric if (matchCombineConcatVectors(MI, IsUndef, Ops)) { 2048bcb0991SDimitry Andric applyCombineConcatVectors(MI, IsUndef, Ops); 2058bcb0991SDimitry Andric return true; 2068bcb0991SDimitry Andric } 2078bcb0991SDimitry Andric return false; 2088bcb0991SDimitry Andric } 2098bcb0991SDimitry Andric 2108bcb0991SDimitry Andric bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, 2118bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 2128bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 2138bcb0991SDimitry Andric "Invalid instruction"); 2148bcb0991SDimitry Andric IsUndef = true; 2158bcb0991SDimitry Andric MachineInstr *Undef = nullptr; 2168bcb0991SDimitry Andric 2178bcb0991SDimitry Andric // Walk over all the operands of concat vectors and check if they are 2188bcb0991SDimitry Andric // build_vector themselves or undef. 2198bcb0991SDimitry Andric // Then collect their operands in Ops. 220480093f4SDimitry Andric for (const MachineOperand &MO : MI.uses()) { 2218bcb0991SDimitry Andric Register Reg = MO.getReg(); 2228bcb0991SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 2238bcb0991SDimitry Andric assert(Def && "Operand not defined"); 2248bcb0991SDimitry Andric switch (Def->getOpcode()) { 2258bcb0991SDimitry Andric case TargetOpcode::G_BUILD_VECTOR: 2268bcb0991SDimitry Andric IsUndef = false; 2278bcb0991SDimitry Andric // Remember the operands of the build_vector to fold 2288bcb0991SDimitry Andric // them into the yet-to-build flattened concat vectors. 229480093f4SDimitry Andric for (const MachineOperand &BuildVecMO : Def->uses()) 2308bcb0991SDimitry Andric Ops.push_back(BuildVecMO.getReg()); 2318bcb0991SDimitry Andric break; 2328bcb0991SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 2338bcb0991SDimitry Andric LLT OpType = MRI.getType(Reg); 2348bcb0991SDimitry Andric // Keep one undef value for all the undef operands. 2358bcb0991SDimitry Andric if (!Undef) { 2368bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2378bcb0991SDimitry Andric Undef = Builder.buildUndef(OpType.getScalarType()); 2388bcb0991SDimitry Andric } 2398bcb0991SDimitry Andric assert(MRI.getType(Undef->getOperand(0).getReg()) == 2408bcb0991SDimitry Andric OpType.getScalarType() && 2418bcb0991SDimitry Andric "All undefs should have the same type"); 2428bcb0991SDimitry Andric // Break the undef vector in as many scalar elements as needed 2438bcb0991SDimitry Andric // for the flattening. 2448bcb0991SDimitry Andric for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); 2458bcb0991SDimitry Andric EltIdx != EltEnd; ++EltIdx) 2468bcb0991SDimitry Andric Ops.push_back(Undef->getOperand(0).getReg()); 2478bcb0991SDimitry Andric break; 2488bcb0991SDimitry Andric } 2498bcb0991SDimitry Andric default: 2508bcb0991SDimitry Andric return false; 2518bcb0991SDimitry Andric } 2528bcb0991SDimitry Andric } 2538bcb0991SDimitry Andric return true; 2548bcb0991SDimitry Andric } 2558bcb0991SDimitry Andric void CombinerHelper::applyCombineConcatVectors( 2568bcb0991SDimitry Andric MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { 2578bcb0991SDimitry Andric // We determined that the concat_vectors can be flatten. 2588bcb0991SDimitry Andric // Generate the flattened build_vector. 2598bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2608bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 2618bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 2628bcb0991SDimitry Andric 2638bcb0991SDimitry Andric // Note: IsUndef is sort of redundant. We could have determine it by 2648bcb0991SDimitry Andric // checking that at all Ops are undef. Alternatively, we could have 2658bcb0991SDimitry Andric // generate a build_vector of undefs and rely on another combine to 2668bcb0991SDimitry Andric // clean that up. For now, given we already gather this information 2678bcb0991SDimitry Andric // in tryCombineConcatVectors, just save compile time and issue the 2688bcb0991SDimitry Andric // right thing. 2698bcb0991SDimitry Andric if (IsUndef) 2708bcb0991SDimitry Andric Builder.buildUndef(NewDstReg); 2718bcb0991SDimitry Andric else 2728bcb0991SDimitry Andric Builder.buildBuildVector(NewDstReg, Ops); 2738bcb0991SDimitry Andric MI.eraseFromParent(); 2748bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 2758bcb0991SDimitry Andric } 2768bcb0991SDimitry Andric 2778bcb0991SDimitry Andric bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { 2788bcb0991SDimitry Andric SmallVector<Register, 4> Ops; 2798bcb0991SDimitry Andric if (matchCombineShuffleVector(MI, Ops)) { 2808bcb0991SDimitry Andric applyCombineShuffleVector(MI, Ops); 2818bcb0991SDimitry Andric return true; 2828bcb0991SDimitry Andric } 2838bcb0991SDimitry Andric return false; 2848bcb0991SDimitry Andric } 2858bcb0991SDimitry Andric 2868bcb0991SDimitry Andric bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, 2878bcb0991SDimitry Andric SmallVectorImpl<Register> &Ops) { 2888bcb0991SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && 2898bcb0991SDimitry Andric "Invalid instruction kind"); 2908bcb0991SDimitry Andric LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 2918bcb0991SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 2928bcb0991SDimitry Andric LLT SrcType = MRI.getType(Src1); 293480093f4SDimitry Andric // As bizarre as it may look, shuffle vector can actually produce 294480093f4SDimitry Andric // scalar! This is because at the IR level a <1 x ty> shuffle 295480093f4SDimitry Andric // vector is perfectly valid. 296480093f4SDimitry Andric unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; 297480093f4SDimitry Andric unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; 2988bcb0991SDimitry Andric 2998bcb0991SDimitry Andric // If the resulting vector is smaller than the size of the source 3008bcb0991SDimitry Andric // vectors being concatenated, we won't be able to replace the 3018bcb0991SDimitry Andric // shuffle vector into a concat_vectors. 3028bcb0991SDimitry Andric // 3038bcb0991SDimitry Andric // Note: We may still be able to produce a concat_vectors fed by 3048bcb0991SDimitry Andric // extract_vector_elt and so on. It is less clear that would 3058bcb0991SDimitry Andric // be better though, so don't bother for now. 306480093f4SDimitry Andric // 307480093f4SDimitry Andric // If the destination is a scalar, the size of the sources doesn't 308480093f4SDimitry Andric // matter. we will lower the shuffle to a plain copy. This will 309480093f4SDimitry Andric // work only if the source and destination have the same size. But 310480093f4SDimitry Andric // that's covered by the next condition. 311480093f4SDimitry Andric // 312480093f4SDimitry Andric // TODO: If the size between the source and destination don't match 313480093f4SDimitry Andric // we could still emit an extract vector element in that case. 314480093f4SDimitry Andric if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) 3158bcb0991SDimitry Andric return false; 3168bcb0991SDimitry Andric 3178bcb0991SDimitry Andric // Check that the shuffle mask can be broken evenly between the 3188bcb0991SDimitry Andric // different sources. 3198bcb0991SDimitry Andric if (DstNumElts % SrcNumElts != 0) 3208bcb0991SDimitry Andric return false; 3218bcb0991SDimitry Andric 3228bcb0991SDimitry Andric // Mask length is a multiple of the source vector length. 3238bcb0991SDimitry Andric // Check if the shuffle is some kind of concatenation of the input 3248bcb0991SDimitry Andric // vectors. 3258bcb0991SDimitry Andric unsigned NumConcat = DstNumElts / SrcNumElts; 3268bcb0991SDimitry Andric SmallVector<int, 8> ConcatSrcs(NumConcat, -1); 327480093f4SDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 3288bcb0991SDimitry Andric for (unsigned i = 0; i != DstNumElts; ++i) { 3298bcb0991SDimitry Andric int Idx = Mask[i]; 3308bcb0991SDimitry Andric // Undef value. 3318bcb0991SDimitry Andric if (Idx < 0) 3328bcb0991SDimitry Andric continue; 3338bcb0991SDimitry Andric // Ensure the indices in each SrcType sized piece are sequential and that 3348bcb0991SDimitry Andric // the same source is used for the whole piece. 3358bcb0991SDimitry Andric if ((Idx % SrcNumElts != (i % SrcNumElts)) || 3368bcb0991SDimitry Andric (ConcatSrcs[i / SrcNumElts] >= 0 && 3378bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) 3388bcb0991SDimitry Andric return false; 3398bcb0991SDimitry Andric // Remember which source this index came from. 3408bcb0991SDimitry Andric ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; 3418bcb0991SDimitry Andric } 3428bcb0991SDimitry Andric 3438bcb0991SDimitry Andric // The shuffle is concatenating multiple vectors together. 3448bcb0991SDimitry Andric // Collect the different operands for that. 3458bcb0991SDimitry Andric Register UndefReg; 3468bcb0991SDimitry Andric Register Src2 = MI.getOperand(2).getReg(); 3478bcb0991SDimitry Andric for (auto Src : ConcatSrcs) { 3488bcb0991SDimitry Andric if (Src < 0) { 3498bcb0991SDimitry Andric if (!UndefReg) { 3508bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3518bcb0991SDimitry Andric UndefReg = Builder.buildUndef(SrcType).getReg(0); 3528bcb0991SDimitry Andric } 3538bcb0991SDimitry Andric Ops.push_back(UndefReg); 3548bcb0991SDimitry Andric } else if (Src == 0) 3558bcb0991SDimitry Andric Ops.push_back(Src1); 3568bcb0991SDimitry Andric else 3578bcb0991SDimitry Andric Ops.push_back(Src2); 3588bcb0991SDimitry Andric } 3598bcb0991SDimitry Andric return true; 3608bcb0991SDimitry Andric } 3618bcb0991SDimitry Andric 3628bcb0991SDimitry Andric void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, 3638bcb0991SDimitry Andric const ArrayRef<Register> Ops) { 3648bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3658bcb0991SDimitry Andric Builder.setInsertPt(*MI.getParent(), MI); 3668bcb0991SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(DstReg); 3678bcb0991SDimitry Andric 368480093f4SDimitry Andric if (Ops.size() == 1) 369480093f4SDimitry Andric Builder.buildCopy(NewDstReg, Ops[0]); 370480093f4SDimitry Andric else 371480093f4SDimitry Andric Builder.buildMerge(NewDstReg, Ops); 3728bcb0991SDimitry Andric 3738bcb0991SDimitry Andric MI.eraseFromParent(); 3748bcb0991SDimitry Andric replaceRegWith(MRI, DstReg, NewDstReg); 3758bcb0991SDimitry Andric } 3768bcb0991SDimitry Andric 3770b57cec5SDimitry Andric namespace { 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric /// Select a preference between two uses. CurrentUse is the current preference 3800b57cec5SDimitry Andric /// while *ForCandidate is attributes of the candidate under consideration. 3810b57cec5SDimitry Andric PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, 3825ffd83dbSDimitry Andric const LLT TyForCandidate, 3830b57cec5SDimitry Andric unsigned OpcodeForCandidate, 3840b57cec5SDimitry Andric MachineInstr *MIForCandidate) { 3850b57cec5SDimitry Andric if (!CurrentUse.Ty.isValid()) { 3860b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == OpcodeForCandidate || 3870b57cec5SDimitry Andric CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) 3880b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 3890b57cec5SDimitry Andric return CurrentUse; 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric // We permit the extend to hoist through basic blocks but this is only 3930b57cec5SDimitry Andric // sensible if the target has extending loads. If you end up lowering back 3940b57cec5SDimitry Andric // into a load and extend during the legalizer then the end result is 3950b57cec5SDimitry Andric // hoisting the extend up to the load. 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric // Prefer defined extensions to undefined extensions as these are more 3980b57cec5SDimitry Andric // likely to reduce the number of instructions. 3990b57cec5SDimitry Andric if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && 4000b57cec5SDimitry Andric CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) 4010b57cec5SDimitry Andric return CurrentUse; 4020b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && 4030b57cec5SDimitry Andric OpcodeForCandidate != TargetOpcode::G_ANYEXT) 4040b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric // Prefer sign extensions to zero extensions as sign-extensions tend to be 4070b57cec5SDimitry Andric // more expensive. 4080b57cec5SDimitry Andric if (CurrentUse.Ty == TyForCandidate) { 4090b57cec5SDimitry Andric if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && 4100b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_ZEXT) 4110b57cec5SDimitry Andric return CurrentUse; 4120b57cec5SDimitry Andric else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && 4130b57cec5SDimitry Andric OpcodeForCandidate == TargetOpcode::G_SEXT) 4140b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric // This is potentially target specific. We've chosen the largest type 4180b57cec5SDimitry Andric // because G_TRUNC is usually free. One potential catch with this is that 4190b57cec5SDimitry Andric // some targets have a reduced number of larger registers than smaller 4200b57cec5SDimitry Andric // registers and this choice potentially increases the live-range for the 4210b57cec5SDimitry Andric // larger value. 4220b57cec5SDimitry Andric if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { 4230b57cec5SDimitry Andric return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric return CurrentUse; 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric /// Find a suitable place to insert some instructions and insert them. This 4290b57cec5SDimitry Andric /// function accounts for special cases like inserting before a PHI node. 4300b57cec5SDimitry Andric /// The current strategy for inserting before PHI's is to duplicate the 4310b57cec5SDimitry Andric /// instructions for each predecessor. However, while that's ok for G_TRUNC 4320b57cec5SDimitry Andric /// on most targets since it generally requires no code, other targets/cases may 4330b57cec5SDimitry Andric /// want to try harder to find a dominating block. 4340b57cec5SDimitry Andric static void InsertInsnsWithoutSideEffectsBeforeUse( 4350b57cec5SDimitry Andric MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, 4360b57cec5SDimitry Andric std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, 4370b57cec5SDimitry Andric MachineOperand &UseMO)> 4380b57cec5SDimitry Andric Inserter) { 4390b57cec5SDimitry Andric MachineInstr &UseMI = *UseMO.getParent(); 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric MachineBasicBlock *InsertBB = UseMI.getParent(); 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric // If the use is a PHI then we want the predecessor block instead. 4440b57cec5SDimitry Andric if (UseMI.isPHI()) { 4450b57cec5SDimitry Andric MachineOperand *PredBB = std::next(&UseMO); 4460b57cec5SDimitry Andric InsertBB = PredBB->getMBB(); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric // If the block is the same block as the def then we want to insert just after 4500b57cec5SDimitry Andric // the def instead of at the start of the block. 4510b57cec5SDimitry Andric if (InsertBB == DefMI.getParent()) { 4520b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPt = &DefMI; 4530b57cec5SDimitry Andric Inserter(InsertBB, std::next(InsertPt), UseMO); 4540b57cec5SDimitry Andric return; 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric // Otherwise we want the start of the BB 4580b57cec5SDimitry Andric Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric } // end anonymous namespace 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { 4630b57cec5SDimitry Andric PreferredTuple Preferred; 4640b57cec5SDimitry Andric if (matchCombineExtendingLoads(MI, Preferred)) { 4650b57cec5SDimitry Andric applyCombineExtendingLoads(MI, Preferred); 4660b57cec5SDimitry Andric return true; 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric return false; 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, 4720b57cec5SDimitry Andric PreferredTuple &Preferred) { 4730b57cec5SDimitry Andric // We match the loads and follow the uses to the extend instead of matching 4740b57cec5SDimitry Andric // the extends and following the def to the load. This is because the load 4750b57cec5SDimitry Andric // must remain in the same position for correctness (unless we also add code 4760b57cec5SDimitry Andric // to find a safe place to sink it) whereas the extend is freely movable. 4770b57cec5SDimitry Andric // It also prevents us from duplicating the load for the volatile case or just 4780b57cec5SDimitry Andric // for performance. 479fe6060f1SDimitry Andric GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI); 480fe6060f1SDimitry Andric if (!LoadMI) 4810b57cec5SDimitry Andric return false; 4820b57cec5SDimitry Andric 483fe6060f1SDimitry Andric Register LoadReg = LoadMI->getDstReg(); 4840b57cec5SDimitry Andric 485fe6060f1SDimitry Andric LLT LoadValueTy = MRI.getType(LoadReg); 4860b57cec5SDimitry Andric if (!LoadValueTy.isScalar()) 4870b57cec5SDimitry Andric return false; 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric // Most architectures are going to legalize <s8 loads into at least a 1 byte 4900b57cec5SDimitry Andric // load, and the MMOs can only describe memory accesses in multiples of bytes. 4910b57cec5SDimitry Andric // If we try to perform extload combining on those, we can end up with 4920b57cec5SDimitry Andric // %a(s8) = extload %ptr (load 1 byte from %ptr) 4930b57cec5SDimitry Andric // ... which is an illegal extload instruction. 4940b57cec5SDimitry Andric if (LoadValueTy.getSizeInBits() < 8) 4950b57cec5SDimitry Andric return false; 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric // For non power-of-2 types, they will very likely be legalized into multiple 4980b57cec5SDimitry Andric // loads. Don't bother trying to match them into extending loads. 4990b57cec5SDimitry Andric if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) 5000b57cec5SDimitry Andric return false; 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric // Find the preferred type aside from the any-extends (unless it's the only 5030b57cec5SDimitry Andric // one) and non-extending ops. We'll emit an extending load to that type and 5040b57cec5SDimitry Andric // and emit a variant of (extend (trunc X)) for the others according to the 5050b57cec5SDimitry Andric // relative type sizes. At the same time, pick an extend to use based on the 5060b57cec5SDimitry Andric // extend involved in the chosen type. 507fe6060f1SDimitry Andric unsigned PreferredOpcode = 508fe6060f1SDimitry Andric isa<GLoad>(&MI) 5090b57cec5SDimitry Andric ? TargetOpcode::G_ANYEXT 510fe6060f1SDimitry Andric : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; 5110b57cec5SDimitry Andric Preferred = {LLT(), PreferredOpcode, nullptr}; 512fe6060f1SDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) { 5130b57cec5SDimitry Andric if (UseMI.getOpcode() == TargetOpcode::G_SEXT || 5140b57cec5SDimitry Andric UseMI.getOpcode() == TargetOpcode::G_ZEXT || 5155ffd83dbSDimitry Andric (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { 516fe6060f1SDimitry Andric const auto &MMO = LoadMI->getMMO(); 517fe6060f1SDimitry Andric // For atomics, only form anyextending loads. 518fe6060f1SDimitry Andric if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT) 519fe6060f1SDimitry Andric continue; 5205ffd83dbSDimitry Andric // Check for legality. 5215ffd83dbSDimitry Andric if (LI) { 522*349cc55cSDimitry Andric LegalityQuery::MemDesc MMDesc(MMO); 5235ffd83dbSDimitry Andric LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg()); 524fe6060f1SDimitry Andric LLT SrcTy = MRI.getType(LoadMI->getPointerReg()); 525fe6060f1SDimitry Andric if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}}) 526fe6060f1SDimitry Andric .Action != LegalizeActions::Legal) 5275ffd83dbSDimitry Andric continue; 5285ffd83dbSDimitry Andric } 5290b57cec5SDimitry Andric Preferred = ChoosePreferredUse(Preferred, 5300b57cec5SDimitry Andric MRI.getType(UseMI.getOperand(0).getReg()), 5310b57cec5SDimitry Andric UseMI.getOpcode(), &UseMI); 5320b57cec5SDimitry Andric } 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // There were no extends 5360b57cec5SDimitry Andric if (!Preferred.MI) 5370b57cec5SDimitry Andric return false; 5380b57cec5SDimitry Andric // It should be impossible to chose an extend without selecting a different 5390b57cec5SDimitry Andric // type since by definition the result of an extend is larger. 5400b57cec5SDimitry Andric assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); 5430b57cec5SDimitry Andric return true; 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, 5470b57cec5SDimitry Andric PreferredTuple &Preferred) { 5480b57cec5SDimitry Andric // Rewrite the load to the chosen extending load. 5490b57cec5SDimitry Andric Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric // Inserter to insert a truncate back to the original type at a given point 5520b57cec5SDimitry Andric // with some basic CSE to limit truncate duplication to one per BB. 5530b57cec5SDimitry Andric DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns; 5540b57cec5SDimitry Andric auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, 5550b57cec5SDimitry Andric MachineBasicBlock::iterator InsertBefore, 5560b57cec5SDimitry Andric MachineOperand &UseMO) { 5570b57cec5SDimitry Andric MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); 5580b57cec5SDimitry Andric if (PreviouslyEmitted) { 5590b57cec5SDimitry Andric Observer.changingInstr(*UseMO.getParent()); 5600b57cec5SDimitry Andric UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); 5610b57cec5SDimitry Andric Observer.changedInstr(*UseMO.getParent()); 5620b57cec5SDimitry Andric return; 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric Builder.setInsertPt(*InsertIntoBB, InsertBefore); 5660b57cec5SDimitry Andric Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); 5670b57cec5SDimitry Andric MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); 5680b57cec5SDimitry Andric EmittedInsns[InsertIntoBB] = NewMI; 5690b57cec5SDimitry Andric replaceRegOpWith(MRI, UseMO, NewDstReg); 5700b57cec5SDimitry Andric }; 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric Observer.changingInstr(MI); 5730b57cec5SDimitry Andric MI.setDesc( 5740b57cec5SDimitry Andric Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT 5750b57cec5SDimitry Andric ? TargetOpcode::G_SEXTLOAD 5760b57cec5SDimitry Andric : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT 5770b57cec5SDimitry Andric ? TargetOpcode::G_ZEXTLOAD 5780b57cec5SDimitry Andric : TargetOpcode::G_LOAD)); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric // Rewrite all the uses to fix up the types. 5810b57cec5SDimitry Andric auto &LoadValue = MI.getOperand(0); 5820b57cec5SDimitry Andric SmallVector<MachineOperand *, 4> Uses; 5830b57cec5SDimitry Andric for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) 5840b57cec5SDimitry Andric Uses.push_back(&UseMO); 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric for (auto *UseMO : Uses) { 5870b57cec5SDimitry Andric MachineInstr *UseMI = UseMO->getParent(); 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric // If the extend is compatible with the preferred extend then we should fix 5900b57cec5SDimitry Andric // up the type and extend so that it uses the preferred use. 5910b57cec5SDimitry Andric if (UseMI->getOpcode() == Preferred.ExtendOpcode || 5920b57cec5SDimitry Andric UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { 5938bcb0991SDimitry Andric Register UseDstReg = UseMI->getOperand(0).getReg(); 5940b57cec5SDimitry Andric MachineOperand &UseSrcMO = UseMI->getOperand(1); 5955ffd83dbSDimitry Andric const LLT UseDstTy = MRI.getType(UseDstReg); 5960b57cec5SDimitry Andric if (UseDstReg != ChosenDstReg) { 5970b57cec5SDimitry Andric if (Preferred.Ty == UseDstTy) { 5980b57cec5SDimitry Andric // If the use has the same type as the preferred use, then merge 5990b57cec5SDimitry Andric // the vregs and erase the extend. For example: 6000b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 6010b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 6020b57cec5SDimitry Andric // %3:_(s32) = G_ANYEXT %1(s8) 6030b57cec5SDimitry Andric // ... = ... %3(s32) 6040b57cec5SDimitry Andric // rewrites to: 6050b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 6060b57cec5SDimitry Andric // ... = ... %2(s32) 6070b57cec5SDimitry Andric replaceRegWith(MRI, UseDstReg, ChosenDstReg); 6080b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 6090b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 6100b57cec5SDimitry Andric } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { 6110b57cec5SDimitry Andric // If the preferred size is smaller, then keep the extend but extend 6120b57cec5SDimitry Andric // from the result of the extending load. For example: 6130b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 6140b57cec5SDimitry Andric // %2:_(s32) = G_SEXT %1(s8) 6150b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %1(s8) 6160b57cec5SDimitry Andric // ... = ... %3(s64) 6170b57cec5SDimitry Andric /// rewrites to: 6180b57cec5SDimitry Andric // %2:_(s32) = G_SEXTLOAD ... 6190b57cec5SDimitry Andric // %3:_(s64) = G_ANYEXT %2:_(s32) 6200b57cec5SDimitry Andric // ... = ... %3(s64) 6210b57cec5SDimitry Andric replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); 6220b57cec5SDimitry Andric } else { 6230b57cec5SDimitry Andric // If the preferred size is large, then insert a truncate. For 6240b57cec5SDimitry Andric // example: 6250b57cec5SDimitry Andric // %1:_(s8) = G_LOAD ... 6260b57cec5SDimitry Andric // %2:_(s64) = G_SEXT %1(s8) 6270b57cec5SDimitry Andric // %3:_(s32) = G_ZEXT %1(s8) 6280b57cec5SDimitry Andric // ... = ... %3(s32) 6290b57cec5SDimitry Andric /// rewrites to: 6300b57cec5SDimitry Andric // %2:_(s64) = G_SEXTLOAD ... 6310b57cec5SDimitry Andric // %4:_(s8) = G_TRUNC %2:_(s32) 6320b57cec5SDimitry Andric // %3:_(s64) = G_ZEXT %2:_(s8) 6330b57cec5SDimitry Andric // ... = ... %3(s64) 6340b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, 6350b57cec5SDimitry Andric InsertTruncAt); 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric continue; 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric // The use is (one of) the uses of the preferred use we chose earlier. 6400b57cec5SDimitry Andric // We're going to update the load to def this value later so just erase 6410b57cec5SDimitry Andric // the old extend. 6420b57cec5SDimitry Andric Observer.erasingInstr(*UseMO->getParent()); 6430b57cec5SDimitry Andric UseMO->getParent()->eraseFromParent(); 6440b57cec5SDimitry Andric continue; 6450b57cec5SDimitry Andric } 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric // The use isn't an extend. Truncate back to the type we originally loaded. 6480b57cec5SDimitry Andric // This is free on many targets. 6490b57cec5SDimitry Andric InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt); 6500b57cec5SDimitry Andric } 6510b57cec5SDimitry Andric 6520b57cec5SDimitry Andric MI.getOperand(0).setReg(ChosenDstReg); 6530b57cec5SDimitry Andric Observer.changedInstr(MI); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 656*349cc55cSDimitry Andric bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, 657*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 658*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 659*349cc55cSDimitry Andric 660*349cc55cSDimitry Andric // If we have the following code: 661*349cc55cSDimitry Andric // %mask = G_CONSTANT 255 662*349cc55cSDimitry Andric // %ld = G_LOAD %ptr, (load s16) 663*349cc55cSDimitry Andric // %and = G_AND %ld, %mask 664*349cc55cSDimitry Andric // 665*349cc55cSDimitry Andric // Try to fold it into 666*349cc55cSDimitry Andric // %ld = G_ZEXTLOAD %ptr, (load s8) 667*349cc55cSDimitry Andric 668*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 669*349cc55cSDimitry Andric if (MRI.getType(Dst).isVector()) 670*349cc55cSDimitry Andric return false; 671*349cc55cSDimitry Andric 672*349cc55cSDimitry Andric auto MaybeMask = 673*349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 674*349cc55cSDimitry Andric if (!MaybeMask) 675*349cc55cSDimitry Andric return false; 676*349cc55cSDimitry Andric 677*349cc55cSDimitry Andric APInt MaskVal = MaybeMask->Value; 678*349cc55cSDimitry Andric 679*349cc55cSDimitry Andric if (!MaskVal.isMask()) 680*349cc55cSDimitry Andric return false; 681*349cc55cSDimitry Andric 682*349cc55cSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 683*349cc55cSDimitry Andric GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI); 684*349cc55cSDimitry Andric if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) || 685*349cc55cSDimitry Andric !LoadMI->isSimple()) 686*349cc55cSDimitry Andric return false; 687*349cc55cSDimitry Andric 688*349cc55cSDimitry Andric Register LoadReg = LoadMI->getDstReg(); 689*349cc55cSDimitry Andric LLT LoadTy = MRI.getType(LoadReg); 690*349cc55cSDimitry Andric Register PtrReg = LoadMI->getPointerReg(); 691*349cc55cSDimitry Andric uint64_t LoadSizeBits = LoadMI->getMemSizeInBits(); 692*349cc55cSDimitry Andric unsigned MaskSizeBits = MaskVal.countTrailingOnes(); 693*349cc55cSDimitry Andric 694*349cc55cSDimitry Andric // The mask may not be larger than the in-memory type, as it might cover sign 695*349cc55cSDimitry Andric // extended bits 696*349cc55cSDimitry Andric if (MaskSizeBits > LoadSizeBits) 697*349cc55cSDimitry Andric return false; 698*349cc55cSDimitry Andric 699*349cc55cSDimitry Andric // If the mask covers the whole destination register, there's nothing to 700*349cc55cSDimitry Andric // extend 701*349cc55cSDimitry Andric if (MaskSizeBits >= LoadTy.getSizeInBits()) 702*349cc55cSDimitry Andric return false; 703*349cc55cSDimitry Andric 704*349cc55cSDimitry Andric // Most targets cannot deal with loads of size < 8 and need to re-legalize to 705*349cc55cSDimitry Andric // at least byte loads. Avoid creating such loads here 706*349cc55cSDimitry Andric if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits)) 707*349cc55cSDimitry Andric return false; 708*349cc55cSDimitry Andric 709*349cc55cSDimitry Andric const MachineMemOperand &MMO = LoadMI->getMMO(); 710*349cc55cSDimitry Andric LegalityQuery::MemDesc MemDesc(MMO); 711*349cc55cSDimitry Andric MemDesc.MemoryTy = LLT::scalar(MaskSizeBits); 712*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer( 713*349cc55cSDimitry Andric {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}})) 714*349cc55cSDimitry Andric return false; 715*349cc55cSDimitry Andric 716*349cc55cSDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 717*349cc55cSDimitry Andric B.setInstrAndDebugLoc(*LoadMI); 718*349cc55cSDimitry Andric auto &MF = B.getMF(); 719*349cc55cSDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 720*349cc55cSDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8); 721*349cc55cSDimitry Andric B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO); 722*349cc55cSDimitry Andric }; 723*349cc55cSDimitry Andric return true; 724*349cc55cSDimitry Andric } 725*349cc55cSDimitry Andric 7265ffd83dbSDimitry Andric bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, 7275ffd83dbSDimitry Andric const MachineInstr &UseMI) { 7285ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 7295ffd83dbSDimitry Andric "shouldn't consider debug uses"); 7308bcb0991SDimitry Andric assert(DefMI.getParent() == UseMI.getParent()); 7318bcb0991SDimitry Andric if (&DefMI == &UseMI) 732*349cc55cSDimitry Andric return true; 733e8d8bef9SDimitry Andric const MachineBasicBlock &MBB = *DefMI.getParent(); 734e8d8bef9SDimitry Andric auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) { 735e8d8bef9SDimitry Andric return &MI == &DefMI || &MI == &UseMI; 736e8d8bef9SDimitry Andric }); 737e8d8bef9SDimitry Andric if (DefOrUse == MBB.end()) 738e8d8bef9SDimitry Andric llvm_unreachable("Block must contain both DefMI and UseMI!"); 739e8d8bef9SDimitry Andric return &*DefOrUse == &DefMI; 7408bcb0991SDimitry Andric } 7418bcb0991SDimitry Andric 7425ffd83dbSDimitry Andric bool CombinerHelper::dominates(const MachineInstr &DefMI, 7435ffd83dbSDimitry Andric const MachineInstr &UseMI) { 7445ffd83dbSDimitry Andric assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && 7455ffd83dbSDimitry Andric "shouldn't consider debug uses"); 7468bcb0991SDimitry Andric if (MDT) 7478bcb0991SDimitry Andric return MDT->dominates(&DefMI, &UseMI); 7488bcb0991SDimitry Andric else if (DefMI.getParent() != UseMI.getParent()) 7498bcb0991SDimitry Andric return false; 7508bcb0991SDimitry Andric 7518bcb0991SDimitry Andric return isPredecessor(DefMI, UseMI); 7528bcb0991SDimitry Andric } 7538bcb0991SDimitry Andric 754e8d8bef9SDimitry Andric bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { 7555ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 7565ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 757e8d8bef9SDimitry Andric Register LoadUser = SrcReg; 758e8d8bef9SDimitry Andric 759e8d8bef9SDimitry Andric if (MRI.getType(SrcReg).isVector()) 760e8d8bef9SDimitry Andric return false; 761e8d8bef9SDimitry Andric 762e8d8bef9SDimitry Andric Register TruncSrc; 763e8d8bef9SDimitry Andric if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) 764e8d8bef9SDimitry Andric LoadUser = TruncSrc; 765e8d8bef9SDimitry Andric 766e8d8bef9SDimitry Andric uint64_t SizeInBits = MI.getOperand(2).getImm(); 767e8d8bef9SDimitry Andric // If the source is a G_SEXTLOAD from the same bit width, then we don't 768e8d8bef9SDimitry Andric // need any extend at all, just a truncate. 769fe6060f1SDimitry Andric if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) { 770e8d8bef9SDimitry Andric // If truncating more than the original extended value, abort. 771fe6060f1SDimitry Andric auto LoadSizeBits = LoadMI->getMemSizeInBits(); 772fe6060f1SDimitry Andric if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits) 773e8d8bef9SDimitry Andric return false; 774fe6060f1SDimitry Andric if (LoadSizeBits == SizeInBits) 775e8d8bef9SDimitry Andric return true; 776e8d8bef9SDimitry Andric } 777e8d8bef9SDimitry Andric return false; 7785ffd83dbSDimitry Andric } 7795ffd83dbSDimitry Andric 780fe6060f1SDimitry Andric void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { 7815ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 782e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 783e8d8bef9SDimitry Andric Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 784e8d8bef9SDimitry Andric MI.eraseFromParent(); 785e8d8bef9SDimitry Andric } 786e8d8bef9SDimitry Andric 787e8d8bef9SDimitry Andric bool CombinerHelper::matchSextInRegOfLoad( 788e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 789e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 790e8d8bef9SDimitry Andric 791e8d8bef9SDimitry Andric // Only supports scalars for now. 792e8d8bef9SDimitry Andric if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 793e8d8bef9SDimitry Andric return false; 794e8d8bef9SDimitry Andric 795e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 796fe6060f1SDimitry Andric auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI); 797fe6060f1SDimitry Andric if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) || 798fe6060f1SDimitry Andric !LoadDef->isSimple()) 799e8d8bef9SDimitry Andric return false; 800e8d8bef9SDimitry Andric 801e8d8bef9SDimitry Andric // If the sign extend extends from a narrower width than the load's width, 802e8d8bef9SDimitry Andric // then we can narrow the load width when we combine to a G_SEXTLOAD. 803e8d8bef9SDimitry Andric // Avoid widening the load at all. 804fe6060f1SDimitry Andric unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), 805fe6060f1SDimitry Andric LoadDef->getMemSizeInBits()); 806e8d8bef9SDimitry Andric 807e8d8bef9SDimitry Andric // Don't generate G_SEXTLOADs with a < 1 byte width. 808e8d8bef9SDimitry Andric if (NewSizeBits < 8) 809e8d8bef9SDimitry Andric return false; 810e8d8bef9SDimitry Andric // Don't bother creating a non-power-2 sextload, it will likely be broken up 811e8d8bef9SDimitry Andric // anyway for most targets. 812e8d8bef9SDimitry Andric if (!isPowerOf2_32(NewSizeBits)) 813e8d8bef9SDimitry Andric return false; 814*349cc55cSDimitry Andric 815*349cc55cSDimitry Andric const MachineMemOperand &MMO = LoadDef->getMMO(); 816*349cc55cSDimitry Andric LegalityQuery::MemDesc MMDesc(MMO); 817*349cc55cSDimitry Andric MMDesc.MemoryTy = LLT::scalar(NewSizeBits); 818*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD, 819*349cc55cSDimitry Andric {MRI.getType(LoadDef->getDstReg()), 820*349cc55cSDimitry Andric MRI.getType(LoadDef->getPointerReg())}, 821*349cc55cSDimitry Andric {MMDesc}})) 822*349cc55cSDimitry Andric return false; 823*349cc55cSDimitry Andric 824fe6060f1SDimitry Andric MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits); 825e8d8bef9SDimitry Andric return true; 826e8d8bef9SDimitry Andric } 827e8d8bef9SDimitry Andric 828fe6060f1SDimitry Andric void CombinerHelper::applySextInRegOfLoad( 829e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 830e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 831e8d8bef9SDimitry Andric Register LoadReg; 832e8d8bef9SDimitry Andric unsigned ScalarSizeBits; 833e8d8bef9SDimitry Andric std::tie(LoadReg, ScalarSizeBits) = MatchInfo; 834fe6060f1SDimitry Andric GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg)); 835e8d8bef9SDimitry Andric 836e8d8bef9SDimitry Andric // If we have the following: 837e8d8bef9SDimitry Andric // %ld = G_LOAD %ptr, (load 2) 838e8d8bef9SDimitry Andric // %ext = G_SEXT_INREG %ld, 8 839e8d8bef9SDimitry Andric // ==> 840e8d8bef9SDimitry Andric // %ld = G_SEXTLOAD %ptr (load 1) 841e8d8bef9SDimitry Andric 842fe6060f1SDimitry Andric auto &MMO = LoadDef->getMMO(); 843fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(*LoadDef); 844e8d8bef9SDimitry Andric auto &MF = Builder.getMF(); 845e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 846e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); 847e8d8bef9SDimitry Andric Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), 848fe6060f1SDimitry Andric LoadDef->getPointerReg(), *NewMMO); 8495ffd83dbSDimitry Andric MI.eraseFromParent(); 8505ffd83dbSDimitry Andric } 8515ffd83dbSDimitry Andric 8528bcb0991SDimitry Andric bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, 8538bcb0991SDimitry Andric Register &Base, Register &Offset) { 8548bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 8558bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 8568bcb0991SDimitry Andric 8578bcb0991SDimitry Andric #ifndef NDEBUG 8588bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 8598bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 8608bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 8618bcb0991SDimitry Andric #endif 8628bcb0991SDimitry Andric 8638bcb0991SDimitry Andric Base = MI.getOperand(1).getReg(); 8648bcb0991SDimitry Andric MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); 8658bcb0991SDimitry Andric if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 8668bcb0991SDimitry Andric return false; 8678bcb0991SDimitry Andric 8688bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); 869e8d8bef9SDimitry Andric // FIXME: The following use traversal needs a bail out for patholigical cases. 8705ffd83dbSDimitry Andric for (auto &Use : MRI.use_nodbg_instructions(Base)) { 871480093f4SDimitry Andric if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) 8728bcb0991SDimitry Andric continue; 8738bcb0991SDimitry Andric 8748bcb0991SDimitry Andric Offset = Use.getOperand(2).getReg(); 8758bcb0991SDimitry Andric if (!ForceLegalIndexing && 8768bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { 8778bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " 8788bcb0991SDimitry Andric << Use); 8798bcb0991SDimitry Andric continue; 8808bcb0991SDimitry Andric } 8818bcb0991SDimitry Andric 8828bcb0991SDimitry Andric // Make sure the offset calculation is before the potentially indexed op. 8838bcb0991SDimitry Andric // FIXME: we really care about dependency here. The offset calculation might 8848bcb0991SDimitry Andric // be movable. 8858bcb0991SDimitry Andric MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); 8868bcb0991SDimitry Andric if (!OffsetDef || !dominates(*OffsetDef, MI)) { 8878bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " 8888bcb0991SDimitry Andric << Use); 8898bcb0991SDimitry Andric continue; 8908bcb0991SDimitry Andric } 8918bcb0991SDimitry Andric 8928bcb0991SDimitry Andric // FIXME: check whether all uses of Base are load/store with foldable 8938bcb0991SDimitry Andric // addressing modes. If so, using the normal addr-modes is better than 8948bcb0991SDimitry Andric // forming an indexed one. 8958bcb0991SDimitry Andric 8968bcb0991SDimitry Andric bool MemOpDominatesAddrUses = true; 8975ffd83dbSDimitry Andric for (auto &PtrAddUse : 8985ffd83dbSDimitry Andric MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) { 899480093f4SDimitry Andric if (!dominates(MI, PtrAddUse)) { 9008bcb0991SDimitry Andric MemOpDominatesAddrUses = false; 9018bcb0991SDimitry Andric break; 9028bcb0991SDimitry Andric } 9038bcb0991SDimitry Andric } 9048bcb0991SDimitry Andric 9058bcb0991SDimitry Andric if (!MemOpDominatesAddrUses) { 9068bcb0991SDimitry Andric LLVM_DEBUG( 9078bcb0991SDimitry Andric dbgs() << " Ignoring candidate as memop does not dominate uses: " 9088bcb0991SDimitry Andric << Use); 9098bcb0991SDimitry Andric continue; 9108bcb0991SDimitry Andric } 9118bcb0991SDimitry Andric 9128bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Found match: " << Use); 9138bcb0991SDimitry Andric Addr = Use.getOperand(0).getReg(); 9148bcb0991SDimitry Andric return true; 9158bcb0991SDimitry Andric } 9168bcb0991SDimitry Andric 9178bcb0991SDimitry Andric return false; 9188bcb0991SDimitry Andric } 9198bcb0991SDimitry Andric 9208bcb0991SDimitry Andric bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, 9218bcb0991SDimitry Andric Register &Base, Register &Offset) { 9228bcb0991SDimitry Andric auto &MF = *MI.getParent()->getParent(); 9238bcb0991SDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 9248bcb0991SDimitry Andric 9258bcb0991SDimitry Andric #ifndef NDEBUG 9268bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 9278bcb0991SDimitry Andric assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || 9288bcb0991SDimitry Andric Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); 9298bcb0991SDimitry Andric #endif 9308bcb0991SDimitry Andric 9318bcb0991SDimitry Andric Addr = MI.getOperand(1).getReg(); 932480093f4SDimitry Andric MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); 9335ffd83dbSDimitry Andric if (!AddrDef || MRI.hasOneNonDBGUse(Addr)) 9348bcb0991SDimitry Andric return false; 9358bcb0991SDimitry Andric 9368bcb0991SDimitry Andric Base = AddrDef->getOperand(1).getReg(); 9378bcb0991SDimitry Andric Offset = AddrDef->getOperand(2).getReg(); 9388bcb0991SDimitry Andric 9398bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); 9408bcb0991SDimitry Andric 9418bcb0991SDimitry Andric if (!ForceLegalIndexing && 9428bcb0991SDimitry Andric !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { 9438bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); 9448bcb0991SDimitry Andric return false; 9458bcb0991SDimitry Andric } 9468bcb0991SDimitry Andric 9478bcb0991SDimitry Andric MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); 9488bcb0991SDimitry Andric if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 9498bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); 9508bcb0991SDimitry Andric return false; 9518bcb0991SDimitry Andric } 9528bcb0991SDimitry Andric 9538bcb0991SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_STORE) { 9548bcb0991SDimitry Andric // Would require a copy. 9558bcb0991SDimitry Andric if (Base == MI.getOperand(0).getReg()) { 9568bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); 9578bcb0991SDimitry Andric return false; 9588bcb0991SDimitry Andric } 9598bcb0991SDimitry Andric 9608bcb0991SDimitry Andric // We're expecting one use of Addr in MI, but it could also be the 9618bcb0991SDimitry Andric // value stored, which isn't actually dominated by the instruction. 9628bcb0991SDimitry Andric if (MI.getOperand(0).getReg() == Addr) { 9638bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); 9648bcb0991SDimitry Andric return false; 9658bcb0991SDimitry Andric } 9668bcb0991SDimitry Andric } 9678bcb0991SDimitry Andric 968480093f4SDimitry Andric // FIXME: check whether all uses of the base pointer are constant PtrAdds. 969480093f4SDimitry Andric // That might allow us to end base's liveness here by adjusting the constant. 9708bcb0991SDimitry Andric 9715ffd83dbSDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) { 9728bcb0991SDimitry Andric if (!dominates(MI, UseMI)) { 9738bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); 9748bcb0991SDimitry Andric return false; 9758bcb0991SDimitry Andric } 9768bcb0991SDimitry Andric } 9778bcb0991SDimitry Andric 9788bcb0991SDimitry Andric return true; 9798bcb0991SDimitry Andric } 9808bcb0991SDimitry Andric 9818bcb0991SDimitry Andric bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { 982480093f4SDimitry Andric IndexedLoadStoreMatchInfo MatchInfo; 983480093f4SDimitry Andric if (matchCombineIndexedLoadStore(MI, MatchInfo)) { 984480093f4SDimitry Andric applyCombineIndexedLoadStore(MI, MatchInfo); 985480093f4SDimitry Andric return true; 986480093f4SDimitry Andric } 987480093f4SDimitry Andric return false; 988480093f4SDimitry Andric } 989480093f4SDimitry Andric 990480093f4SDimitry Andric bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 9918bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 9928bcb0991SDimitry Andric if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && 9938bcb0991SDimitry Andric Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) 9948bcb0991SDimitry Andric return false; 9958bcb0991SDimitry Andric 996e8d8bef9SDimitry Andric // For now, no targets actually support these opcodes so don't waste time 997e8d8bef9SDimitry Andric // running these unless we're forced to for testing. 998e8d8bef9SDimitry Andric if (!ForceLegalIndexing) 999e8d8bef9SDimitry Andric return false; 1000e8d8bef9SDimitry Andric 1001480093f4SDimitry Andric MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 1002480093f4SDimitry Andric MatchInfo.Offset); 1003480093f4SDimitry Andric if (!MatchInfo.IsPre && 1004480093f4SDimitry Andric !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, 1005480093f4SDimitry Andric MatchInfo.Offset)) 10068bcb0991SDimitry Andric return false; 10078bcb0991SDimitry Andric 1008480093f4SDimitry Andric return true; 1009480093f4SDimitry Andric } 10108bcb0991SDimitry Andric 1011480093f4SDimitry Andric void CombinerHelper::applyCombineIndexedLoadStore( 1012480093f4SDimitry Andric MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { 1013480093f4SDimitry Andric MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); 1014480093f4SDimitry Andric MachineIRBuilder MIRBuilder(MI); 1015480093f4SDimitry Andric unsigned Opcode = MI.getOpcode(); 1016480093f4SDimitry Andric bool IsStore = Opcode == TargetOpcode::G_STORE; 10178bcb0991SDimitry Andric unsigned NewOpcode; 10188bcb0991SDimitry Andric switch (Opcode) { 10198bcb0991SDimitry Andric case TargetOpcode::G_LOAD: 10208bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_LOAD; 10218bcb0991SDimitry Andric break; 10228bcb0991SDimitry Andric case TargetOpcode::G_SEXTLOAD: 10238bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; 10248bcb0991SDimitry Andric break; 10258bcb0991SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 10268bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; 10278bcb0991SDimitry Andric break; 10288bcb0991SDimitry Andric case TargetOpcode::G_STORE: 10298bcb0991SDimitry Andric NewOpcode = TargetOpcode::G_INDEXED_STORE; 10308bcb0991SDimitry Andric break; 10318bcb0991SDimitry Andric default: 10328bcb0991SDimitry Andric llvm_unreachable("Unknown load/store opcode"); 10338bcb0991SDimitry Andric } 10348bcb0991SDimitry Andric 10358bcb0991SDimitry Andric auto MIB = MIRBuilder.buildInstr(NewOpcode); 10368bcb0991SDimitry Andric if (IsStore) { 1037480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 10388bcb0991SDimitry Andric MIB.addUse(MI.getOperand(0).getReg()); 10398bcb0991SDimitry Andric } else { 10408bcb0991SDimitry Andric MIB.addDef(MI.getOperand(0).getReg()); 1041480093f4SDimitry Andric MIB.addDef(MatchInfo.Addr); 10428bcb0991SDimitry Andric } 10438bcb0991SDimitry Andric 1044480093f4SDimitry Andric MIB.addUse(MatchInfo.Base); 1045480093f4SDimitry Andric MIB.addUse(MatchInfo.Offset); 1046480093f4SDimitry Andric MIB.addImm(MatchInfo.IsPre); 10478bcb0991SDimitry Andric MI.eraseFromParent(); 10488bcb0991SDimitry Andric AddrDef.eraseFromParent(); 10498bcb0991SDimitry Andric 10508bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); 10518bcb0991SDimitry Andric } 10528bcb0991SDimitry Andric 1053fe6060f1SDimitry Andric bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, 1054fe6060f1SDimitry Andric MachineInstr *&OtherMI) { 1055fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 1056fe6060f1SDimitry Andric bool IsDiv, IsSigned; 1057fe6060f1SDimitry Andric 1058fe6060f1SDimitry Andric switch (Opcode) { 1059fe6060f1SDimitry Andric default: 1060fe6060f1SDimitry Andric llvm_unreachable("Unexpected opcode!"); 1061fe6060f1SDimitry Andric case TargetOpcode::G_SDIV: 1062fe6060f1SDimitry Andric case TargetOpcode::G_UDIV: { 1063fe6060f1SDimitry Andric IsDiv = true; 1064fe6060f1SDimitry Andric IsSigned = Opcode == TargetOpcode::G_SDIV; 1065fe6060f1SDimitry Andric break; 1066fe6060f1SDimitry Andric } 1067fe6060f1SDimitry Andric case TargetOpcode::G_SREM: 1068fe6060f1SDimitry Andric case TargetOpcode::G_UREM: { 1069fe6060f1SDimitry Andric IsDiv = false; 1070fe6060f1SDimitry Andric IsSigned = Opcode == TargetOpcode::G_SREM; 1071fe6060f1SDimitry Andric break; 1072fe6060f1SDimitry Andric } 1073fe6060f1SDimitry Andric } 1074fe6060f1SDimitry Andric 1075fe6060f1SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 1076fe6060f1SDimitry Andric unsigned DivOpcode, RemOpcode, DivremOpcode; 1077fe6060f1SDimitry Andric if (IsSigned) { 1078fe6060f1SDimitry Andric DivOpcode = TargetOpcode::G_SDIV; 1079fe6060f1SDimitry Andric RemOpcode = TargetOpcode::G_SREM; 1080fe6060f1SDimitry Andric DivremOpcode = TargetOpcode::G_SDIVREM; 1081fe6060f1SDimitry Andric } else { 1082fe6060f1SDimitry Andric DivOpcode = TargetOpcode::G_UDIV; 1083fe6060f1SDimitry Andric RemOpcode = TargetOpcode::G_UREM; 1084fe6060f1SDimitry Andric DivremOpcode = TargetOpcode::G_UDIVREM; 1085fe6060f1SDimitry Andric } 1086fe6060f1SDimitry Andric 1087fe6060f1SDimitry Andric if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}})) 10888bcb0991SDimitry Andric return false; 10898bcb0991SDimitry Andric 1090fe6060f1SDimitry Andric // Combine: 1091fe6060f1SDimitry Andric // %div:_ = G_[SU]DIV %src1:_, %src2:_ 1092fe6060f1SDimitry Andric // %rem:_ = G_[SU]REM %src1:_, %src2:_ 1093fe6060f1SDimitry Andric // into: 1094fe6060f1SDimitry Andric // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ 1095fe6060f1SDimitry Andric 1096fe6060f1SDimitry Andric // Combine: 1097fe6060f1SDimitry Andric // %rem:_ = G_[SU]REM %src1:_, %src2:_ 1098fe6060f1SDimitry Andric // %div:_ = G_[SU]DIV %src1:_, %src2:_ 1099fe6060f1SDimitry Andric // into: 1100fe6060f1SDimitry Andric // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_ 1101fe6060f1SDimitry Andric 1102fe6060f1SDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) { 1103fe6060f1SDimitry Andric if (MI.getParent() == UseMI.getParent() && 1104fe6060f1SDimitry Andric ((IsDiv && UseMI.getOpcode() == RemOpcode) || 1105fe6060f1SDimitry Andric (!IsDiv && UseMI.getOpcode() == DivOpcode)) && 1106fe6060f1SDimitry Andric matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) { 1107fe6060f1SDimitry Andric OtherMI = &UseMI; 1108fe6060f1SDimitry Andric return true; 1109fe6060f1SDimitry Andric } 1110fe6060f1SDimitry Andric } 1111fe6060f1SDimitry Andric 1112fe6060f1SDimitry Andric return false; 1113fe6060f1SDimitry Andric } 1114fe6060f1SDimitry Andric 1115fe6060f1SDimitry Andric void CombinerHelper::applyCombineDivRem(MachineInstr &MI, 1116fe6060f1SDimitry Andric MachineInstr *&OtherMI) { 1117fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 1118fe6060f1SDimitry Andric assert(OtherMI && "OtherMI shouldn't be empty."); 1119fe6060f1SDimitry Andric 1120fe6060f1SDimitry Andric Register DestDivReg, DestRemReg; 1121fe6060f1SDimitry Andric if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) { 1122fe6060f1SDimitry Andric DestDivReg = MI.getOperand(0).getReg(); 1123fe6060f1SDimitry Andric DestRemReg = OtherMI->getOperand(0).getReg(); 1124fe6060f1SDimitry Andric } else { 1125fe6060f1SDimitry Andric DestDivReg = OtherMI->getOperand(0).getReg(); 1126fe6060f1SDimitry Andric DestRemReg = MI.getOperand(0).getReg(); 1127fe6060f1SDimitry Andric } 1128fe6060f1SDimitry Andric 1129fe6060f1SDimitry Andric bool IsSigned = 1130fe6060f1SDimitry Andric Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM; 1131fe6060f1SDimitry Andric 1132fe6060f1SDimitry Andric // Check which instruction is first in the block so we don't break def-use 1133fe6060f1SDimitry Andric // deps by "moving" the instruction incorrectly. 1134fe6060f1SDimitry Andric if (dominates(MI, *OtherMI)) 1135fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1136fe6060f1SDimitry Andric else 1137fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(*OtherMI); 1138fe6060f1SDimitry Andric 1139fe6060f1SDimitry Andric Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM 1140fe6060f1SDimitry Andric : TargetOpcode::G_UDIVREM, 1141fe6060f1SDimitry Andric {DestDivReg, DestRemReg}, 1142fe6060f1SDimitry Andric {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); 1143fe6060f1SDimitry Andric MI.eraseFromParent(); 1144fe6060f1SDimitry Andric OtherMI->eraseFromParent(); 1145fe6060f1SDimitry Andric } 1146fe6060f1SDimitry Andric 1147fe6060f1SDimitry Andric bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI, 1148fe6060f1SDimitry Andric MachineInstr *&BrCond) { 1149fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_BR); 1150fe6060f1SDimitry Andric 11510b57cec5SDimitry Andric // Try to match the following: 11520b57cec5SDimitry Andric // bb1: 11530b57cec5SDimitry Andric // G_BRCOND %c1, %bb2 11540b57cec5SDimitry Andric // G_BR %bb3 11550b57cec5SDimitry Andric // bb2: 11560b57cec5SDimitry Andric // ... 11570b57cec5SDimitry Andric // bb3: 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric // The above pattern does not have a fall through to the successor bb2, always 11600b57cec5SDimitry Andric // resulting in a branch no matter which path is taken. Here we try to find 11610b57cec5SDimitry Andric // and replace that pattern with conditional branch to bb3 and otherwise 1162e8d8bef9SDimitry Andric // fallthrough to bb2. This is generally better for branch predictors. 11630b57cec5SDimitry Andric 11640b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 11650b57cec5SDimitry Andric MachineBasicBlock::iterator BrIt(MI); 11660b57cec5SDimitry Andric if (BrIt == MBB->begin()) 11670b57cec5SDimitry Andric return false; 11680b57cec5SDimitry Andric assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator"); 11690b57cec5SDimitry Andric 1170fe6060f1SDimitry Andric BrCond = &*std::prev(BrIt); 11710b57cec5SDimitry Andric if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) 11720b57cec5SDimitry Andric return false; 11730b57cec5SDimitry Andric 1174d409305fSDimitry Andric // Check that the next block is the conditional branch target. Also make sure 1175d409305fSDimitry Andric // that it isn't the same as the G_BR's target (otherwise, this will loop.) 1176d409305fSDimitry Andric MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); 1177d409305fSDimitry Andric return BrCondTarget != MI.getOperand(0).getMBB() && 1178d409305fSDimitry Andric MBB->isLayoutSuccessor(BrCondTarget); 11790b57cec5SDimitry Andric } 11800b57cec5SDimitry Andric 1181fe6060f1SDimitry Andric void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, 1182fe6060f1SDimitry Andric MachineInstr *&BrCond) { 11830b57cec5SDimitry Andric MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); 1184e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(*BrCond); 1185e8d8bef9SDimitry Andric LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); 1186e8d8bef9SDimitry Andric // FIXME: Does int/fp matter for this? If so, we might need to restrict 1187e8d8bef9SDimitry Andric // this to i1 only since we might not know for sure what kind of 1188e8d8bef9SDimitry Andric // compare generated the condition value. 1189e8d8bef9SDimitry Andric auto True = Builder.buildConstant( 1190e8d8bef9SDimitry Andric Ty, getICmpTrueVal(getTargetLowering(), false, false)); 1191e8d8bef9SDimitry Andric auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True); 11920b57cec5SDimitry Andric 1193e8d8bef9SDimitry Andric auto *FallthroughBB = BrCond->getOperand(1).getMBB(); 1194e8d8bef9SDimitry Andric Observer.changingInstr(MI); 1195e8d8bef9SDimitry Andric MI.getOperand(0).setMBB(FallthroughBB); 1196e8d8bef9SDimitry Andric Observer.changedInstr(MI); 11970b57cec5SDimitry Andric 1198e8d8bef9SDimitry Andric // Change the conditional branch to use the inverted condition and 1199e8d8bef9SDimitry Andric // new target block. 12000b57cec5SDimitry Andric Observer.changingInstr(*BrCond); 1201e8d8bef9SDimitry Andric BrCond->getOperand(0).setReg(Xor.getReg(0)); 12020b57cec5SDimitry Andric BrCond->getOperand(1).setMBB(BrTarget); 12030b57cec5SDimitry Andric Observer.changedInstr(*BrCond); 12048bcb0991SDimitry Andric } 12058bcb0991SDimitry Andric 12068bcb0991SDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { 12078bcb0991SDimitry Andric if (Ty.isVector()) 12085ffd83dbSDimitry Andric return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), 12098bcb0991SDimitry Andric Ty.getNumElements()); 12108bcb0991SDimitry Andric return IntegerType::get(C, Ty.getSizeInBits()); 12118bcb0991SDimitry Andric } 12128bcb0991SDimitry Andric 1213fe6060f1SDimitry Andric bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { 1214*349cc55cSDimitry Andric MachineIRBuilder HelperBuilder(MI); 1215*349cc55cSDimitry Andric GISelObserverWrapper DummyObserver; 1216*349cc55cSDimitry Andric LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); 1217*349cc55cSDimitry Andric return Helper.lowerMemcpyInline(MI) == 1218*349cc55cSDimitry Andric LegalizerHelper::LegalizeResult::Legalized; 12198bcb0991SDimitry Andric } 12208bcb0991SDimitry Andric 12218bcb0991SDimitry Andric bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { 1222*349cc55cSDimitry Andric MachineIRBuilder HelperBuilder(MI); 1223*349cc55cSDimitry Andric GISelObserverWrapper DummyObserver; 1224*349cc55cSDimitry Andric LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); 1225*349cc55cSDimitry Andric return Helper.lowerMemCpyFamily(MI, MaxLen) == 1226*349cc55cSDimitry Andric LegalizerHelper::LegalizeResult::Legalized; 12278bcb0991SDimitry Andric } 12288bcb0991SDimitry Andric 1229e8d8bef9SDimitry Andric static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy, 1230e8d8bef9SDimitry Andric const Register Op, 1231e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 1232e8d8bef9SDimitry Andric const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); 1233e8d8bef9SDimitry Andric if (!MaybeCst) 1234e8d8bef9SDimitry Andric return None; 1235e8d8bef9SDimitry Andric 1236e8d8bef9SDimitry Andric APFloat V = MaybeCst->getValueAPF(); 1237e8d8bef9SDimitry Andric switch (Opcode) { 1238e8d8bef9SDimitry Andric default: 1239e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode!"); 1240e8d8bef9SDimitry Andric case TargetOpcode::G_FNEG: { 1241e8d8bef9SDimitry Andric V.changeSign(); 1242e8d8bef9SDimitry Andric return V; 1243e8d8bef9SDimitry Andric } 1244e8d8bef9SDimitry Andric case TargetOpcode::G_FABS: { 1245e8d8bef9SDimitry Andric V.clearSign(); 1246e8d8bef9SDimitry Andric return V; 1247e8d8bef9SDimitry Andric } 1248e8d8bef9SDimitry Andric case TargetOpcode::G_FPTRUNC: 1249e8d8bef9SDimitry Andric break; 1250e8d8bef9SDimitry Andric case TargetOpcode::G_FSQRT: { 1251e8d8bef9SDimitry Andric bool Unused; 1252e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1253e8d8bef9SDimitry Andric V = APFloat(sqrt(V.convertToDouble())); 1254e8d8bef9SDimitry Andric break; 1255e8d8bef9SDimitry Andric } 1256e8d8bef9SDimitry Andric case TargetOpcode::G_FLOG2: { 1257e8d8bef9SDimitry Andric bool Unused; 1258e8d8bef9SDimitry Andric V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused); 1259e8d8bef9SDimitry Andric V = APFloat(log2(V.convertToDouble())); 1260e8d8bef9SDimitry Andric break; 1261e8d8bef9SDimitry Andric } 1262e8d8bef9SDimitry Andric } 1263e8d8bef9SDimitry Andric // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise, 1264e8d8bef9SDimitry Andric // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`, 1265e8d8bef9SDimitry Andric // and `G_FLOG2` reach here. 1266e8d8bef9SDimitry Andric bool Unused; 1267e8d8bef9SDimitry Andric V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused); 1268e8d8bef9SDimitry Andric return V; 1269e8d8bef9SDimitry Andric } 1270e8d8bef9SDimitry Andric 1271e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI, 1272e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1273e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1274e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1275e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1276e8d8bef9SDimitry Andric Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI); 1277e8d8bef9SDimitry Andric return Cst.hasValue(); 1278e8d8bef9SDimitry Andric } 1279e8d8bef9SDimitry Andric 1280fe6060f1SDimitry Andric void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, 1281e8d8bef9SDimitry Andric Optional<APFloat> &Cst) { 1282e8d8bef9SDimitry Andric assert(Cst.hasValue() && "Optional is unexpectedly empty!"); 1283e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1284e8d8bef9SDimitry Andric MachineFunction &MF = Builder.getMF(); 1285e8d8bef9SDimitry Andric auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst); 1286e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1287e8d8bef9SDimitry Andric Builder.buildFConstant(DstReg, *FPVal); 1288e8d8bef9SDimitry Andric MI.eraseFromParent(); 1289e8d8bef9SDimitry Andric } 1290e8d8bef9SDimitry Andric 1291480093f4SDimitry Andric bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, 1292480093f4SDimitry Andric PtrAddChain &MatchInfo) { 1293480093f4SDimitry Andric // We're trying to match the following pattern: 1294480093f4SDimitry Andric // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 1295480093f4SDimitry Andric // %root = G_PTR_ADD %t1, G_CONSTANT imm2 1296480093f4SDimitry Andric // --> 1297480093f4SDimitry Andric // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2) 1298480093f4SDimitry Andric 1299480093f4SDimitry Andric if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) 1300480093f4SDimitry Andric return false; 1301480093f4SDimitry Andric 1302480093f4SDimitry Andric Register Add2 = MI.getOperand(1).getReg(); 1303480093f4SDimitry Andric Register Imm1 = MI.getOperand(2).getReg(); 1304*349cc55cSDimitry Andric auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI); 1305480093f4SDimitry Andric if (!MaybeImmVal) 1306480093f4SDimitry Andric return false; 1307480093f4SDimitry Andric 1308*349cc55cSDimitry Andric MachineInstr *Add2Def = MRI.getVRegDef(Add2); 1309480093f4SDimitry Andric if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) 1310480093f4SDimitry Andric return false; 1311480093f4SDimitry Andric 1312480093f4SDimitry Andric Register Base = Add2Def->getOperand(1).getReg(); 1313480093f4SDimitry Andric Register Imm2 = Add2Def->getOperand(2).getReg(); 1314*349cc55cSDimitry Andric auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI); 1315480093f4SDimitry Andric if (!MaybeImm2Val) 1316480093f4SDimitry Andric return false; 1317480093f4SDimitry Andric 1318*349cc55cSDimitry Andric // Check if the new combined immediate forms an illegal addressing mode. 1319*349cc55cSDimitry Andric // Do not combine if it was legal before but would get illegal. 1320*349cc55cSDimitry Andric // To do so, we need to find a load/store user of the pointer to get 1321*349cc55cSDimitry Andric // the access type. 1322*349cc55cSDimitry Andric Type *AccessTy = nullptr; 1323*349cc55cSDimitry Andric auto &MF = *MI.getMF(); 1324*349cc55cSDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) { 1325*349cc55cSDimitry Andric if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) { 1326*349cc55cSDimitry Andric AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)), 1327*349cc55cSDimitry Andric MF.getFunction().getContext()); 1328*349cc55cSDimitry Andric break; 1329*349cc55cSDimitry Andric } 1330*349cc55cSDimitry Andric } 1331*349cc55cSDimitry Andric TargetLoweringBase::AddrMode AMNew; 1332*349cc55cSDimitry Andric APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value; 1333*349cc55cSDimitry Andric AMNew.BaseOffs = CombinedImm.getSExtValue(); 1334*349cc55cSDimitry Andric if (AccessTy) { 1335*349cc55cSDimitry Andric AMNew.HasBaseReg = true; 1336*349cc55cSDimitry Andric TargetLoweringBase::AddrMode AMOld; 1337*349cc55cSDimitry Andric AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue(); 1338*349cc55cSDimitry Andric AMOld.HasBaseReg = true; 1339*349cc55cSDimitry Andric unsigned AS = MRI.getType(Add2).getAddressSpace(); 1340*349cc55cSDimitry Andric const auto &TLI = *MF.getSubtarget().getTargetLowering(); 1341*349cc55cSDimitry Andric if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) && 1342*349cc55cSDimitry Andric !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS)) 1343*349cc55cSDimitry Andric return false; 1344*349cc55cSDimitry Andric } 1345*349cc55cSDimitry Andric 1346480093f4SDimitry Andric // Pass the combined immediate to the apply function. 1347*349cc55cSDimitry Andric MatchInfo.Imm = AMNew.BaseOffs; 1348480093f4SDimitry Andric MatchInfo.Base = Base; 1349*349cc55cSDimitry Andric MatchInfo.Bank = getRegBank(Imm2); 1350480093f4SDimitry Andric return true; 1351480093f4SDimitry Andric } 1352480093f4SDimitry Andric 1353fe6060f1SDimitry Andric void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, 1354480093f4SDimitry Andric PtrAddChain &MatchInfo) { 1355480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 1356480093f4SDimitry Andric MachineIRBuilder MIB(MI); 1357480093f4SDimitry Andric LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); 1358480093f4SDimitry Andric auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); 1359*349cc55cSDimitry Andric setRegBank(NewOffset.getReg(0), MatchInfo.Bank); 1360480093f4SDimitry Andric Observer.changingInstr(MI); 1361480093f4SDimitry Andric MI.getOperand(1).setReg(MatchInfo.Base); 1362480093f4SDimitry Andric MI.getOperand(2).setReg(NewOffset.getReg(0)); 1363480093f4SDimitry Andric Observer.changedInstr(MI); 1364480093f4SDimitry Andric } 1365480093f4SDimitry Andric 1366e8d8bef9SDimitry Andric bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, 1367e8d8bef9SDimitry Andric RegisterImmPair &MatchInfo) { 1368e8d8bef9SDimitry Andric // We're trying to match the following pattern with any of 1369e8d8bef9SDimitry Andric // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: 1370e8d8bef9SDimitry Andric // %t1 = SHIFT %base, G_CONSTANT imm1 1371e8d8bef9SDimitry Andric // %root = SHIFT %t1, G_CONSTANT imm2 1372e8d8bef9SDimitry Andric // --> 1373e8d8bef9SDimitry Andric // %root = SHIFT %base, G_CONSTANT (imm1 + imm2) 1374e8d8bef9SDimitry Andric 1375e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1376e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1377e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || 1378e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_USHLSAT) && 1379e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); 1380e8d8bef9SDimitry Andric 1381e8d8bef9SDimitry Andric Register Shl2 = MI.getOperand(1).getReg(); 1382e8d8bef9SDimitry Andric Register Imm1 = MI.getOperand(2).getReg(); 1383*349cc55cSDimitry Andric auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI); 1384e8d8bef9SDimitry Andric if (!MaybeImmVal) 1385e8d8bef9SDimitry Andric return false; 1386e8d8bef9SDimitry Andric 1387e8d8bef9SDimitry Andric MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2); 1388e8d8bef9SDimitry Andric if (Shl2Def->getOpcode() != Opcode) 1389e8d8bef9SDimitry Andric return false; 1390e8d8bef9SDimitry Andric 1391e8d8bef9SDimitry Andric Register Base = Shl2Def->getOperand(1).getReg(); 1392e8d8bef9SDimitry Andric Register Imm2 = Shl2Def->getOperand(2).getReg(); 1393*349cc55cSDimitry Andric auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI); 1394e8d8bef9SDimitry Andric if (!MaybeImm2Val) 1395e8d8bef9SDimitry Andric return false; 1396e8d8bef9SDimitry Andric 1397e8d8bef9SDimitry Andric // Pass the combined immediate to the apply function. 1398e8d8bef9SDimitry Andric MatchInfo.Imm = 1399e8d8bef9SDimitry Andric (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue(); 1400e8d8bef9SDimitry Andric MatchInfo.Reg = Base; 1401e8d8bef9SDimitry Andric 1402e8d8bef9SDimitry Andric // There is no simple replacement for a saturating unsigned left shift that 1403e8d8bef9SDimitry Andric // exceeds the scalar size. 1404e8d8bef9SDimitry Andric if (Opcode == TargetOpcode::G_USHLSAT && 1405e8d8bef9SDimitry Andric MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits()) 1406e8d8bef9SDimitry Andric return false; 1407e8d8bef9SDimitry Andric 1408e8d8bef9SDimitry Andric return true; 1409e8d8bef9SDimitry Andric } 1410e8d8bef9SDimitry Andric 1411fe6060f1SDimitry Andric void CombinerHelper::applyShiftImmedChain(MachineInstr &MI, 1412e8d8bef9SDimitry Andric RegisterImmPair &MatchInfo) { 1413e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1414e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1415e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || 1416e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_USHLSAT) && 1417e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT"); 1418e8d8bef9SDimitry Andric 1419e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1420e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(1).getReg()); 1421e8d8bef9SDimitry Andric unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits(); 1422e8d8bef9SDimitry Andric auto Imm = MatchInfo.Imm; 1423e8d8bef9SDimitry Andric 1424e8d8bef9SDimitry Andric if (Imm >= ScalarSizeInBits) { 1425e8d8bef9SDimitry Andric // Any logical shift that exceeds scalar size will produce zero. 1426e8d8bef9SDimitry Andric if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) { 1427e8d8bef9SDimitry Andric Builder.buildConstant(MI.getOperand(0), 0); 1428e8d8bef9SDimitry Andric MI.eraseFromParent(); 1429fe6060f1SDimitry Andric return; 1430e8d8bef9SDimitry Andric } 1431e8d8bef9SDimitry Andric // Arithmetic shift and saturating signed left shift have no effect beyond 1432e8d8bef9SDimitry Andric // scalar size. 1433e8d8bef9SDimitry Andric Imm = ScalarSizeInBits - 1; 1434e8d8bef9SDimitry Andric } 1435e8d8bef9SDimitry Andric 1436e8d8bef9SDimitry Andric LLT ImmTy = MRI.getType(MI.getOperand(2).getReg()); 1437e8d8bef9SDimitry Andric Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0); 1438e8d8bef9SDimitry Andric Observer.changingInstr(MI); 1439e8d8bef9SDimitry Andric MI.getOperand(1).setReg(MatchInfo.Reg); 1440e8d8bef9SDimitry Andric MI.getOperand(2).setReg(NewImm); 1441e8d8bef9SDimitry Andric Observer.changedInstr(MI); 1442e8d8bef9SDimitry Andric } 1443e8d8bef9SDimitry Andric 1444e8d8bef9SDimitry Andric bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, 1445e8d8bef9SDimitry Andric ShiftOfShiftedLogic &MatchInfo) { 1446e8d8bef9SDimitry Andric // We're trying to match the following pattern with any of 1447e8d8bef9SDimitry Andric // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination 1448e8d8bef9SDimitry Andric // with any of G_AND/G_OR/G_XOR logic instructions. 1449e8d8bef9SDimitry Andric // %t1 = SHIFT %X, G_CONSTANT C0 1450e8d8bef9SDimitry Andric // %t2 = LOGIC %t1, %Y 1451e8d8bef9SDimitry Andric // %root = SHIFT %t2, G_CONSTANT C1 1452e8d8bef9SDimitry Andric // --> 1453e8d8bef9SDimitry Andric // %t3 = SHIFT %X, G_CONSTANT (C0+C1) 1454e8d8bef9SDimitry Andric // %t4 = SHIFT %Y, G_CONSTANT C1 1455e8d8bef9SDimitry Andric // %root = LOGIC %t3, %t4 1456e8d8bef9SDimitry Andric unsigned ShiftOpcode = MI.getOpcode(); 1457e8d8bef9SDimitry Andric assert((ShiftOpcode == TargetOpcode::G_SHL || 1458e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_ASHR || 1459e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_LSHR || 1460e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_USHLSAT || 1461e8d8bef9SDimitry Andric ShiftOpcode == TargetOpcode::G_SSHLSAT) && 1462e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); 1463e8d8bef9SDimitry Andric 1464e8d8bef9SDimitry Andric // Match a one-use bitwise logic op. 1465e8d8bef9SDimitry Andric Register LogicDest = MI.getOperand(1).getReg(); 1466e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(LogicDest)) 1467e8d8bef9SDimitry Andric return false; 1468e8d8bef9SDimitry Andric 1469e8d8bef9SDimitry Andric MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest); 1470e8d8bef9SDimitry Andric unsigned LogicOpcode = LogicMI->getOpcode(); 1471e8d8bef9SDimitry Andric if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR && 1472e8d8bef9SDimitry Andric LogicOpcode != TargetOpcode::G_XOR) 1473e8d8bef9SDimitry Andric return false; 1474e8d8bef9SDimitry Andric 1475e8d8bef9SDimitry Andric // Find a matching one-use shift by constant. 1476e8d8bef9SDimitry Andric const Register C1 = MI.getOperand(2).getReg(); 1477*349cc55cSDimitry Andric auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI); 1478e8d8bef9SDimitry Andric if (!MaybeImmVal) 1479e8d8bef9SDimitry Andric return false; 1480e8d8bef9SDimitry Andric 1481e8d8bef9SDimitry Andric const uint64_t C1Val = MaybeImmVal->Value.getZExtValue(); 1482e8d8bef9SDimitry Andric 1483e8d8bef9SDimitry Andric auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) { 1484e8d8bef9SDimitry Andric // Shift should match previous one and should be a one-use. 1485e8d8bef9SDimitry Andric if (MI->getOpcode() != ShiftOpcode || 1486e8d8bef9SDimitry Andric !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1487e8d8bef9SDimitry Andric return false; 1488e8d8bef9SDimitry Andric 1489e8d8bef9SDimitry Andric // Must be a constant. 1490e8d8bef9SDimitry Andric auto MaybeImmVal = 1491*349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1492e8d8bef9SDimitry Andric if (!MaybeImmVal) 1493e8d8bef9SDimitry Andric return false; 1494e8d8bef9SDimitry Andric 1495e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.getSExtValue(); 1496e8d8bef9SDimitry Andric return true; 1497e8d8bef9SDimitry Andric }; 1498e8d8bef9SDimitry Andric 1499e8d8bef9SDimitry Andric // Logic ops are commutative, so check each operand for a match. 1500e8d8bef9SDimitry Andric Register LogicMIReg1 = LogicMI->getOperand(1).getReg(); 1501e8d8bef9SDimitry Andric MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1); 1502e8d8bef9SDimitry Andric Register LogicMIReg2 = LogicMI->getOperand(2).getReg(); 1503e8d8bef9SDimitry Andric MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2); 1504e8d8bef9SDimitry Andric uint64_t C0Val; 1505e8d8bef9SDimitry Andric 1506e8d8bef9SDimitry Andric if (matchFirstShift(LogicMIOp1, C0Val)) { 1507e8d8bef9SDimitry Andric MatchInfo.LogicNonShiftReg = LogicMIReg2; 1508e8d8bef9SDimitry Andric MatchInfo.Shift2 = LogicMIOp1; 1509e8d8bef9SDimitry Andric } else if (matchFirstShift(LogicMIOp2, C0Val)) { 1510e8d8bef9SDimitry Andric MatchInfo.LogicNonShiftReg = LogicMIReg1; 1511e8d8bef9SDimitry Andric MatchInfo.Shift2 = LogicMIOp2; 1512e8d8bef9SDimitry Andric } else 1513e8d8bef9SDimitry Andric return false; 1514e8d8bef9SDimitry Andric 1515e8d8bef9SDimitry Andric MatchInfo.ValSum = C0Val + C1Val; 1516e8d8bef9SDimitry Andric 1517e8d8bef9SDimitry Andric // The fold is not valid if the sum of the shift values exceeds bitwidth. 1518e8d8bef9SDimitry Andric if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits()) 1519e8d8bef9SDimitry Andric return false; 1520e8d8bef9SDimitry Andric 1521e8d8bef9SDimitry Andric MatchInfo.Logic = LogicMI; 1522e8d8bef9SDimitry Andric return true; 1523e8d8bef9SDimitry Andric } 1524e8d8bef9SDimitry Andric 1525fe6060f1SDimitry Andric void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, 1526e8d8bef9SDimitry Andric ShiftOfShiftedLogic &MatchInfo) { 1527e8d8bef9SDimitry Andric unsigned Opcode = MI.getOpcode(); 1528e8d8bef9SDimitry Andric assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || 1529e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || 1530e8d8bef9SDimitry Andric Opcode == TargetOpcode::G_SSHLSAT) && 1531e8d8bef9SDimitry Andric "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT"); 1532e8d8bef9SDimitry Andric 1533e8d8bef9SDimitry Andric LLT ShlType = MRI.getType(MI.getOperand(2).getReg()); 1534e8d8bef9SDimitry Andric LLT DestType = MRI.getType(MI.getOperand(0).getReg()); 1535e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1536e8d8bef9SDimitry Andric 1537e8d8bef9SDimitry Andric Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0); 1538e8d8bef9SDimitry Andric 1539e8d8bef9SDimitry Andric Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg(); 1540e8d8bef9SDimitry Andric Register Shift1 = 1541e8d8bef9SDimitry Andric Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0); 1542e8d8bef9SDimitry Andric 1543e8d8bef9SDimitry Andric Register Shift2Const = MI.getOperand(2).getReg(); 1544e8d8bef9SDimitry Andric Register Shift2 = Builder 1545e8d8bef9SDimitry Andric .buildInstr(Opcode, {DestType}, 1546e8d8bef9SDimitry Andric {MatchInfo.LogicNonShiftReg, Shift2Const}) 1547e8d8bef9SDimitry Andric .getReg(0); 1548e8d8bef9SDimitry Andric 1549e8d8bef9SDimitry Andric Register Dest = MI.getOperand(0).getReg(); 1550e8d8bef9SDimitry Andric Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2}); 1551e8d8bef9SDimitry Andric 1552e8d8bef9SDimitry Andric // These were one use so it's safe to remove them. 1553*349cc55cSDimitry Andric MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval(); 1554*349cc55cSDimitry Andric MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval(); 1555e8d8bef9SDimitry Andric 1556e8d8bef9SDimitry Andric MI.eraseFromParent(); 1557e8d8bef9SDimitry Andric } 1558e8d8bef9SDimitry Andric 15595ffd83dbSDimitry Andric bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, 15605ffd83dbSDimitry Andric unsigned &ShiftVal) { 15615ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 15625ffd83dbSDimitry Andric auto MaybeImmVal = 1563*349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 1564e8d8bef9SDimitry Andric if (!MaybeImmVal) 15655ffd83dbSDimitry Andric return false; 1566e8d8bef9SDimitry Andric 1567e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.exactLogBase2(); 1568e8d8bef9SDimitry Andric return (static_cast<int32_t>(ShiftVal) != -1); 15695ffd83dbSDimitry Andric } 15705ffd83dbSDimitry Andric 1571fe6060f1SDimitry Andric void CombinerHelper::applyCombineMulToShl(MachineInstr &MI, 15725ffd83dbSDimitry Andric unsigned &ShiftVal) { 15735ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 15745ffd83dbSDimitry Andric MachineIRBuilder MIB(MI); 15755ffd83dbSDimitry Andric LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg()); 15765ffd83dbSDimitry Andric auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal); 15775ffd83dbSDimitry Andric Observer.changingInstr(MI); 15785ffd83dbSDimitry Andric MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL)); 15795ffd83dbSDimitry Andric MI.getOperand(2).setReg(ShiftCst.getReg(0)); 15805ffd83dbSDimitry Andric Observer.changedInstr(MI); 15815ffd83dbSDimitry Andric } 15825ffd83dbSDimitry Andric 1583e8d8bef9SDimitry Andric // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source 1584e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, 1585e8d8bef9SDimitry Andric RegisterImmPair &MatchData) { 1586e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); 1587e8d8bef9SDimitry Andric 1588e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 1589e8d8bef9SDimitry Andric 1590e8d8bef9SDimitry Andric Register ExtSrc; 1591e8d8bef9SDimitry Andric if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) && 1592e8d8bef9SDimitry Andric !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) && 1593e8d8bef9SDimitry Andric !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) 1594e8d8bef9SDimitry Andric return false; 1595e8d8bef9SDimitry Andric 1596e8d8bef9SDimitry Andric // TODO: Should handle vector splat. 1597e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 1598*349cc55cSDimitry Andric auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI); 1599e8d8bef9SDimitry Andric if (!MaybeShiftAmtVal) 1600e8d8bef9SDimitry Andric return false; 1601e8d8bef9SDimitry Andric 1602e8d8bef9SDimitry Andric if (LI) { 1603e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(ExtSrc); 1604e8d8bef9SDimitry Andric 1605e8d8bef9SDimitry Andric // We only really care about the legality with the shifted value. We can 1606e8d8bef9SDimitry Andric // pick any type the constant shift amount, so ask the target what to 1607e8d8bef9SDimitry Andric // use. Otherwise we would have to guess and hope it is reported as legal. 1608e8d8bef9SDimitry Andric LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy); 1609e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}})) 1610e8d8bef9SDimitry Andric return false; 1611e8d8bef9SDimitry Andric } 1612e8d8bef9SDimitry Andric 1613e8d8bef9SDimitry Andric int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); 1614e8d8bef9SDimitry Andric MatchData.Reg = ExtSrc; 1615e8d8bef9SDimitry Andric MatchData.Imm = ShiftAmt; 1616e8d8bef9SDimitry Andric 1617e8d8bef9SDimitry Andric unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes(); 1618e8d8bef9SDimitry Andric return MinLeadingZeros >= ShiftAmt; 1619e8d8bef9SDimitry Andric } 1620e8d8bef9SDimitry Andric 1621fe6060f1SDimitry Andric void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, 1622e8d8bef9SDimitry Andric const RegisterImmPair &MatchData) { 1623e8d8bef9SDimitry Andric Register ExtSrcReg = MatchData.Reg; 1624e8d8bef9SDimitry Andric int64_t ShiftAmtVal = MatchData.Imm; 1625e8d8bef9SDimitry Andric 1626e8d8bef9SDimitry Andric LLT ExtSrcTy = MRI.getType(ExtSrcReg); 1627e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1628e8d8bef9SDimitry Andric auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal); 1629e8d8bef9SDimitry Andric auto NarrowShift = 1630e8d8bef9SDimitry Andric Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags()); 1631e8d8bef9SDimitry Andric Builder.buildZExt(MI.getOperand(0), NarrowShift); 1632e8d8bef9SDimitry Andric MI.eraseFromParent(); 1633fe6060f1SDimitry Andric } 1634fe6060f1SDimitry Andric 1635fe6060f1SDimitry Andric bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI, 1636fe6060f1SDimitry Andric Register &MatchInfo) { 1637fe6060f1SDimitry Andric GMerge &Merge = cast<GMerge>(MI); 1638fe6060f1SDimitry Andric SmallVector<Register, 16> MergedValues; 1639fe6060f1SDimitry Andric for (unsigned I = 0; I < Merge.getNumSources(); ++I) 1640fe6060f1SDimitry Andric MergedValues.emplace_back(Merge.getSourceReg(I)); 1641fe6060f1SDimitry Andric 1642fe6060f1SDimitry Andric auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI); 1643fe6060f1SDimitry Andric if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources()) 1644fe6060f1SDimitry Andric return false; 1645fe6060f1SDimitry Andric 1646fe6060f1SDimitry Andric for (unsigned I = 0; I < MergedValues.size(); ++I) 1647fe6060f1SDimitry Andric if (MergedValues[I] != Unmerge->getReg(I)) 1648fe6060f1SDimitry Andric return false; 1649fe6060f1SDimitry Andric 1650fe6060f1SDimitry Andric MatchInfo = Unmerge->getSourceReg(); 1651e8d8bef9SDimitry Andric return true; 1652e8d8bef9SDimitry Andric } 1653e8d8bef9SDimitry Andric 1654e8d8bef9SDimitry Andric static Register peekThroughBitcast(Register Reg, 1655e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 1656e8d8bef9SDimitry Andric while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg)))) 1657e8d8bef9SDimitry Andric ; 1658e8d8bef9SDimitry Andric 1659e8d8bef9SDimitry Andric return Reg; 1660e8d8bef9SDimitry Andric } 1661e8d8bef9SDimitry Andric 1662e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( 1663e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &Operands) { 1664e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1665e8d8bef9SDimitry Andric "Expected an unmerge"); 1666*349cc55cSDimitry Andric auto &Unmerge = cast<GUnmerge>(MI); 1667*349cc55cSDimitry Andric Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI); 1668e8d8bef9SDimitry Andric 1669*349cc55cSDimitry Andric auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI); 1670*349cc55cSDimitry Andric if (!SrcInstr) 1671e8d8bef9SDimitry Andric return false; 1672e8d8bef9SDimitry Andric 1673e8d8bef9SDimitry Andric // Check the source type of the merge. 1674*349cc55cSDimitry Andric LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0)); 1675*349cc55cSDimitry Andric LLT Dst0Ty = MRI.getType(Unmerge.getReg(0)); 1676e8d8bef9SDimitry Andric bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits(); 1677e8d8bef9SDimitry Andric if (SrcMergeTy != Dst0Ty && !SameSize) 1678e8d8bef9SDimitry Andric return false; 1679e8d8bef9SDimitry Andric // They are the same now (modulo a bitcast). 1680e8d8bef9SDimitry Andric // We can collect all the src registers. 1681*349cc55cSDimitry Andric for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx) 1682*349cc55cSDimitry Andric Operands.push_back(SrcInstr->getSourceReg(Idx)); 1683e8d8bef9SDimitry Andric return true; 1684e8d8bef9SDimitry Andric } 1685e8d8bef9SDimitry Andric 1686fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeMergeToPlainValues( 1687e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &Operands) { 1688e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1689e8d8bef9SDimitry Andric "Expected an unmerge"); 1690e8d8bef9SDimitry Andric assert((MI.getNumOperands() - 1 == Operands.size()) && 1691e8d8bef9SDimitry Andric "Not enough operands to replace all defs"); 1692e8d8bef9SDimitry Andric unsigned NumElems = MI.getNumOperands() - 1; 1693e8d8bef9SDimitry Andric 1694e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(Operands[0]); 1695e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 1696e8d8bef9SDimitry Andric bool CanReuseInputDirectly = DstTy == SrcTy; 1697e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1698e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) { 1699e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(Idx).getReg(); 1700e8d8bef9SDimitry Andric Register SrcReg = Operands[Idx]; 1701e8d8bef9SDimitry Andric if (CanReuseInputDirectly) 1702e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 1703e8d8bef9SDimitry Andric else 1704e8d8bef9SDimitry Andric Builder.buildCast(DstReg, SrcReg); 1705e8d8bef9SDimitry Andric } 1706e8d8bef9SDimitry Andric MI.eraseFromParent(); 1707e8d8bef9SDimitry Andric } 1708e8d8bef9SDimitry Andric 1709e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, 1710e8d8bef9SDimitry Andric SmallVectorImpl<APInt> &Csts) { 1711e8d8bef9SDimitry Andric unsigned SrcIdx = MI.getNumOperands() - 1; 1712e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(SrcIdx).getReg(); 1713e8d8bef9SDimitry Andric MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); 1714e8d8bef9SDimitry Andric if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && 1715e8d8bef9SDimitry Andric SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) 1716e8d8bef9SDimitry Andric return false; 1717e8d8bef9SDimitry Andric // Break down the big constant in smaller ones. 1718e8d8bef9SDimitry Andric const MachineOperand &CstVal = SrcInstr->getOperand(1); 1719e8d8bef9SDimitry Andric APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT 1720e8d8bef9SDimitry Andric ? CstVal.getCImm()->getValue() 1721e8d8bef9SDimitry Andric : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); 1722e8d8bef9SDimitry Andric 1723e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); 1724e8d8bef9SDimitry Andric unsigned ShiftAmt = Dst0Ty.getSizeInBits(); 1725e8d8bef9SDimitry Andric // Unmerge a constant. 1726e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { 1727e8d8bef9SDimitry Andric Csts.emplace_back(Val.trunc(ShiftAmt)); 1728e8d8bef9SDimitry Andric Val = Val.lshr(ShiftAmt); 1729e8d8bef9SDimitry Andric } 1730e8d8bef9SDimitry Andric 1731e8d8bef9SDimitry Andric return true; 1732e8d8bef9SDimitry Andric } 1733e8d8bef9SDimitry Andric 1734fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, 1735e8d8bef9SDimitry Andric SmallVectorImpl<APInt> &Csts) { 1736e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1737e8d8bef9SDimitry Andric "Expected an unmerge"); 1738e8d8bef9SDimitry Andric assert((MI.getNumOperands() - 1 == Csts.size()) && 1739e8d8bef9SDimitry Andric "Not enough operands to replace all defs"); 1740e8d8bef9SDimitry Andric unsigned NumElems = MI.getNumOperands() - 1; 1741e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1742e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx) { 1743e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(Idx).getReg(); 1744e8d8bef9SDimitry Andric Builder.buildConstant(DstReg, Csts[Idx]); 1745e8d8bef9SDimitry Andric } 1746e8d8bef9SDimitry Andric 1747e8d8bef9SDimitry Andric MI.eraseFromParent(); 1748e8d8bef9SDimitry Andric } 1749e8d8bef9SDimitry Andric 1750e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 1751e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1752e8d8bef9SDimitry Andric "Expected an unmerge"); 1753e8d8bef9SDimitry Andric // Check that all the lanes are dead except the first one. 1754e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { 1755e8d8bef9SDimitry Andric if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg())) 1756e8d8bef9SDimitry Andric return false; 1757e8d8bef9SDimitry Andric } 1758e8d8bef9SDimitry Andric return true; 1759e8d8bef9SDimitry Andric } 1760e8d8bef9SDimitry Andric 1761fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { 1762e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1763e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); 1764e8d8bef9SDimitry Andric // Truncating a vector is going to truncate every single lane, 1765e8d8bef9SDimitry Andric // whereas we want the full lowbits. 1766e8d8bef9SDimitry Andric // Do the operation on a scalar instead. 1767e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1768e8d8bef9SDimitry Andric if (SrcTy.isVector()) 1769e8d8bef9SDimitry Andric SrcReg = 1770e8d8bef9SDimitry Andric Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0); 1771e8d8bef9SDimitry Andric 1772e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 1773e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 1774e8d8bef9SDimitry Andric if (Dst0Ty.isVector()) { 1775e8d8bef9SDimitry Andric auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg); 1776e8d8bef9SDimitry Andric Builder.buildCast(Dst0Reg, MIB); 1777e8d8bef9SDimitry Andric } else 1778e8d8bef9SDimitry Andric Builder.buildTrunc(Dst0Reg, SrcReg); 1779e8d8bef9SDimitry Andric MI.eraseFromParent(); 1780e8d8bef9SDimitry Andric } 1781e8d8bef9SDimitry Andric 1782e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { 1783e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1784e8d8bef9SDimitry Andric "Expected an unmerge"); 1785e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 1786e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 1787e8d8bef9SDimitry Andric // G_ZEXT on vector applies to each lane, so it will 1788e8d8bef9SDimitry Andric // affect all destinations. Therefore we won't be able 1789e8d8bef9SDimitry Andric // to simplify the unmerge to just the first definition. 1790e8d8bef9SDimitry Andric if (Dst0Ty.isVector()) 1791e8d8bef9SDimitry Andric return false; 1792e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); 1793e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1794e8d8bef9SDimitry Andric if (SrcTy.isVector()) 1795e8d8bef9SDimitry Andric return false; 1796e8d8bef9SDimitry Andric 1797e8d8bef9SDimitry Andric Register ZExtSrcReg; 1798e8d8bef9SDimitry Andric if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) 1799e8d8bef9SDimitry Andric return false; 1800e8d8bef9SDimitry Andric 1801e8d8bef9SDimitry Andric // Finally we can replace the first definition with 1802e8d8bef9SDimitry Andric // a zext of the source if the definition is big enough to hold 1803e8d8bef9SDimitry Andric // all of ZExtSrc bits. 1804e8d8bef9SDimitry Andric LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); 1805e8d8bef9SDimitry Andric return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); 1806e8d8bef9SDimitry Andric } 1807e8d8bef9SDimitry Andric 1808fe6060f1SDimitry Andric void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { 1809e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 1810e8d8bef9SDimitry Andric "Expected an unmerge"); 1811e8d8bef9SDimitry Andric 1812e8d8bef9SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 1813e8d8bef9SDimitry Andric 1814e8d8bef9SDimitry Andric MachineInstr *ZExtInstr = 1815e8d8bef9SDimitry Andric MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg()); 1816e8d8bef9SDimitry Andric assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT && 1817e8d8bef9SDimitry Andric "Expecting a G_ZEXT"); 1818e8d8bef9SDimitry Andric 1819e8d8bef9SDimitry Andric Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg(); 1820e8d8bef9SDimitry Andric LLT Dst0Ty = MRI.getType(Dst0Reg); 1821e8d8bef9SDimitry Andric LLT ZExtSrcTy = MRI.getType(ZExtSrcReg); 1822e8d8bef9SDimitry Andric 1823e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 1824e8d8bef9SDimitry Andric 1825e8d8bef9SDimitry Andric if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) { 1826e8d8bef9SDimitry Andric Builder.buildZExt(Dst0Reg, ZExtSrcReg); 1827e8d8bef9SDimitry Andric } else { 1828e8d8bef9SDimitry Andric assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() && 1829e8d8bef9SDimitry Andric "ZExt src doesn't fit in destination"); 1830e8d8bef9SDimitry Andric replaceRegWith(MRI, Dst0Reg, ZExtSrcReg); 1831e8d8bef9SDimitry Andric } 1832e8d8bef9SDimitry Andric 1833e8d8bef9SDimitry Andric Register ZeroReg; 1834e8d8bef9SDimitry Andric for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) { 1835e8d8bef9SDimitry Andric if (!ZeroReg) 1836e8d8bef9SDimitry Andric ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0); 1837e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg); 1838e8d8bef9SDimitry Andric } 1839e8d8bef9SDimitry Andric MI.eraseFromParent(); 1840e8d8bef9SDimitry Andric } 1841e8d8bef9SDimitry Andric 18425ffd83dbSDimitry Andric bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, 18435ffd83dbSDimitry Andric unsigned TargetShiftSize, 18445ffd83dbSDimitry Andric unsigned &ShiftVal) { 18455ffd83dbSDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_SHL || 18465ffd83dbSDimitry Andric MI.getOpcode() == TargetOpcode::G_LSHR || 18475ffd83dbSDimitry Andric MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift"); 18485ffd83dbSDimitry Andric 18495ffd83dbSDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 18505ffd83dbSDimitry Andric if (Ty.isVector()) // TODO: 18515ffd83dbSDimitry Andric return false; 18525ffd83dbSDimitry Andric 18535ffd83dbSDimitry Andric // Don't narrow further than the requested size. 18545ffd83dbSDimitry Andric unsigned Size = Ty.getSizeInBits(); 18555ffd83dbSDimitry Andric if (Size <= TargetShiftSize) 18565ffd83dbSDimitry Andric return false; 18575ffd83dbSDimitry Andric 18585ffd83dbSDimitry Andric auto MaybeImmVal = 1859*349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 18605ffd83dbSDimitry Andric if (!MaybeImmVal) 18615ffd83dbSDimitry Andric return false; 18625ffd83dbSDimitry Andric 1863e8d8bef9SDimitry Andric ShiftVal = MaybeImmVal->Value.getSExtValue(); 18645ffd83dbSDimitry Andric return ShiftVal >= Size / 2 && ShiftVal < Size; 18655ffd83dbSDimitry Andric } 18665ffd83dbSDimitry Andric 1867fe6060f1SDimitry Andric void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, 18685ffd83dbSDimitry Andric const unsigned &ShiftVal) { 18695ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 18705ffd83dbSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 18715ffd83dbSDimitry Andric LLT Ty = MRI.getType(SrcReg); 18725ffd83dbSDimitry Andric unsigned Size = Ty.getSizeInBits(); 18735ffd83dbSDimitry Andric unsigned HalfSize = Size / 2; 18745ffd83dbSDimitry Andric assert(ShiftVal >= HalfSize); 18755ffd83dbSDimitry Andric 18765ffd83dbSDimitry Andric LLT HalfTy = LLT::scalar(HalfSize); 18775ffd83dbSDimitry Andric 18785ffd83dbSDimitry Andric Builder.setInstr(MI); 18795ffd83dbSDimitry Andric auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg); 18805ffd83dbSDimitry Andric unsigned NarrowShiftAmt = ShiftVal - HalfSize; 18815ffd83dbSDimitry Andric 18825ffd83dbSDimitry Andric if (MI.getOpcode() == TargetOpcode::G_LSHR) { 18835ffd83dbSDimitry Andric Register Narrowed = Unmerge.getReg(1); 18845ffd83dbSDimitry Andric 18855ffd83dbSDimitry Andric // dst = G_LSHR s64:x, C for C >= 32 18865ffd83dbSDimitry Andric // => 18875ffd83dbSDimitry Andric // lo, hi = G_UNMERGE_VALUES x 18885ffd83dbSDimitry Andric // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0 18895ffd83dbSDimitry Andric 18905ffd83dbSDimitry Andric if (NarrowShiftAmt != 0) { 18915ffd83dbSDimitry Andric Narrowed = Builder.buildLShr(HalfTy, Narrowed, 18925ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 18935ffd83dbSDimitry Andric } 18945ffd83dbSDimitry Andric 18955ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 18965ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Narrowed, Zero }); 18975ffd83dbSDimitry Andric } else if (MI.getOpcode() == TargetOpcode::G_SHL) { 18985ffd83dbSDimitry Andric Register Narrowed = Unmerge.getReg(0); 18995ffd83dbSDimitry Andric // dst = G_SHL s64:x, C for C >= 32 19005ffd83dbSDimitry Andric // => 19015ffd83dbSDimitry Andric // lo, hi = G_UNMERGE_VALUES x 19025ffd83dbSDimitry Andric // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32) 19035ffd83dbSDimitry Andric if (NarrowShiftAmt != 0) { 19045ffd83dbSDimitry Andric Narrowed = Builder.buildShl(HalfTy, Narrowed, 19055ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0); 19065ffd83dbSDimitry Andric } 19075ffd83dbSDimitry Andric 19085ffd83dbSDimitry Andric auto Zero = Builder.buildConstant(HalfTy, 0); 19095ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Zero, Narrowed }); 19105ffd83dbSDimitry Andric } else { 19115ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 19125ffd83dbSDimitry Andric auto Hi = Builder.buildAShr( 19135ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 19145ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, HalfSize - 1)); 19155ffd83dbSDimitry Andric 19165ffd83dbSDimitry Andric if (ShiftVal == HalfSize) { 19175ffd83dbSDimitry Andric // (G_ASHR i64:x, 32) -> 19185ffd83dbSDimitry Andric // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31) 19195ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi }); 19205ffd83dbSDimitry Andric } else if (ShiftVal == Size - 1) { 19215ffd83dbSDimitry Andric // Don't need a second shift. 19225ffd83dbSDimitry Andric // (G_ASHR i64:x, 63) -> 19235ffd83dbSDimitry Andric // %narrowed = (G_ASHR hi_32(x), 31) 19245ffd83dbSDimitry Andric // G_MERGE_VALUES %narrowed, %narrowed 19255ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Hi, Hi }); 19265ffd83dbSDimitry Andric } else { 19275ffd83dbSDimitry Andric auto Lo = Builder.buildAShr( 19285ffd83dbSDimitry Andric HalfTy, Unmerge.getReg(1), 19295ffd83dbSDimitry Andric Builder.buildConstant(HalfTy, ShiftVal - HalfSize)); 19305ffd83dbSDimitry Andric 19315ffd83dbSDimitry Andric // (G_ASHR i64:x, C) ->, for C >= 32 19325ffd83dbSDimitry Andric // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31) 19335ffd83dbSDimitry Andric Builder.buildMerge(DstReg, { Lo, Hi }); 19345ffd83dbSDimitry Andric } 19355ffd83dbSDimitry Andric } 19365ffd83dbSDimitry Andric 19375ffd83dbSDimitry Andric MI.eraseFromParent(); 19385ffd83dbSDimitry Andric } 19395ffd83dbSDimitry Andric 19405ffd83dbSDimitry Andric bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, 19415ffd83dbSDimitry Andric unsigned TargetShiftAmount) { 19425ffd83dbSDimitry Andric unsigned ShiftAmt; 19435ffd83dbSDimitry Andric if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) { 19445ffd83dbSDimitry Andric applyCombineShiftToUnmerge(MI, ShiftAmt); 19455ffd83dbSDimitry Andric return true; 19465ffd83dbSDimitry Andric } 19475ffd83dbSDimitry Andric 19485ffd83dbSDimitry Andric return false; 19495ffd83dbSDimitry Andric } 19505ffd83dbSDimitry Andric 1951e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { 1952e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); 1953e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1954e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1955e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1956e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, 1957e8d8bef9SDimitry Andric m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); 1958e8d8bef9SDimitry Andric } 1959e8d8bef9SDimitry Andric 1960fe6060f1SDimitry Andric void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { 1961e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); 1962e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1963e8d8bef9SDimitry Andric Builder.setInstr(MI); 1964e8d8bef9SDimitry Andric Builder.buildCopy(DstReg, Reg); 1965e8d8bef9SDimitry Andric MI.eraseFromParent(); 1966e8d8bef9SDimitry Andric } 1967e8d8bef9SDimitry Andric 1968e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { 1969e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); 1970e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1971e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); 1972e8d8bef9SDimitry Andric } 1973e8d8bef9SDimitry Andric 1974fe6060f1SDimitry Andric void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { 1975e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); 1976e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1977e8d8bef9SDimitry Andric Builder.setInstr(MI); 1978e8d8bef9SDimitry Andric Builder.buildZExtOrTrunc(DstReg, Reg); 1979e8d8bef9SDimitry Andric MI.eraseFromParent(); 1980e8d8bef9SDimitry Andric } 1981e8d8bef9SDimitry Andric 1982e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineAddP2IToPtrAdd( 1983e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, bool> &PtrReg) { 1984e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ADD); 1985e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 1986e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 1987e8d8bef9SDimitry Andric LLT IntTy = MRI.getType(LHS); 1988e8d8bef9SDimitry Andric 1989e8d8bef9SDimitry Andric // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the 1990e8d8bef9SDimitry Andric // instruction. 1991e8d8bef9SDimitry Andric PtrReg.second = false; 1992e8d8bef9SDimitry Andric for (Register SrcReg : {LHS, RHS}) { 1993e8d8bef9SDimitry Andric if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) { 1994e8d8bef9SDimitry Andric // Don't handle cases where the integer is implicitly converted to the 1995e8d8bef9SDimitry Andric // pointer width. 1996e8d8bef9SDimitry Andric LLT PtrTy = MRI.getType(PtrReg.first); 1997e8d8bef9SDimitry Andric if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits()) 1998e8d8bef9SDimitry Andric return true; 1999e8d8bef9SDimitry Andric } 2000e8d8bef9SDimitry Andric 2001e8d8bef9SDimitry Andric PtrReg.second = true; 2002e8d8bef9SDimitry Andric } 2003e8d8bef9SDimitry Andric 2004e8d8bef9SDimitry Andric return false; 2005e8d8bef9SDimitry Andric } 2006e8d8bef9SDimitry Andric 2007fe6060f1SDimitry Andric void CombinerHelper::applyCombineAddP2IToPtrAdd( 2008e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, bool> &PtrReg) { 2009e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2010e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2011e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2012e8d8bef9SDimitry Andric 2013e8d8bef9SDimitry Andric const bool DoCommute = PtrReg.second; 2014e8d8bef9SDimitry Andric if (DoCommute) 2015e8d8bef9SDimitry Andric std::swap(LHS, RHS); 2016e8d8bef9SDimitry Andric LHS = PtrReg.first; 2017e8d8bef9SDimitry Andric 2018e8d8bef9SDimitry Andric LLT PtrTy = MRI.getType(LHS); 2019e8d8bef9SDimitry Andric 2020e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2021e8d8bef9SDimitry Andric auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS); 2022e8d8bef9SDimitry Andric Builder.buildPtrToInt(Dst, PtrAdd); 2023e8d8bef9SDimitry Andric MI.eraseFromParent(); 2024e8d8bef9SDimitry Andric } 2025e8d8bef9SDimitry Andric 2026e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, 2027e8d8bef9SDimitry Andric int64_t &NewCst) { 2028*349cc55cSDimitry Andric auto &PtrAdd = cast<GPtrAdd>(MI); 2029*349cc55cSDimitry Andric Register LHS = PtrAdd.getBaseReg(); 2030*349cc55cSDimitry Andric Register RHS = PtrAdd.getOffsetReg(); 2031e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); 2032e8d8bef9SDimitry Andric 2033*349cc55cSDimitry Andric if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) { 2034e8d8bef9SDimitry Andric int64_t Cst; 2035e8d8bef9SDimitry Andric if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { 2036e8d8bef9SDimitry Andric NewCst = Cst + *RHSCst; 2037e8d8bef9SDimitry Andric return true; 2038e8d8bef9SDimitry Andric } 2039e8d8bef9SDimitry Andric } 2040e8d8bef9SDimitry Andric 2041e8d8bef9SDimitry Andric return false; 2042e8d8bef9SDimitry Andric } 2043e8d8bef9SDimitry Andric 2044fe6060f1SDimitry Andric void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, 2045e8d8bef9SDimitry Andric int64_t &NewCst) { 2046*349cc55cSDimitry Andric auto &PtrAdd = cast<GPtrAdd>(MI); 2047*349cc55cSDimitry Andric Register Dst = PtrAdd.getReg(0); 2048e8d8bef9SDimitry Andric 2049e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2050e8d8bef9SDimitry Andric Builder.buildConstant(Dst, NewCst); 2051*349cc55cSDimitry Andric PtrAdd.eraseFromParent(); 2052e8d8bef9SDimitry Andric } 2053e8d8bef9SDimitry Andric 2054e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { 2055e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); 2056e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2057e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2058e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2059e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, 2060e8d8bef9SDimitry Andric m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); 2061e8d8bef9SDimitry Andric } 2062e8d8bef9SDimitry Andric 2063fe6060f1SDimitry Andric bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) { 2064fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT"); 2065e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2066fe6060f1SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2067fe6060f1SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2068fe6060f1SDimitry Andric if (mi_match(SrcReg, MRI, 2069fe6060f1SDimitry Andric m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) { 2070fe6060f1SDimitry Andric unsigned DstSize = DstTy.getScalarSizeInBits(); 2071fe6060f1SDimitry Andric unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits(); 2072fe6060f1SDimitry Andric return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize; 2073fe6060f1SDimitry Andric } 2074fe6060f1SDimitry Andric return false; 2075e8d8bef9SDimitry Andric } 2076e8d8bef9SDimitry Andric 2077e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineExtOfExt( 2078e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 2079e8d8bef9SDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || 2080e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_SEXT || 2081e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_ZEXT) && 2082e8d8bef9SDimitry Andric "Expected a G_[ASZ]EXT"); 2083e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2084e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2085e8d8bef9SDimitry Andric // Match exts with the same opcode, anyext([sz]ext) and sext(zext). 2086e8d8bef9SDimitry Andric unsigned Opc = MI.getOpcode(); 2087e8d8bef9SDimitry Andric unsigned SrcOpc = SrcMI->getOpcode(); 2088e8d8bef9SDimitry Andric if (Opc == SrcOpc || 2089e8d8bef9SDimitry Andric (Opc == TargetOpcode::G_ANYEXT && 2090e8d8bef9SDimitry Andric (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) || 2091e8d8bef9SDimitry Andric (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) { 2092e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc); 2093e8d8bef9SDimitry Andric return true; 2094e8d8bef9SDimitry Andric } 2095e8d8bef9SDimitry Andric return false; 2096e8d8bef9SDimitry Andric } 2097e8d8bef9SDimitry Andric 2098fe6060f1SDimitry Andric void CombinerHelper::applyCombineExtOfExt( 2099e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { 2100e8d8bef9SDimitry Andric assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || 2101e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_SEXT || 2102e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_ZEXT) && 2103e8d8bef9SDimitry Andric "Expected a G_[ASZ]EXT"); 2104e8d8bef9SDimitry Andric 2105e8d8bef9SDimitry Andric Register Reg = std::get<0>(MatchInfo); 2106e8d8bef9SDimitry Andric unsigned SrcExtOp = std::get<1>(MatchInfo); 2107e8d8bef9SDimitry Andric 2108e8d8bef9SDimitry Andric // Combine exts with the same opcode. 2109e8d8bef9SDimitry Andric if (MI.getOpcode() == SrcExtOp) { 2110e8d8bef9SDimitry Andric Observer.changingInstr(MI); 2111e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Reg); 2112e8d8bef9SDimitry Andric Observer.changedInstr(MI); 2113fe6060f1SDimitry Andric return; 2114e8d8bef9SDimitry Andric } 2115e8d8bef9SDimitry Andric 2116e8d8bef9SDimitry Andric // Combine: 2117e8d8bef9SDimitry Andric // - anyext([sz]ext x) to [sz]ext x 2118e8d8bef9SDimitry Andric // - sext(zext x) to zext x 2119e8d8bef9SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_ANYEXT || 2120e8d8bef9SDimitry Andric (MI.getOpcode() == TargetOpcode::G_SEXT && 2121e8d8bef9SDimitry Andric SrcExtOp == TargetOpcode::G_ZEXT)) { 2122e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2123e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2124e8d8bef9SDimitry Andric Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); 2125e8d8bef9SDimitry Andric MI.eraseFromParent(); 2126fe6060f1SDimitry Andric } 2127e8d8bef9SDimitry Andric } 2128e8d8bef9SDimitry Andric 2129fe6060f1SDimitry Andric void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { 2130e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); 2131e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2132e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2133e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2134e8d8bef9SDimitry Andric 2135e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2136e8d8bef9SDimitry Andric Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, 2137e8d8bef9SDimitry Andric MI.getFlags()); 2138e8d8bef9SDimitry Andric MI.eraseFromParent(); 2139e8d8bef9SDimitry Andric } 2140e8d8bef9SDimitry Andric 2141e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) { 2142e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG"); 2143e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2144e8d8bef9SDimitry Andric return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg))); 2145e8d8bef9SDimitry Andric } 2146e8d8bef9SDimitry Andric 2147e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) { 2148e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); 2149e8d8bef9SDimitry Andric Src = MI.getOperand(1).getReg(); 2150e8d8bef9SDimitry Andric Register AbsSrc; 2151e8d8bef9SDimitry Andric return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc))); 2152e8d8bef9SDimitry Andric } 2153e8d8bef9SDimitry Andric 2154*349cc55cSDimitry Andric bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI, 2155*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 2156*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS"); 2157*349cc55cSDimitry Andric Register Src = MI.getOperand(1).getReg(); 2158*349cc55cSDimitry Andric Register NegSrc; 2159*349cc55cSDimitry Andric 2160*349cc55cSDimitry Andric if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc)))) 2161*349cc55cSDimitry Andric return false; 2162*349cc55cSDimitry Andric 2163*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 2164*349cc55cSDimitry Andric Observer.changingInstr(MI); 2165*349cc55cSDimitry Andric MI.getOperand(1).setReg(NegSrc); 2166*349cc55cSDimitry Andric Observer.changedInstr(MI); 2167*349cc55cSDimitry Andric }; 2168*349cc55cSDimitry Andric return true; 2169*349cc55cSDimitry Andric } 2170*349cc55cSDimitry Andric 2171e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfExt( 2172e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2173e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2174e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2175e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2176e8d8bef9SDimitry Andric unsigned SrcOpc = SrcMI->getOpcode(); 2177e8d8bef9SDimitry Andric if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT || 2178e8d8bef9SDimitry Andric SrcOpc == TargetOpcode::G_ZEXT) { 2179e8d8bef9SDimitry Andric MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc); 2180e8d8bef9SDimitry Andric return true; 2181e8d8bef9SDimitry Andric } 2182e8d8bef9SDimitry Andric return false; 2183e8d8bef9SDimitry Andric } 2184e8d8bef9SDimitry Andric 2185fe6060f1SDimitry Andric void CombinerHelper::applyCombineTruncOfExt( 2186e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) { 2187e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2188e8d8bef9SDimitry Andric Register SrcReg = MatchInfo.first; 2189e8d8bef9SDimitry Andric unsigned SrcExtOp = MatchInfo.second; 2190e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2191e8d8bef9SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2192e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2193e8d8bef9SDimitry Andric if (SrcTy == DstTy) { 2194e8d8bef9SDimitry Andric MI.eraseFromParent(); 2195e8d8bef9SDimitry Andric replaceRegWith(MRI, DstReg, SrcReg); 2196fe6060f1SDimitry Andric return; 2197e8d8bef9SDimitry Andric } 2198e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2199e8d8bef9SDimitry Andric if (SrcTy.getSizeInBits() < DstTy.getSizeInBits()) 2200e8d8bef9SDimitry Andric Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg}); 2201e8d8bef9SDimitry Andric else 2202e8d8bef9SDimitry Andric Builder.buildTrunc(DstReg, SrcReg); 2203e8d8bef9SDimitry Andric MI.eraseFromParent(); 2204e8d8bef9SDimitry Andric } 2205e8d8bef9SDimitry Andric 2206e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineTruncOfShl( 2207e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2208e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2209e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2210e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2211e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2212e8d8bef9SDimitry Andric Register ShiftSrc; 2213e8d8bef9SDimitry Andric Register ShiftAmt; 2214e8d8bef9SDimitry Andric 2215e8d8bef9SDimitry Andric if (MRI.hasOneNonDBGUse(SrcReg) && 2216e8d8bef9SDimitry Andric mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) && 2217e8d8bef9SDimitry Andric isLegalOrBeforeLegalizer( 2218e8d8bef9SDimitry Andric {TargetOpcode::G_SHL, 2219e8d8bef9SDimitry Andric {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) { 2220e8d8bef9SDimitry Andric KnownBits Known = KB->getKnownBits(ShiftAmt); 2221e8d8bef9SDimitry Andric unsigned Size = DstTy.getSizeInBits(); 2222*349cc55cSDimitry Andric if (Known.countMaxActiveBits() <= Log2_32(Size)) { 2223e8d8bef9SDimitry Andric MatchInfo = std::make_pair(ShiftSrc, ShiftAmt); 2224e8d8bef9SDimitry Andric return true; 2225e8d8bef9SDimitry Andric } 2226e8d8bef9SDimitry Andric } 2227e8d8bef9SDimitry Andric return false; 2228e8d8bef9SDimitry Andric } 2229e8d8bef9SDimitry Andric 2230fe6060f1SDimitry Andric void CombinerHelper::applyCombineTruncOfShl( 2231e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2232e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); 2233e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2234e8d8bef9SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2235e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2236e8d8bef9SDimitry Andric MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); 2237e8d8bef9SDimitry Andric 2238e8d8bef9SDimitry Andric Register ShiftSrc = MatchInfo.first; 2239e8d8bef9SDimitry Andric Register ShiftAmt = MatchInfo.second; 2240e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2241e8d8bef9SDimitry Andric auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); 2242e8d8bef9SDimitry Andric Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); 2243e8d8bef9SDimitry Andric MI.eraseFromParent(); 2244e8d8bef9SDimitry Andric } 2245e8d8bef9SDimitry Andric 22465ffd83dbSDimitry Andric bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { 22475ffd83dbSDimitry Andric return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 22485ffd83dbSDimitry Andric return MO.isReg() && 22495ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 22505ffd83dbSDimitry Andric }); 22515ffd83dbSDimitry Andric } 22525ffd83dbSDimitry Andric 22535ffd83dbSDimitry Andric bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) { 22545ffd83dbSDimitry Andric return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) { 22555ffd83dbSDimitry Andric return !MO.isReg() || 22565ffd83dbSDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 22575ffd83dbSDimitry Andric }); 22585ffd83dbSDimitry Andric } 22595ffd83dbSDimitry Andric 22605ffd83dbSDimitry Andric bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) { 22615ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); 22625ffd83dbSDimitry Andric ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); 22635ffd83dbSDimitry Andric return all_of(Mask, [](int Elt) { return Elt < 0; }); 22645ffd83dbSDimitry Andric } 22655ffd83dbSDimitry Andric 22665ffd83dbSDimitry Andric bool CombinerHelper::matchUndefStore(MachineInstr &MI) { 22675ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_STORE); 22685ffd83dbSDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), 22695ffd83dbSDimitry Andric MRI); 22705ffd83dbSDimitry Andric } 22715ffd83dbSDimitry Andric 2272e8d8bef9SDimitry Andric bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { 2273e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 2274e8d8bef9SDimitry Andric return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), 2275e8d8bef9SDimitry Andric MRI); 2276e8d8bef9SDimitry Andric } 2277e8d8bef9SDimitry Andric 2278e8d8bef9SDimitry Andric bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { 2279*349cc55cSDimitry Andric GSelect &SelMI = cast<GSelect>(MI); 2280*349cc55cSDimitry Andric auto Cst = 2281*349cc55cSDimitry Andric isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI); 2282*349cc55cSDimitry Andric if (!Cst) 2283e8d8bef9SDimitry Andric return false; 2284*349cc55cSDimitry Andric OpIdx = Cst->isZero() ? 3 : 2; 2285*349cc55cSDimitry Andric return true; 2286e8d8bef9SDimitry Andric } 2287e8d8bef9SDimitry Andric 22885ffd83dbSDimitry Andric bool CombinerHelper::eraseInst(MachineInstr &MI) { 22895ffd83dbSDimitry Andric MI.eraseFromParent(); 22905ffd83dbSDimitry Andric return true; 22915ffd83dbSDimitry Andric } 22925ffd83dbSDimitry Andric 22935ffd83dbSDimitry Andric bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, 22945ffd83dbSDimitry Andric const MachineOperand &MOP2) { 22955ffd83dbSDimitry Andric if (!MOP1.isReg() || !MOP2.isReg()) 22965ffd83dbSDimitry Andric return false; 2297*349cc55cSDimitry Andric auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI); 2298*349cc55cSDimitry Andric if (!InstAndDef1) 22995ffd83dbSDimitry Andric return false; 2300*349cc55cSDimitry Andric auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI); 2301*349cc55cSDimitry Andric if (!InstAndDef2) 23025ffd83dbSDimitry Andric return false; 2303*349cc55cSDimitry Andric MachineInstr *I1 = InstAndDef1->MI; 2304*349cc55cSDimitry Andric MachineInstr *I2 = InstAndDef2->MI; 23055ffd83dbSDimitry Andric 23065ffd83dbSDimitry Andric // Handle a case like this: 23075ffd83dbSDimitry Andric // 23085ffd83dbSDimitry Andric // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>) 23095ffd83dbSDimitry Andric // 23105ffd83dbSDimitry Andric // Even though %0 and %1 are produced by the same instruction they are not 23115ffd83dbSDimitry Andric // the same values. 23125ffd83dbSDimitry Andric if (I1 == I2) 23135ffd83dbSDimitry Andric return MOP1.getReg() == MOP2.getReg(); 23145ffd83dbSDimitry Andric 23155ffd83dbSDimitry Andric // If we have an instruction which loads or stores, we can't guarantee that 23165ffd83dbSDimitry Andric // it is identical. 23175ffd83dbSDimitry Andric // 23185ffd83dbSDimitry Andric // For example, we may have 23195ffd83dbSDimitry Andric // 23205ffd83dbSDimitry Andric // %x1 = G_LOAD %addr (load N from @somewhere) 23215ffd83dbSDimitry Andric // ... 23225ffd83dbSDimitry Andric // call @foo 23235ffd83dbSDimitry Andric // ... 23245ffd83dbSDimitry Andric // %x2 = G_LOAD %addr (load N from @somewhere) 23255ffd83dbSDimitry Andric // ... 23265ffd83dbSDimitry Andric // %or = G_OR %x1, %x2 23275ffd83dbSDimitry Andric // 23285ffd83dbSDimitry Andric // It's possible that @foo will modify whatever lives at the address we're 23295ffd83dbSDimitry Andric // loading from. To be safe, let's just assume that all loads and stores 23305ffd83dbSDimitry Andric // are different (unless we have something which is guaranteed to not 23315ffd83dbSDimitry Andric // change.) 23325ffd83dbSDimitry Andric if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) 23335ffd83dbSDimitry Andric return false; 23345ffd83dbSDimitry Andric 23355ffd83dbSDimitry Andric // Check for physical registers on the instructions first to avoid cases 23365ffd83dbSDimitry Andric // like this: 23375ffd83dbSDimitry Andric // 23385ffd83dbSDimitry Andric // %a = COPY $physreg 23395ffd83dbSDimitry Andric // ... 23405ffd83dbSDimitry Andric // SOMETHING implicit-def $physreg 23415ffd83dbSDimitry Andric // ... 23425ffd83dbSDimitry Andric // %b = COPY $physreg 23435ffd83dbSDimitry Andric // 23445ffd83dbSDimitry Andric // These copies are not equivalent. 23455ffd83dbSDimitry Andric if (any_of(I1->uses(), [](const MachineOperand &MO) { 23465ffd83dbSDimitry Andric return MO.isReg() && MO.getReg().isPhysical(); 23475ffd83dbSDimitry Andric })) { 23485ffd83dbSDimitry Andric // Check if we have a case like this: 23495ffd83dbSDimitry Andric // 23505ffd83dbSDimitry Andric // %a = COPY $physreg 23515ffd83dbSDimitry Andric // %b = COPY %a 23525ffd83dbSDimitry Andric // 23535ffd83dbSDimitry Andric // In this case, I1 and I2 will both be equal to %a = COPY $physreg. 23545ffd83dbSDimitry Andric // From that, we know that they must have the same value, since they must 23555ffd83dbSDimitry Andric // have come from the same COPY. 23565ffd83dbSDimitry Andric return I1->isIdenticalTo(*I2); 23575ffd83dbSDimitry Andric } 23585ffd83dbSDimitry Andric 23595ffd83dbSDimitry Andric // We don't have any physical registers, so we don't necessarily need the 23605ffd83dbSDimitry Andric // same vreg defs. 23615ffd83dbSDimitry Andric // 23625ffd83dbSDimitry Andric // On the off-chance that there's some target instruction feeding into the 23635ffd83dbSDimitry Andric // instruction, let's use produceSameValue instead of isIdenticalTo. 2364*349cc55cSDimitry Andric if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) { 2365*349cc55cSDimitry Andric // Handle instructions with multiple defs that produce same values. Values 2366*349cc55cSDimitry Andric // are same for operands with same index. 2367*349cc55cSDimitry Andric // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) 2368*349cc55cSDimitry Andric // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>) 2369*349cc55cSDimitry Andric // I1 and I2 are different instructions but produce same values, 2370*349cc55cSDimitry Andric // %1 and %6 are same, %1 and %7 are not the same value. 2371*349cc55cSDimitry Andric return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) == 2372*349cc55cSDimitry Andric I2->findRegisterDefOperandIdx(InstAndDef2->Reg); 2373*349cc55cSDimitry Andric } 2374*349cc55cSDimitry Andric return false; 23755ffd83dbSDimitry Andric } 23765ffd83dbSDimitry Andric 23775ffd83dbSDimitry Andric bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { 23785ffd83dbSDimitry Andric if (!MOP.isReg()) 23795ffd83dbSDimitry Andric return false; 2380*349cc55cSDimitry Andric auto *MI = MRI.getVRegDef(MOP.getReg()); 2381*349cc55cSDimitry Andric auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI); 2382*349cc55cSDimitry Andric return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 && 2383*349cc55cSDimitry Andric MaybeCst->getSExtValue() == C; 23845ffd83dbSDimitry Andric } 23855ffd83dbSDimitry Andric 23865ffd83dbSDimitry Andric bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, 23875ffd83dbSDimitry Andric unsigned OpIdx) { 23885ffd83dbSDimitry Andric assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); 23895ffd83dbSDimitry Andric Register OldReg = MI.getOperand(0).getReg(); 23905ffd83dbSDimitry Andric Register Replacement = MI.getOperand(OpIdx).getReg(); 23915ffd83dbSDimitry Andric assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); 23925ffd83dbSDimitry Andric MI.eraseFromParent(); 23935ffd83dbSDimitry Andric replaceRegWith(MRI, OldReg, Replacement); 23945ffd83dbSDimitry Andric return true; 23955ffd83dbSDimitry Andric } 23965ffd83dbSDimitry Andric 2397e8d8bef9SDimitry Andric bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, 2398e8d8bef9SDimitry Andric Register Replacement) { 2399e8d8bef9SDimitry Andric assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); 2400e8d8bef9SDimitry Andric Register OldReg = MI.getOperand(0).getReg(); 2401e8d8bef9SDimitry Andric assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); 2402e8d8bef9SDimitry Andric MI.eraseFromParent(); 2403e8d8bef9SDimitry Andric replaceRegWith(MRI, OldReg, Replacement); 2404e8d8bef9SDimitry Andric return true; 2405e8d8bef9SDimitry Andric } 2406e8d8bef9SDimitry Andric 24075ffd83dbSDimitry Andric bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { 24085ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SELECT); 24095ffd83dbSDimitry Andric // Match (cond ? x : x) 24105ffd83dbSDimitry Andric return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) && 24115ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(), 24125ffd83dbSDimitry Andric MRI); 24135ffd83dbSDimitry Andric } 24145ffd83dbSDimitry Andric 24155ffd83dbSDimitry Andric bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) { 24165ffd83dbSDimitry Andric return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) && 24175ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), 24185ffd83dbSDimitry Andric MRI); 24195ffd83dbSDimitry Andric } 24205ffd83dbSDimitry Andric 24215ffd83dbSDimitry Andric bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { 24225ffd83dbSDimitry Andric return matchConstantOp(MI.getOperand(OpIdx), 0) && 24235ffd83dbSDimitry Andric canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(), 24245ffd83dbSDimitry Andric MRI); 24255ffd83dbSDimitry Andric } 24265ffd83dbSDimitry Andric 2427e8d8bef9SDimitry Andric bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { 2428e8d8bef9SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 2429e8d8bef9SDimitry Andric return MO.isReg() && 2430e8d8bef9SDimitry Andric getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); 2431e8d8bef9SDimitry Andric } 2432e8d8bef9SDimitry Andric 2433e8d8bef9SDimitry Andric bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, 2434e8d8bef9SDimitry Andric unsigned OpIdx) { 2435e8d8bef9SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 2436e8d8bef9SDimitry Andric return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); 2437e8d8bef9SDimitry Andric } 2438e8d8bef9SDimitry Andric 24395ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { 24405ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 24415ffd83dbSDimitry Andric Builder.setInstr(MI); 24425ffd83dbSDimitry Andric Builder.buildFConstant(MI.getOperand(0), C); 24435ffd83dbSDimitry Andric MI.eraseFromParent(); 24445ffd83dbSDimitry Andric return true; 24455ffd83dbSDimitry Andric } 24465ffd83dbSDimitry Andric 24475ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { 24485ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 24495ffd83dbSDimitry Andric Builder.setInstr(MI); 24505ffd83dbSDimitry Andric Builder.buildConstant(MI.getOperand(0), C); 24515ffd83dbSDimitry Andric MI.eraseFromParent(); 24525ffd83dbSDimitry Andric return true; 24535ffd83dbSDimitry Andric } 24545ffd83dbSDimitry Andric 2455fe6060f1SDimitry Andric bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) { 2456fe6060f1SDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 2457fe6060f1SDimitry Andric Builder.setInstr(MI); 2458fe6060f1SDimitry Andric Builder.buildConstant(MI.getOperand(0), C); 2459fe6060f1SDimitry Andric MI.eraseFromParent(); 2460fe6060f1SDimitry Andric return true; 2461fe6060f1SDimitry Andric } 2462fe6060f1SDimitry Andric 24635ffd83dbSDimitry Andric bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { 24645ffd83dbSDimitry Andric assert(MI.getNumDefs() == 1 && "Expected only one def?"); 24655ffd83dbSDimitry Andric Builder.setInstr(MI); 24665ffd83dbSDimitry Andric Builder.buildUndef(MI.getOperand(0)); 24675ffd83dbSDimitry Andric MI.eraseFromParent(); 24685ffd83dbSDimitry Andric return true; 24695ffd83dbSDimitry Andric } 24705ffd83dbSDimitry Andric 24715ffd83dbSDimitry Andric bool CombinerHelper::matchSimplifyAddToSub( 24725ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 24735ffd83dbSDimitry Andric Register LHS = MI.getOperand(1).getReg(); 24745ffd83dbSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 24755ffd83dbSDimitry Andric Register &NewLHS = std::get<0>(MatchInfo); 24765ffd83dbSDimitry Andric Register &NewRHS = std::get<1>(MatchInfo); 24775ffd83dbSDimitry Andric 24785ffd83dbSDimitry Andric // Helper lambda to check for opportunities for 24795ffd83dbSDimitry Andric // ((0-A) + B) -> B - A 24805ffd83dbSDimitry Andric // (A + (0-B)) -> A - B 24815ffd83dbSDimitry Andric auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) { 2482e8d8bef9SDimitry Andric if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS)))) 24835ffd83dbSDimitry Andric return false; 24845ffd83dbSDimitry Andric NewLHS = MaybeNewLHS; 24855ffd83dbSDimitry Andric return true; 24865ffd83dbSDimitry Andric }; 24875ffd83dbSDimitry Andric 24885ffd83dbSDimitry Andric return CheckFold(LHS, RHS) || CheckFold(RHS, LHS); 24895ffd83dbSDimitry Andric } 24905ffd83dbSDimitry Andric 2491e8d8bef9SDimitry Andric bool CombinerHelper::matchCombineInsertVecElts( 2492e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2493e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && 2494e8d8bef9SDimitry Andric "Invalid opcode"); 2495e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2496e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2497e8d8bef9SDimitry Andric assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?"); 2498e8d8bef9SDimitry Andric unsigned NumElts = DstTy.getNumElements(); 2499e8d8bef9SDimitry Andric // If this MI is part of a sequence of insert_vec_elts, then 2500e8d8bef9SDimitry Andric // don't do the combine in the middle of the sequence. 2501e8d8bef9SDimitry Andric if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() == 2502e8d8bef9SDimitry Andric TargetOpcode::G_INSERT_VECTOR_ELT) 2503e8d8bef9SDimitry Andric return false; 2504e8d8bef9SDimitry Andric MachineInstr *CurrInst = &MI; 2505e8d8bef9SDimitry Andric MachineInstr *TmpInst; 2506e8d8bef9SDimitry Andric int64_t IntImm; 2507e8d8bef9SDimitry Andric Register TmpReg; 2508e8d8bef9SDimitry Andric MatchInfo.resize(NumElts); 2509e8d8bef9SDimitry Andric while (mi_match( 2510e8d8bef9SDimitry Andric CurrInst->getOperand(0).getReg(), MRI, 2511e8d8bef9SDimitry Andric m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) { 2512e8d8bef9SDimitry Andric if (IntImm >= NumElts) 2513e8d8bef9SDimitry Andric return false; 2514e8d8bef9SDimitry Andric if (!MatchInfo[IntImm]) 2515e8d8bef9SDimitry Andric MatchInfo[IntImm] = TmpReg; 2516e8d8bef9SDimitry Andric CurrInst = TmpInst; 2517e8d8bef9SDimitry Andric } 2518e8d8bef9SDimitry Andric // Variable index. 2519e8d8bef9SDimitry Andric if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) 2520e8d8bef9SDimitry Andric return false; 2521e8d8bef9SDimitry Andric if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { 2522e8d8bef9SDimitry Andric for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) { 2523e8d8bef9SDimitry Andric if (!MatchInfo[I - 1].isValid()) 2524e8d8bef9SDimitry Andric MatchInfo[I - 1] = TmpInst->getOperand(I).getReg(); 2525e8d8bef9SDimitry Andric } 2526e8d8bef9SDimitry Andric return true; 2527e8d8bef9SDimitry Andric } 2528e8d8bef9SDimitry Andric // If we didn't end in a G_IMPLICIT_DEF, bail out. 2529e8d8bef9SDimitry Andric return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; 2530e8d8bef9SDimitry Andric } 2531e8d8bef9SDimitry Andric 2532fe6060f1SDimitry Andric void CombinerHelper::applyCombineInsertVecElts( 2533e8d8bef9SDimitry Andric MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { 2534e8d8bef9SDimitry Andric Builder.setInstr(MI); 2535e8d8bef9SDimitry Andric Register UndefReg; 2536e8d8bef9SDimitry Andric auto GetUndef = [&]() { 2537e8d8bef9SDimitry Andric if (UndefReg) 2538e8d8bef9SDimitry Andric return UndefReg; 2539e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 2540e8d8bef9SDimitry Andric UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0); 2541e8d8bef9SDimitry Andric return UndefReg; 2542e8d8bef9SDimitry Andric }; 2543e8d8bef9SDimitry Andric for (unsigned I = 0; I < MatchInfo.size(); ++I) { 2544e8d8bef9SDimitry Andric if (!MatchInfo[I]) 2545e8d8bef9SDimitry Andric MatchInfo[I] = GetUndef(); 2546e8d8bef9SDimitry Andric } 2547e8d8bef9SDimitry Andric Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo); 2548e8d8bef9SDimitry Andric MI.eraseFromParent(); 2549e8d8bef9SDimitry Andric } 2550e8d8bef9SDimitry Andric 2551fe6060f1SDimitry Andric void CombinerHelper::applySimplifyAddToSub( 25525ffd83dbSDimitry Andric MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { 25535ffd83dbSDimitry Andric Builder.setInstr(MI); 25545ffd83dbSDimitry Andric Register SubLHS, SubRHS; 25555ffd83dbSDimitry Andric std::tie(SubLHS, SubRHS) = MatchInfo; 25565ffd83dbSDimitry Andric Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); 25575ffd83dbSDimitry Andric MI.eraseFromParent(); 25585ffd83dbSDimitry Andric } 25595ffd83dbSDimitry Andric 2560e8d8bef9SDimitry Andric bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( 2561e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2562e8d8bef9SDimitry Andric // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... 2563e8d8bef9SDimitry Andric // 2564e8d8bef9SDimitry Andric // Creates the new hand + logic instruction (but does not insert them.) 2565e8d8bef9SDimitry Andric // 2566e8d8bef9SDimitry Andric // On success, MatchInfo is populated with the new instructions. These are 2567e8d8bef9SDimitry Andric // inserted in applyHoistLogicOpWithSameOpcodeHands. 2568e8d8bef9SDimitry Andric unsigned LogicOpcode = MI.getOpcode(); 2569e8d8bef9SDimitry Andric assert(LogicOpcode == TargetOpcode::G_AND || 2570e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_OR || 2571e8d8bef9SDimitry Andric LogicOpcode == TargetOpcode::G_XOR); 2572e8d8bef9SDimitry Andric MachineIRBuilder MIB(MI); 2573e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2574e8d8bef9SDimitry Andric Register LHSReg = MI.getOperand(1).getReg(); 2575e8d8bef9SDimitry Andric Register RHSReg = MI.getOperand(2).getReg(); 2576e8d8bef9SDimitry Andric 2577e8d8bef9SDimitry Andric // Don't recompute anything. 2578e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg)) 2579e8d8bef9SDimitry Andric return false; 2580e8d8bef9SDimitry Andric 2581e8d8bef9SDimitry Andric // Make sure we have (hand x, ...), (hand y, ...) 2582e8d8bef9SDimitry Andric MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI); 2583e8d8bef9SDimitry Andric MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI); 2584e8d8bef9SDimitry Andric if (!LeftHandInst || !RightHandInst) 2585e8d8bef9SDimitry Andric return false; 2586e8d8bef9SDimitry Andric unsigned HandOpcode = LeftHandInst->getOpcode(); 2587e8d8bef9SDimitry Andric if (HandOpcode != RightHandInst->getOpcode()) 2588e8d8bef9SDimitry Andric return false; 2589e8d8bef9SDimitry Andric if (!LeftHandInst->getOperand(1).isReg() || 2590e8d8bef9SDimitry Andric !RightHandInst->getOperand(1).isReg()) 2591e8d8bef9SDimitry Andric return false; 2592e8d8bef9SDimitry Andric 2593e8d8bef9SDimitry Andric // Make sure the types match up, and if we're doing this post-legalization, 2594e8d8bef9SDimitry Andric // we end up with legal types. 2595e8d8bef9SDimitry Andric Register X = LeftHandInst->getOperand(1).getReg(); 2596e8d8bef9SDimitry Andric Register Y = RightHandInst->getOperand(1).getReg(); 2597e8d8bef9SDimitry Andric LLT XTy = MRI.getType(X); 2598e8d8bef9SDimitry Andric LLT YTy = MRI.getType(Y); 2599e8d8bef9SDimitry Andric if (XTy != YTy) 2600e8d8bef9SDimitry Andric return false; 2601e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}})) 2602e8d8bef9SDimitry Andric return false; 2603e8d8bef9SDimitry Andric 2604e8d8bef9SDimitry Andric // Optional extra source register. 2605e8d8bef9SDimitry Andric Register ExtraHandOpSrcReg; 2606e8d8bef9SDimitry Andric switch (HandOpcode) { 2607e8d8bef9SDimitry Andric default: 2608e8d8bef9SDimitry Andric return false; 2609e8d8bef9SDimitry Andric case TargetOpcode::G_ANYEXT: 2610e8d8bef9SDimitry Andric case TargetOpcode::G_SEXT: 2611e8d8bef9SDimitry Andric case TargetOpcode::G_ZEXT: { 2612e8d8bef9SDimitry Andric // Match: logic (ext X), (ext Y) --> ext (logic X, Y) 2613e8d8bef9SDimitry Andric break; 2614e8d8bef9SDimitry Andric } 2615e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 2616e8d8bef9SDimitry Andric case TargetOpcode::G_ASHR: 2617e8d8bef9SDimitry Andric case TargetOpcode::G_LSHR: 2618e8d8bef9SDimitry Andric case TargetOpcode::G_SHL: { 2619e8d8bef9SDimitry Andric // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z 2620e8d8bef9SDimitry Andric MachineOperand &ZOp = LeftHandInst->getOperand(2); 2621e8d8bef9SDimitry Andric if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2))) 2622e8d8bef9SDimitry Andric return false; 2623e8d8bef9SDimitry Andric ExtraHandOpSrcReg = ZOp.getReg(); 2624e8d8bef9SDimitry Andric break; 2625e8d8bef9SDimitry Andric } 2626e8d8bef9SDimitry Andric } 2627e8d8bef9SDimitry Andric 2628e8d8bef9SDimitry Andric // Record the steps to build the new instructions. 2629e8d8bef9SDimitry Andric // 2630e8d8bef9SDimitry Andric // Steps to build (logic x, y) 2631e8d8bef9SDimitry Andric auto NewLogicDst = MRI.createGenericVirtualRegister(XTy); 2632e8d8bef9SDimitry Andric OperandBuildSteps LogicBuildSteps = { 2633e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); }, 2634e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(X); }, 2635e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }}; 2636e8d8bef9SDimitry Andric InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps); 2637e8d8bef9SDimitry Andric 2638e8d8bef9SDimitry Andric // Steps to build hand (logic x, y), ...z 2639e8d8bef9SDimitry Andric OperandBuildSteps HandBuildSteps = { 2640e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); }, 2641e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }}; 2642e8d8bef9SDimitry Andric if (ExtraHandOpSrcReg.isValid()) 2643e8d8bef9SDimitry Andric HandBuildSteps.push_back( 2644e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); }); 2645e8d8bef9SDimitry Andric InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps); 2646e8d8bef9SDimitry Andric 2647e8d8bef9SDimitry Andric MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps}); 2648e8d8bef9SDimitry Andric return true; 2649e8d8bef9SDimitry Andric } 2650e8d8bef9SDimitry Andric 2651fe6060f1SDimitry Andric void CombinerHelper::applyBuildInstructionSteps( 2652e8d8bef9SDimitry Andric MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { 2653e8d8bef9SDimitry Andric assert(MatchInfo.InstrsToBuild.size() && 2654e8d8bef9SDimitry Andric "Expected at least one instr to build?"); 2655e8d8bef9SDimitry Andric Builder.setInstr(MI); 2656e8d8bef9SDimitry Andric for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { 2657e8d8bef9SDimitry Andric assert(InstrToBuild.Opcode && "Expected a valid opcode?"); 2658e8d8bef9SDimitry Andric assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?"); 2659e8d8bef9SDimitry Andric MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode); 2660e8d8bef9SDimitry Andric for (auto &OperandFn : InstrToBuild.OperandFns) 2661e8d8bef9SDimitry Andric OperandFn(Instr); 2662e8d8bef9SDimitry Andric } 2663e8d8bef9SDimitry Andric MI.eraseFromParent(); 2664e8d8bef9SDimitry Andric } 2665e8d8bef9SDimitry Andric 2666e8d8bef9SDimitry Andric bool CombinerHelper::matchAshrShlToSextInreg( 2667e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2668e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2669e8d8bef9SDimitry Andric int64_t ShlCst, AshrCst; 2670e8d8bef9SDimitry Andric Register Src; 2671e8d8bef9SDimitry Andric // FIXME: detect splat constant vectors. 2672e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 2673e8d8bef9SDimitry Andric m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) 2674e8d8bef9SDimitry Andric return false; 2675e8d8bef9SDimitry Andric if (ShlCst != AshrCst) 2676e8d8bef9SDimitry Andric return false; 2677e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 2678e8d8bef9SDimitry Andric {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) 2679e8d8bef9SDimitry Andric return false; 2680e8d8bef9SDimitry Andric MatchInfo = std::make_tuple(Src, ShlCst); 2681e8d8bef9SDimitry Andric return true; 2682e8d8bef9SDimitry Andric } 2683fe6060f1SDimitry Andric 2684fe6060f1SDimitry Andric void CombinerHelper::applyAshShlToSextInreg( 2685e8d8bef9SDimitry Andric MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { 2686e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR); 2687e8d8bef9SDimitry Andric Register Src; 2688e8d8bef9SDimitry Andric int64_t ShiftAmt; 2689e8d8bef9SDimitry Andric std::tie(Src, ShiftAmt) = MatchInfo; 2690e8d8bef9SDimitry Andric unsigned Size = MRI.getType(Src).getScalarSizeInBits(); 2691e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2692e8d8bef9SDimitry Andric Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); 2693e8d8bef9SDimitry Andric MI.eraseFromParent(); 2694fe6060f1SDimitry Andric } 2695fe6060f1SDimitry Andric 2696fe6060f1SDimitry Andric /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 2697fe6060f1SDimitry Andric bool CombinerHelper::matchOverlappingAnd( 2698fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 2699fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 2700fe6060f1SDimitry Andric 2701fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2702fe6060f1SDimitry Andric LLT Ty = MRI.getType(Dst); 2703fe6060f1SDimitry Andric 2704fe6060f1SDimitry Andric Register R; 2705fe6060f1SDimitry Andric int64_t C1; 2706fe6060f1SDimitry Andric int64_t C2; 2707fe6060f1SDimitry Andric if (!mi_match( 2708fe6060f1SDimitry Andric Dst, MRI, 2709fe6060f1SDimitry Andric m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2)))) 2710fe6060f1SDimitry Andric return false; 2711fe6060f1SDimitry Andric 2712fe6060f1SDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 2713fe6060f1SDimitry Andric if (C1 & C2) { 2714fe6060f1SDimitry Andric B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2)); 2715fe6060f1SDimitry Andric return; 2716fe6060f1SDimitry Andric } 2717fe6060f1SDimitry Andric auto Zero = B.buildConstant(Ty, 0); 2718fe6060f1SDimitry Andric replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg()); 2719fe6060f1SDimitry Andric }; 2720e8d8bef9SDimitry Andric return true; 2721e8d8bef9SDimitry Andric } 2722e8d8bef9SDimitry Andric 2723e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, 2724e8d8bef9SDimitry Andric Register &Replacement) { 2725e8d8bef9SDimitry Andric // Given 2726e8d8bef9SDimitry Andric // 2727e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2728e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2729e8d8bef9SDimitry Andric // %res:_(sN) = G_AND %x, %y 2730e8d8bef9SDimitry Andric // 2731e8d8bef9SDimitry Andric // Eliminate the G_AND when it is known that x & y == x or x & y == y. 2732e8d8bef9SDimitry Andric // 2733e8d8bef9SDimitry Andric // Patterns like this can appear as a result of legalization. E.g. 2734e8d8bef9SDimitry Andric // 2735e8d8bef9SDimitry Andric // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y 2736e8d8bef9SDimitry Andric // %one:_(s32) = G_CONSTANT i32 1 2737e8d8bef9SDimitry Andric // %and:_(s32) = G_AND %cmp, %one 2738e8d8bef9SDimitry Andric // 2739e8d8bef9SDimitry Andric // In this case, G_ICMP only produces a single bit, so x & 1 == x. 2740e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 2741e8d8bef9SDimitry Andric if (!KB) 2742e8d8bef9SDimitry Andric return false; 2743e8d8bef9SDimitry Andric 2744e8d8bef9SDimitry Andric Register AndDst = MI.getOperand(0).getReg(); 2745e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(AndDst); 2746e8d8bef9SDimitry Andric 2747e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 2748e8d8bef9SDimitry Andric if (DstTy.isVector()) 2749e8d8bef9SDimitry Andric return false; 2750e8d8bef9SDimitry Andric 2751e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2752e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2753e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2754e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2755e8d8bef9SDimitry Andric 2756e8d8bef9SDimitry Andric // Check that x & Mask == x. 2757e8d8bef9SDimitry Andric // x & 1 == x, always 2758e8d8bef9SDimitry Andric // x & 0 == x, only if x is also 0 2759e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either one in Mask or zero in x. 2760e8d8bef9SDimitry Andric // 2761e8d8bef9SDimitry Andric // Check if we can replace AndDst with the LHS of the G_AND 2762e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, LHS, MRI) && 2763*349cc55cSDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnes()) { 2764e8d8bef9SDimitry Andric Replacement = LHS; 2765e8d8bef9SDimitry Andric return true; 2766e8d8bef9SDimitry Andric } 2767e8d8bef9SDimitry Andric 2768e8d8bef9SDimitry Andric // Check if we can replace AndDst with the RHS of the G_AND 2769e8d8bef9SDimitry Andric if (canReplaceReg(AndDst, RHS, MRI) && 2770*349cc55cSDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnes()) { 2771e8d8bef9SDimitry Andric Replacement = RHS; 2772e8d8bef9SDimitry Andric return true; 2773e8d8bef9SDimitry Andric } 2774e8d8bef9SDimitry Andric 2775e8d8bef9SDimitry Andric return false; 2776e8d8bef9SDimitry Andric } 2777e8d8bef9SDimitry Andric 2778e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { 2779e8d8bef9SDimitry Andric // Given 2780e8d8bef9SDimitry Andric // 2781e8d8bef9SDimitry Andric // %y:_(sN) = G_SOMETHING 2782e8d8bef9SDimitry Andric // %x:_(sN) = G_SOMETHING 2783e8d8bef9SDimitry Andric // %res:_(sN) = G_OR %x, %y 2784e8d8bef9SDimitry Andric // 2785e8d8bef9SDimitry Andric // Eliminate the G_OR when it is known that x | y == x or x | y == y. 2786e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 2787e8d8bef9SDimitry Andric if (!KB) 2788e8d8bef9SDimitry Andric return false; 2789e8d8bef9SDimitry Andric 2790e8d8bef9SDimitry Andric Register OrDst = MI.getOperand(0).getReg(); 2791e8d8bef9SDimitry Andric LLT DstTy = MRI.getType(OrDst); 2792e8d8bef9SDimitry Andric 2793e8d8bef9SDimitry Andric // FIXME: This should be removed once GISelKnownBits supports vectors. 2794e8d8bef9SDimitry Andric if (DstTy.isVector()) 2795e8d8bef9SDimitry Andric return false; 2796e8d8bef9SDimitry Andric 2797e8d8bef9SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 2798e8d8bef9SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 2799e8d8bef9SDimitry Andric KnownBits LHSBits = KB->getKnownBits(LHS); 2800e8d8bef9SDimitry Andric KnownBits RHSBits = KB->getKnownBits(RHS); 2801e8d8bef9SDimitry Andric 2802e8d8bef9SDimitry Andric // Check that x | Mask == x. 2803e8d8bef9SDimitry Andric // x | 0 == x, always 2804e8d8bef9SDimitry Andric // x | 1 == x, only if x is also 1 2805e8d8bef9SDimitry Andric // Meaning Mask has no effect if every bit is either zero in Mask or one in x. 2806e8d8bef9SDimitry Andric // 2807e8d8bef9SDimitry Andric // Check if we can replace OrDst with the LHS of the G_OR 2808e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, LHS, MRI) && 2809*349cc55cSDimitry Andric (LHSBits.One | RHSBits.Zero).isAllOnes()) { 2810e8d8bef9SDimitry Andric Replacement = LHS; 2811e8d8bef9SDimitry Andric return true; 2812e8d8bef9SDimitry Andric } 2813e8d8bef9SDimitry Andric 2814e8d8bef9SDimitry Andric // Check if we can replace OrDst with the RHS of the G_OR 2815e8d8bef9SDimitry Andric if (canReplaceReg(OrDst, RHS, MRI) && 2816*349cc55cSDimitry Andric (LHSBits.Zero | RHSBits.One).isAllOnes()) { 2817e8d8bef9SDimitry Andric Replacement = RHS; 2818e8d8bef9SDimitry Andric return true; 2819e8d8bef9SDimitry Andric } 2820e8d8bef9SDimitry Andric 2821e8d8bef9SDimitry Andric return false; 2822e8d8bef9SDimitry Andric } 2823e8d8bef9SDimitry Andric 2824e8d8bef9SDimitry Andric bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { 2825e8d8bef9SDimitry Andric // If the input is already sign extended, just drop the extension. 2826e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg(); 2827e8d8bef9SDimitry Andric unsigned ExtBits = MI.getOperand(2).getImm(); 2828e8d8bef9SDimitry Andric unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits(); 2829e8d8bef9SDimitry Andric return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1); 2830e8d8bef9SDimitry Andric } 2831e8d8bef9SDimitry Andric 2832e8d8bef9SDimitry Andric static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, 2833e8d8bef9SDimitry Andric int64_t Cst, bool IsVector, bool IsFP) { 2834e8d8bef9SDimitry Andric // For i1, Cst will always be -1 regardless of boolean contents. 2835e8d8bef9SDimitry Andric return (ScalarSizeBits == 1 && Cst == -1) || 2836e8d8bef9SDimitry Andric isConstTrueVal(TLI, Cst, IsVector, IsFP); 2837e8d8bef9SDimitry Andric } 2838e8d8bef9SDimitry Andric 2839e8d8bef9SDimitry Andric bool CombinerHelper::matchNotCmp(MachineInstr &MI, 2840e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 2841e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 2842e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 2843e8d8bef9SDimitry Andric const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); 2844e8d8bef9SDimitry Andric Register XorSrc; 2845e8d8bef9SDimitry Andric Register CstReg; 2846e8d8bef9SDimitry Andric // We match xor(src, true) here. 2847e8d8bef9SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 2848e8d8bef9SDimitry Andric m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) 2849e8d8bef9SDimitry Andric return false; 2850e8d8bef9SDimitry Andric 2851e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(XorSrc)) 2852e8d8bef9SDimitry Andric return false; 2853e8d8bef9SDimitry Andric 2854e8d8bef9SDimitry Andric // Check that XorSrc is the root of a tree of comparisons combined with ANDs 2855e8d8bef9SDimitry Andric // and ORs. The suffix of RegsToNegate starting from index I is used a work 2856e8d8bef9SDimitry Andric // list of tree nodes to visit. 2857e8d8bef9SDimitry Andric RegsToNegate.push_back(XorSrc); 2858e8d8bef9SDimitry Andric // Remember whether the comparisons are all integer or all floating point. 2859e8d8bef9SDimitry Andric bool IsInt = false; 2860e8d8bef9SDimitry Andric bool IsFP = false; 2861e8d8bef9SDimitry Andric for (unsigned I = 0; I < RegsToNegate.size(); ++I) { 2862e8d8bef9SDimitry Andric Register Reg = RegsToNegate[I]; 2863e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(Reg)) 2864e8d8bef9SDimitry Andric return false; 2865e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 2866e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 2867e8d8bef9SDimitry Andric default: 2868e8d8bef9SDimitry Andric // Don't match if the tree contains anything other than ANDs, ORs and 2869e8d8bef9SDimitry Andric // comparisons. 2870e8d8bef9SDimitry Andric return false; 2871e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 2872e8d8bef9SDimitry Andric if (IsFP) 2873e8d8bef9SDimitry Andric return false; 2874e8d8bef9SDimitry Andric IsInt = true; 2875e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 2876e8d8bef9SDimitry Andric break; 2877e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: 2878e8d8bef9SDimitry Andric if (IsInt) 2879e8d8bef9SDimitry Andric return false; 2880e8d8bef9SDimitry Andric IsFP = true; 2881e8d8bef9SDimitry Andric // When we apply the combine we will invert the predicate. 2882e8d8bef9SDimitry Andric break; 2883e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 2884e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 2885e8d8bef9SDimitry Andric // Implement De Morgan's laws: 2886e8d8bef9SDimitry Andric // ~(x & y) -> ~x | ~y 2887e8d8bef9SDimitry Andric // ~(x | y) -> ~x & ~y 2888e8d8bef9SDimitry Andric // When we apply the combine we will change the opcode and recursively 2889e8d8bef9SDimitry Andric // negate the operands. 2890e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(1).getReg()); 2891e8d8bef9SDimitry Andric RegsToNegate.push_back(Def->getOperand(2).getReg()); 2892e8d8bef9SDimitry Andric break; 2893e8d8bef9SDimitry Andric } 2894e8d8bef9SDimitry Andric } 2895e8d8bef9SDimitry Andric 2896e8d8bef9SDimitry Andric // Now we know whether the comparisons are integer or floating point, check 2897e8d8bef9SDimitry Andric // the constant in the xor. 2898e8d8bef9SDimitry Andric int64_t Cst; 2899e8d8bef9SDimitry Andric if (Ty.isVector()) { 2900e8d8bef9SDimitry Andric MachineInstr *CstDef = MRI.getVRegDef(CstReg); 2901e8d8bef9SDimitry Andric auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); 2902e8d8bef9SDimitry Andric if (!MaybeCst) 2903e8d8bef9SDimitry Andric return false; 2904e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP)) 2905e8d8bef9SDimitry Andric return false; 2906e8d8bef9SDimitry Andric } else { 2907e8d8bef9SDimitry Andric if (!mi_match(CstReg, MRI, m_ICst(Cst))) 2908e8d8bef9SDimitry Andric return false; 2909e8d8bef9SDimitry Andric if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP)) 2910e8d8bef9SDimitry Andric return false; 2911e8d8bef9SDimitry Andric } 2912e8d8bef9SDimitry Andric 2913e8d8bef9SDimitry Andric return true; 2914e8d8bef9SDimitry Andric } 2915e8d8bef9SDimitry Andric 2916fe6060f1SDimitry Andric void CombinerHelper::applyNotCmp(MachineInstr &MI, 2917e8d8bef9SDimitry Andric SmallVectorImpl<Register> &RegsToNegate) { 2918e8d8bef9SDimitry Andric for (Register Reg : RegsToNegate) { 2919e8d8bef9SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg); 2920e8d8bef9SDimitry Andric Observer.changingInstr(*Def); 2921e8d8bef9SDimitry Andric // For each comparison, invert the opcode. For each AND and OR, change the 2922e8d8bef9SDimitry Andric // opcode. 2923e8d8bef9SDimitry Andric switch (Def->getOpcode()) { 2924e8d8bef9SDimitry Andric default: 2925e8d8bef9SDimitry Andric llvm_unreachable("Unexpected opcode"); 2926e8d8bef9SDimitry Andric case TargetOpcode::G_ICMP: 2927e8d8bef9SDimitry Andric case TargetOpcode::G_FCMP: { 2928e8d8bef9SDimitry Andric MachineOperand &PredOp = Def->getOperand(1); 2929e8d8bef9SDimitry Andric CmpInst::Predicate NewP = CmpInst::getInversePredicate( 2930e8d8bef9SDimitry Andric (CmpInst::Predicate)PredOp.getPredicate()); 2931e8d8bef9SDimitry Andric PredOp.setPredicate(NewP); 2932e8d8bef9SDimitry Andric break; 2933e8d8bef9SDimitry Andric } 2934e8d8bef9SDimitry Andric case TargetOpcode::G_AND: 2935e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); 2936e8d8bef9SDimitry Andric break; 2937e8d8bef9SDimitry Andric case TargetOpcode::G_OR: 2938e8d8bef9SDimitry Andric Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 2939e8d8bef9SDimitry Andric break; 2940e8d8bef9SDimitry Andric } 2941e8d8bef9SDimitry Andric Observer.changedInstr(*Def); 2942e8d8bef9SDimitry Andric } 2943e8d8bef9SDimitry Andric 2944e8d8bef9SDimitry Andric replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); 2945e8d8bef9SDimitry Andric MI.eraseFromParent(); 2946e8d8bef9SDimitry Andric } 2947e8d8bef9SDimitry Andric 2948e8d8bef9SDimitry Andric bool CombinerHelper::matchXorOfAndWithSameReg( 2949e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2950e8d8bef9SDimitry Andric // Match (xor (and x, y), y) (or any of its commuted cases) 2951e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_XOR); 2952e8d8bef9SDimitry Andric Register &X = MatchInfo.first; 2953e8d8bef9SDimitry Andric Register &Y = MatchInfo.second; 2954e8d8bef9SDimitry Andric Register AndReg = MI.getOperand(1).getReg(); 2955e8d8bef9SDimitry Andric Register SharedReg = MI.getOperand(2).getReg(); 2956e8d8bef9SDimitry Andric 2957e8d8bef9SDimitry Andric // Find a G_AND on either side of the G_XOR. 2958e8d8bef9SDimitry Andric // Look for one of 2959e8d8bef9SDimitry Andric // 2960e8d8bef9SDimitry Andric // (xor (and x, y), SharedReg) 2961e8d8bef9SDimitry Andric // (xor SharedReg, (and x, y)) 2962e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) { 2963e8d8bef9SDimitry Andric std::swap(AndReg, SharedReg); 2964e8d8bef9SDimitry Andric if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) 2965e8d8bef9SDimitry Andric return false; 2966e8d8bef9SDimitry Andric } 2967e8d8bef9SDimitry Andric 2968e8d8bef9SDimitry Andric // Only do this if we'll eliminate the G_AND. 2969e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(AndReg)) 2970e8d8bef9SDimitry Andric return false; 2971e8d8bef9SDimitry Andric 2972e8d8bef9SDimitry Andric // We can combine if SharedReg is the same as either the LHS or RHS of the 2973e8d8bef9SDimitry Andric // G_AND. 2974e8d8bef9SDimitry Andric if (Y != SharedReg) 2975e8d8bef9SDimitry Andric std::swap(X, Y); 2976e8d8bef9SDimitry Andric return Y == SharedReg; 2977e8d8bef9SDimitry Andric } 2978e8d8bef9SDimitry Andric 2979fe6060f1SDimitry Andric void CombinerHelper::applyXorOfAndWithSameReg( 2980e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { 2981e8d8bef9SDimitry Andric // Fold (xor (and x, y), y) -> (and (not x), y) 2982e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 2983e8d8bef9SDimitry Andric Register X, Y; 2984e8d8bef9SDimitry Andric std::tie(X, Y) = MatchInfo; 2985e8d8bef9SDimitry Andric auto Not = Builder.buildNot(MRI.getType(X), X); 2986e8d8bef9SDimitry Andric Observer.changingInstr(MI); 2987e8d8bef9SDimitry Andric MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND)); 2988e8d8bef9SDimitry Andric MI.getOperand(1).setReg(Not->getOperand(0).getReg()); 2989e8d8bef9SDimitry Andric MI.getOperand(2).setReg(Y); 2990e8d8bef9SDimitry Andric Observer.changedInstr(MI); 2991e8d8bef9SDimitry Andric } 2992e8d8bef9SDimitry Andric 2993e8d8bef9SDimitry Andric bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { 2994*349cc55cSDimitry Andric auto &PtrAdd = cast<GPtrAdd>(MI); 2995*349cc55cSDimitry Andric Register DstReg = PtrAdd.getReg(0); 2996e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 2997e8d8bef9SDimitry Andric const DataLayout &DL = Builder.getMF().getDataLayout(); 2998e8d8bef9SDimitry Andric 2999e8d8bef9SDimitry Andric if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace())) 3000e8d8bef9SDimitry Andric return false; 3001e8d8bef9SDimitry Andric 3002e8d8bef9SDimitry Andric if (Ty.isPointer()) { 3003*349cc55cSDimitry Andric auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI); 3004e8d8bef9SDimitry Andric return ConstVal && *ConstVal == 0; 3005e8d8bef9SDimitry Andric } 3006e8d8bef9SDimitry Andric 3007e8d8bef9SDimitry Andric assert(Ty.isVector() && "Expecting a vector type"); 3008*349cc55cSDimitry Andric const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg()); 3009e8d8bef9SDimitry Andric return isBuildVectorAllZeros(*VecMI, MRI); 3010e8d8bef9SDimitry Andric } 3011e8d8bef9SDimitry Andric 3012fe6060f1SDimitry Andric void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { 3013*349cc55cSDimitry Andric auto &PtrAdd = cast<GPtrAdd>(MI); 3014*349cc55cSDimitry Andric Builder.setInstrAndDebugLoc(PtrAdd); 3015*349cc55cSDimitry Andric Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg()); 3016*349cc55cSDimitry Andric PtrAdd.eraseFromParent(); 3017e8d8bef9SDimitry Andric } 3018e8d8bef9SDimitry Andric 3019e8d8bef9SDimitry Andric /// The second source operand is known to be a power of 2. 3020fe6060f1SDimitry Andric void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { 3021e8d8bef9SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3022e8d8bef9SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3023e8d8bef9SDimitry Andric Register Pow2Src1 = MI.getOperand(2).getReg(); 3024e8d8bef9SDimitry Andric LLT Ty = MRI.getType(DstReg); 3025e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3026e8d8bef9SDimitry Andric 3027e8d8bef9SDimitry Andric // Fold (urem x, pow2) -> (and x, pow2-1) 3028e8d8bef9SDimitry Andric auto NegOne = Builder.buildConstant(Ty, -1); 3029e8d8bef9SDimitry Andric auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne); 3030e8d8bef9SDimitry Andric Builder.buildAnd(DstReg, Src0, Add); 3031e8d8bef9SDimitry Andric MI.eraseFromParent(); 3032e8d8bef9SDimitry Andric } 3033e8d8bef9SDimitry Andric 3034e8d8bef9SDimitry Andric Optional<SmallVector<Register, 8>> 3035e8d8bef9SDimitry Andric CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const { 3036e8d8bef9SDimitry Andric assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!"); 3037e8d8bef9SDimitry Andric // We want to detect if Root is part of a tree which represents a bunch 3038e8d8bef9SDimitry Andric // of loads being merged into a larger load. We'll try to recognize patterns 3039e8d8bef9SDimitry Andric // like, for example: 3040e8d8bef9SDimitry Andric // 3041e8d8bef9SDimitry Andric // Reg Reg 3042e8d8bef9SDimitry Andric // \ / 3043e8d8bef9SDimitry Andric // OR_1 Reg 3044e8d8bef9SDimitry Andric // \ / 3045e8d8bef9SDimitry Andric // OR_2 3046e8d8bef9SDimitry Andric // \ Reg 3047e8d8bef9SDimitry Andric // .. / 3048e8d8bef9SDimitry Andric // Root 3049e8d8bef9SDimitry Andric // 3050e8d8bef9SDimitry Andric // Reg Reg Reg Reg 3051e8d8bef9SDimitry Andric // \ / \ / 3052e8d8bef9SDimitry Andric // OR_1 OR_2 3053e8d8bef9SDimitry Andric // \ / 3054e8d8bef9SDimitry Andric // \ / 3055e8d8bef9SDimitry Andric // ... 3056e8d8bef9SDimitry Andric // Root 3057e8d8bef9SDimitry Andric // 3058e8d8bef9SDimitry Andric // Each "Reg" may have been produced by a load + some arithmetic. This 3059e8d8bef9SDimitry Andric // function will save each of them. 3060e8d8bef9SDimitry Andric SmallVector<Register, 8> RegsToVisit; 3061e8d8bef9SDimitry Andric SmallVector<const MachineInstr *, 7> Ors = {Root}; 3062e8d8bef9SDimitry Andric 3063e8d8bef9SDimitry Andric // In the "worst" case, we're dealing with a load for each byte. So, there 3064e8d8bef9SDimitry Andric // are at most #bytes - 1 ORs. 3065e8d8bef9SDimitry Andric const unsigned MaxIter = 3066e8d8bef9SDimitry Andric MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1; 3067e8d8bef9SDimitry Andric for (unsigned Iter = 0; Iter < MaxIter; ++Iter) { 3068e8d8bef9SDimitry Andric if (Ors.empty()) 3069e8d8bef9SDimitry Andric break; 3070e8d8bef9SDimitry Andric const MachineInstr *Curr = Ors.pop_back_val(); 3071e8d8bef9SDimitry Andric Register OrLHS = Curr->getOperand(1).getReg(); 3072e8d8bef9SDimitry Andric Register OrRHS = Curr->getOperand(2).getReg(); 3073e8d8bef9SDimitry Andric 3074e8d8bef9SDimitry Andric // In the combine, we want to elimate the entire tree. 3075e8d8bef9SDimitry Andric if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) 3076e8d8bef9SDimitry Andric return None; 3077e8d8bef9SDimitry Andric 3078e8d8bef9SDimitry Andric // If it's a G_OR, save it and continue to walk. If it's not, then it's 3079e8d8bef9SDimitry Andric // something that may be a load + arithmetic. 3080e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI)) 3081e8d8bef9SDimitry Andric Ors.push_back(Or); 3082e8d8bef9SDimitry Andric else 3083e8d8bef9SDimitry Andric RegsToVisit.push_back(OrLHS); 3084e8d8bef9SDimitry Andric if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI)) 3085e8d8bef9SDimitry Andric Ors.push_back(Or); 3086e8d8bef9SDimitry Andric else 3087e8d8bef9SDimitry Andric RegsToVisit.push_back(OrRHS); 3088e8d8bef9SDimitry Andric } 3089e8d8bef9SDimitry Andric 3090e8d8bef9SDimitry Andric // We're going to try and merge each register into a wider power-of-2 type, 3091e8d8bef9SDimitry Andric // so we ought to have an even number of registers. 3092e8d8bef9SDimitry Andric if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) 3093e8d8bef9SDimitry Andric return None; 3094e8d8bef9SDimitry Andric return RegsToVisit; 3095e8d8bef9SDimitry Andric } 3096e8d8bef9SDimitry Andric 3097e8d8bef9SDimitry Andric /// Helper function for findLoadOffsetsForLoadOrCombine. 3098e8d8bef9SDimitry Andric /// 3099e8d8bef9SDimitry Andric /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value, 3100e8d8bef9SDimitry Andric /// and then moving that value into a specific byte offset. 3101e8d8bef9SDimitry Andric /// 3102e8d8bef9SDimitry Andric /// e.g. x[i] << 24 3103e8d8bef9SDimitry Andric /// 3104e8d8bef9SDimitry Andric /// \returns The load instruction and the byte offset it is moved into. 3105fe6060f1SDimitry Andric static Optional<std::pair<GZExtLoad *, int64_t>> 3106e8d8bef9SDimitry Andric matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, 3107e8d8bef9SDimitry Andric const MachineRegisterInfo &MRI) { 3108e8d8bef9SDimitry Andric assert(MRI.hasOneNonDBGUse(Reg) && 3109e8d8bef9SDimitry Andric "Expected Reg to only have one non-debug use?"); 3110e8d8bef9SDimitry Andric Register MaybeLoad; 3111e8d8bef9SDimitry Andric int64_t Shift; 3112e8d8bef9SDimitry Andric if (!mi_match(Reg, MRI, 3113e8d8bef9SDimitry Andric m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) { 3114e8d8bef9SDimitry Andric Shift = 0; 3115e8d8bef9SDimitry Andric MaybeLoad = Reg; 3116e8d8bef9SDimitry Andric } 3117e8d8bef9SDimitry Andric 3118e8d8bef9SDimitry Andric if (Shift % MemSizeInBits != 0) 3119e8d8bef9SDimitry Andric return None; 3120e8d8bef9SDimitry Andric 3121e8d8bef9SDimitry Andric // TODO: Handle other types of loads. 3122fe6060f1SDimitry Andric auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI); 3123e8d8bef9SDimitry Andric if (!Load) 3124e8d8bef9SDimitry Andric return None; 3125e8d8bef9SDimitry Andric 3126fe6060f1SDimitry Andric if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits) 3127e8d8bef9SDimitry Andric return None; 3128e8d8bef9SDimitry Andric 3129e8d8bef9SDimitry Andric return std::make_pair(Load, Shift / MemSizeInBits); 3130e8d8bef9SDimitry Andric } 3131e8d8bef9SDimitry Andric 3132fe6060f1SDimitry Andric Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> 3133e8d8bef9SDimitry Andric CombinerHelper::findLoadOffsetsForLoadOrCombine( 3134e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, 3135e8d8bef9SDimitry Andric const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { 3136e8d8bef9SDimitry Andric 3137e8d8bef9SDimitry Andric // Each load found for the pattern. There should be one for each RegsToVisit. 3138e8d8bef9SDimitry Andric SmallSetVector<const MachineInstr *, 8> Loads; 3139e8d8bef9SDimitry Andric 3140e8d8bef9SDimitry Andric // The lowest index used in any load. (The lowest "i" for each x[i].) 3141e8d8bef9SDimitry Andric int64_t LowestIdx = INT64_MAX; 3142e8d8bef9SDimitry Andric 3143e8d8bef9SDimitry Andric // The load which uses the lowest index. 3144fe6060f1SDimitry Andric GZExtLoad *LowestIdxLoad = nullptr; 3145e8d8bef9SDimitry Andric 3146e8d8bef9SDimitry Andric // Keeps track of the load indices we see. We shouldn't see any indices twice. 3147e8d8bef9SDimitry Andric SmallSet<int64_t, 8> SeenIdx; 3148e8d8bef9SDimitry Andric 3149e8d8bef9SDimitry Andric // Ensure each load is in the same MBB. 3150e8d8bef9SDimitry Andric // TODO: Support multiple MachineBasicBlocks. 3151e8d8bef9SDimitry Andric MachineBasicBlock *MBB = nullptr; 3152e8d8bef9SDimitry Andric const MachineMemOperand *MMO = nullptr; 3153e8d8bef9SDimitry Andric 3154e8d8bef9SDimitry Andric // Earliest instruction-order load in the pattern. 3155fe6060f1SDimitry Andric GZExtLoad *EarliestLoad = nullptr; 3156e8d8bef9SDimitry Andric 3157e8d8bef9SDimitry Andric // Latest instruction-order load in the pattern. 3158fe6060f1SDimitry Andric GZExtLoad *LatestLoad = nullptr; 3159e8d8bef9SDimitry Andric 3160e8d8bef9SDimitry Andric // Base pointer which every load should share. 3161e8d8bef9SDimitry Andric Register BasePtr; 3162e8d8bef9SDimitry Andric 3163e8d8bef9SDimitry Andric // We want to find a load for each register. Each load should have some 3164e8d8bef9SDimitry Andric // appropriate bit twiddling arithmetic. During this loop, we will also keep 3165e8d8bef9SDimitry Andric // track of the load which uses the lowest index. Later, we will check if we 3166e8d8bef9SDimitry Andric // can use its pointer in the final, combined load. 3167e8d8bef9SDimitry Andric for (auto Reg : RegsToVisit) { 3168e8d8bef9SDimitry Andric // Find the load, and find the position that it will end up in (e.g. a 3169e8d8bef9SDimitry Andric // shifted) value. 3170e8d8bef9SDimitry Andric auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); 3171e8d8bef9SDimitry Andric if (!LoadAndPos) 3172e8d8bef9SDimitry Andric return None; 3173fe6060f1SDimitry Andric GZExtLoad *Load; 3174e8d8bef9SDimitry Andric int64_t DstPos; 3175e8d8bef9SDimitry Andric std::tie(Load, DstPos) = *LoadAndPos; 3176e8d8bef9SDimitry Andric 3177e8d8bef9SDimitry Andric // TODO: Handle multiple MachineBasicBlocks. Currently not handled because 3178e8d8bef9SDimitry Andric // it is difficult to check for stores/calls/etc between loads. 3179e8d8bef9SDimitry Andric MachineBasicBlock *LoadMBB = Load->getParent(); 3180e8d8bef9SDimitry Andric if (!MBB) 3181e8d8bef9SDimitry Andric MBB = LoadMBB; 3182e8d8bef9SDimitry Andric if (LoadMBB != MBB) 3183e8d8bef9SDimitry Andric return None; 3184e8d8bef9SDimitry Andric 3185e8d8bef9SDimitry Andric // Make sure that the MachineMemOperands of every seen load are compatible. 3186fe6060f1SDimitry Andric auto &LoadMMO = Load->getMMO(); 3187e8d8bef9SDimitry Andric if (!MMO) 3188fe6060f1SDimitry Andric MMO = &LoadMMO; 3189fe6060f1SDimitry Andric if (MMO->getAddrSpace() != LoadMMO.getAddrSpace()) 3190e8d8bef9SDimitry Andric return None; 3191e8d8bef9SDimitry Andric 3192e8d8bef9SDimitry Andric // Find out what the base pointer and index for the load is. 3193e8d8bef9SDimitry Andric Register LoadPtr; 3194e8d8bef9SDimitry Andric int64_t Idx; 3195e8d8bef9SDimitry Andric if (!mi_match(Load->getOperand(1).getReg(), MRI, 3196e8d8bef9SDimitry Andric m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) { 3197e8d8bef9SDimitry Andric LoadPtr = Load->getOperand(1).getReg(); 3198e8d8bef9SDimitry Andric Idx = 0; 3199e8d8bef9SDimitry Andric } 3200e8d8bef9SDimitry Andric 3201e8d8bef9SDimitry Andric // Don't combine things like a[i], a[i] -> a bigger load. 3202e8d8bef9SDimitry Andric if (!SeenIdx.insert(Idx).second) 3203e8d8bef9SDimitry Andric return None; 3204e8d8bef9SDimitry Andric 3205e8d8bef9SDimitry Andric // Every load must share the same base pointer; don't combine things like: 3206e8d8bef9SDimitry Andric // 3207e8d8bef9SDimitry Andric // a[i], b[i + 1] -> a bigger load. 3208e8d8bef9SDimitry Andric if (!BasePtr.isValid()) 3209e8d8bef9SDimitry Andric BasePtr = LoadPtr; 3210e8d8bef9SDimitry Andric if (BasePtr != LoadPtr) 3211e8d8bef9SDimitry Andric return None; 3212e8d8bef9SDimitry Andric 3213e8d8bef9SDimitry Andric if (Idx < LowestIdx) { 3214e8d8bef9SDimitry Andric LowestIdx = Idx; 3215e8d8bef9SDimitry Andric LowestIdxLoad = Load; 3216e8d8bef9SDimitry Andric } 3217e8d8bef9SDimitry Andric 3218e8d8bef9SDimitry Andric // Keep track of the byte offset that this load ends up at. If we have seen 3219e8d8bef9SDimitry Andric // the byte offset, then stop here. We do not want to combine: 3220e8d8bef9SDimitry Andric // 3221e8d8bef9SDimitry Andric // a[i] << 16, a[i + k] << 16 -> a bigger load. 3222e8d8bef9SDimitry Andric if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) 3223e8d8bef9SDimitry Andric return None; 3224e8d8bef9SDimitry Andric Loads.insert(Load); 3225e8d8bef9SDimitry Andric 3226e8d8bef9SDimitry Andric // Keep track of the position of the earliest/latest loads in the pattern. 3227e8d8bef9SDimitry Andric // We will check that there are no load fold barriers between them later 3228e8d8bef9SDimitry Andric // on. 3229e8d8bef9SDimitry Andric // 3230e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3231e8d8bef9SDimitry Andric if (!EarliestLoad || dominates(*Load, *EarliestLoad)) 3232e8d8bef9SDimitry Andric EarliestLoad = Load; 3233e8d8bef9SDimitry Andric if (!LatestLoad || dominates(*LatestLoad, *Load)) 3234e8d8bef9SDimitry Andric LatestLoad = Load; 3235e8d8bef9SDimitry Andric } 3236e8d8bef9SDimitry Andric 3237e8d8bef9SDimitry Andric // We found a load for each register. Let's check if each load satisfies the 3238e8d8bef9SDimitry Andric // pattern. 3239e8d8bef9SDimitry Andric assert(Loads.size() == RegsToVisit.size() && 3240e8d8bef9SDimitry Andric "Expected to find a load for each register?"); 3241e8d8bef9SDimitry Andric assert(EarliestLoad != LatestLoad && EarliestLoad && 3242e8d8bef9SDimitry Andric LatestLoad && "Expected at least two loads?"); 3243e8d8bef9SDimitry Andric 3244e8d8bef9SDimitry Andric // Check if there are any stores, calls, etc. between any of the loads. If 3245e8d8bef9SDimitry Andric // there are, then we can't safely perform the combine. 3246e8d8bef9SDimitry Andric // 3247e8d8bef9SDimitry Andric // MaxIter is chosen based off the (worst case) number of iterations it 3248e8d8bef9SDimitry Andric // typically takes to succeed in the LLVM test suite plus some padding. 3249e8d8bef9SDimitry Andric // 3250e8d8bef9SDimitry Andric // FIXME: Is there a better way to check for load fold barriers? 3251e8d8bef9SDimitry Andric const unsigned MaxIter = 20; 3252e8d8bef9SDimitry Andric unsigned Iter = 0; 3253e8d8bef9SDimitry Andric for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(), 3254e8d8bef9SDimitry Andric LatestLoad->getIterator())) { 3255e8d8bef9SDimitry Andric if (Loads.count(&MI)) 3256e8d8bef9SDimitry Andric continue; 3257e8d8bef9SDimitry Andric if (MI.isLoadFoldBarrier()) 3258e8d8bef9SDimitry Andric return None; 3259e8d8bef9SDimitry Andric if (Iter++ == MaxIter) 3260e8d8bef9SDimitry Andric return None; 3261e8d8bef9SDimitry Andric } 3262e8d8bef9SDimitry Andric 3263fe6060f1SDimitry Andric return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad); 3264e8d8bef9SDimitry Andric } 3265e8d8bef9SDimitry Andric 3266e8d8bef9SDimitry Andric bool CombinerHelper::matchLoadOrCombine( 3267e8d8bef9SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3268e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_OR); 3269e8d8bef9SDimitry Andric MachineFunction &MF = *MI.getMF(); 3270e8d8bef9SDimitry Andric // Assuming a little-endian target, transform: 3271e8d8bef9SDimitry Andric // s8 *a = ... 3272e8d8bef9SDimitry Andric // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) 3273e8d8bef9SDimitry Andric // => 3274e8d8bef9SDimitry Andric // s32 val = *((i32)a) 3275e8d8bef9SDimitry Andric // 3276e8d8bef9SDimitry Andric // s8 *a = ... 3277e8d8bef9SDimitry Andric // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] 3278e8d8bef9SDimitry Andric // => 3279e8d8bef9SDimitry Andric // s32 val = BSWAP(*((s32)a)) 3280e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3281e8d8bef9SDimitry Andric LLT Ty = MRI.getType(Dst); 3282e8d8bef9SDimitry Andric if (Ty.isVector()) 3283e8d8bef9SDimitry Andric return false; 3284e8d8bef9SDimitry Andric 3285e8d8bef9SDimitry Andric // We need to combine at least two loads into this type. Since the smallest 3286e8d8bef9SDimitry Andric // possible load is into a byte, we need at least a 16-bit wide type. 3287e8d8bef9SDimitry Andric const unsigned WideMemSizeInBits = Ty.getSizeInBits(); 3288e8d8bef9SDimitry Andric if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0) 3289e8d8bef9SDimitry Andric return false; 3290e8d8bef9SDimitry Andric 3291e8d8bef9SDimitry Andric // Match a collection of non-OR instructions in the pattern. 3292e8d8bef9SDimitry Andric auto RegsToVisit = findCandidatesForLoadOrCombine(&MI); 3293e8d8bef9SDimitry Andric if (!RegsToVisit) 3294e8d8bef9SDimitry Andric return false; 3295e8d8bef9SDimitry Andric 3296e8d8bef9SDimitry Andric // We have a collection of non-OR instructions. Figure out how wide each of 3297e8d8bef9SDimitry Andric // the small loads should be based off of the number of potential loads we 3298e8d8bef9SDimitry Andric // found. 3299e8d8bef9SDimitry Andric const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size(); 3300e8d8bef9SDimitry Andric if (NarrowMemSizeInBits % 8 != 0) 3301e8d8bef9SDimitry Andric return false; 3302e8d8bef9SDimitry Andric 3303e8d8bef9SDimitry Andric // Check if each register feeding into each OR is a load from the same 3304e8d8bef9SDimitry Andric // base pointer + some arithmetic. 3305e8d8bef9SDimitry Andric // 3306e8d8bef9SDimitry Andric // e.g. a[0], a[1] << 8, a[2] << 16, etc. 3307e8d8bef9SDimitry Andric // 3308e8d8bef9SDimitry Andric // Also verify that each of these ends up putting a[i] into the same memory 3309e8d8bef9SDimitry Andric // offset as a load into a wide type would. 3310e8d8bef9SDimitry Andric SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx; 3311fe6060f1SDimitry Andric GZExtLoad *LowestIdxLoad, *LatestLoad; 3312e8d8bef9SDimitry Andric int64_t LowestIdx; 3313e8d8bef9SDimitry Andric auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine( 3314e8d8bef9SDimitry Andric MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits); 3315e8d8bef9SDimitry Andric if (!MaybeLoadInfo) 3316e8d8bef9SDimitry Andric return false; 3317fe6060f1SDimitry Andric std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo; 3318e8d8bef9SDimitry Andric 3319e8d8bef9SDimitry Andric // We have a bunch of loads being OR'd together. Using the addresses + offsets 3320e8d8bef9SDimitry Andric // we found before, check if this corresponds to a big or little endian byte 3321e8d8bef9SDimitry Andric // pattern. If it does, then we can represent it using a load + possibly a 3322e8d8bef9SDimitry Andric // BSWAP. 3323e8d8bef9SDimitry Andric bool IsBigEndianTarget = MF.getDataLayout().isBigEndian(); 3324e8d8bef9SDimitry Andric Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx); 3325e8d8bef9SDimitry Andric if (!IsBigEndian.hasValue()) 3326e8d8bef9SDimitry Andric return false; 3327e8d8bef9SDimitry Andric bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian; 3328e8d8bef9SDimitry Andric if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}})) 3329e8d8bef9SDimitry Andric return false; 3330e8d8bef9SDimitry Andric 3331e8d8bef9SDimitry Andric // Make sure that the load from the lowest index produces offset 0 in the 3332e8d8bef9SDimitry Andric // final value. 3333e8d8bef9SDimitry Andric // 3334e8d8bef9SDimitry Andric // This ensures that we won't combine something like this: 3335e8d8bef9SDimitry Andric // 3336e8d8bef9SDimitry Andric // load x[i] -> byte 2 3337e8d8bef9SDimitry Andric // load x[i+1] -> byte 0 ---> wide_load x[i] 3338e8d8bef9SDimitry Andric // load x[i+2] -> byte 1 3339e8d8bef9SDimitry Andric const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits; 3340e8d8bef9SDimitry Andric const unsigned ZeroByteOffset = 3341e8d8bef9SDimitry Andric *IsBigEndian 3342e8d8bef9SDimitry Andric ? bigEndianByteAt(NumLoadsInTy, 0) 3343e8d8bef9SDimitry Andric : littleEndianByteAt(NumLoadsInTy, 0); 3344e8d8bef9SDimitry Andric auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset); 3345e8d8bef9SDimitry Andric if (ZeroOffsetIdx == MemOffset2Idx.end() || 3346e8d8bef9SDimitry Andric ZeroOffsetIdx->second != LowestIdx) 3347e8d8bef9SDimitry Andric return false; 3348e8d8bef9SDimitry Andric 3349e8d8bef9SDimitry Andric // We wil reuse the pointer from the load which ends up at byte offset 0. It 3350e8d8bef9SDimitry Andric // may not use index 0. 3351fe6060f1SDimitry Andric Register Ptr = LowestIdxLoad->getPointerReg(); 3352fe6060f1SDimitry Andric const MachineMemOperand &MMO = LowestIdxLoad->getMMO(); 3353*349cc55cSDimitry Andric LegalityQuery::MemDesc MMDesc(MMO); 3354fe6060f1SDimitry Andric MMDesc.MemoryTy = Ty; 3355e8d8bef9SDimitry Andric if (!isLegalOrBeforeLegalizer( 3356e8d8bef9SDimitry Andric {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}})) 3357e8d8bef9SDimitry Andric return false; 3358e8d8bef9SDimitry Andric auto PtrInfo = MMO.getPointerInfo(); 3359e8d8bef9SDimitry Andric auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8); 3360e8d8bef9SDimitry Andric 3361e8d8bef9SDimitry Andric // Load must be allowed and fast on the target. 3362e8d8bef9SDimitry Andric LLVMContext &C = MF.getFunction().getContext(); 3363e8d8bef9SDimitry Andric auto &DL = MF.getDataLayout(); 3364e8d8bef9SDimitry Andric bool Fast = false; 3365e8d8bef9SDimitry Andric if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || 3366e8d8bef9SDimitry Andric !Fast) 3367e8d8bef9SDimitry Andric return false; 3368e8d8bef9SDimitry Andric 3369e8d8bef9SDimitry Andric MatchInfo = [=](MachineIRBuilder &MIB) { 3370fe6060f1SDimitry Andric MIB.setInstrAndDebugLoc(*LatestLoad); 3371e8d8bef9SDimitry Andric Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst; 3372e8d8bef9SDimitry Andric MIB.buildLoad(LoadDst, Ptr, *NewMMO); 3373e8d8bef9SDimitry Andric if (NeedsBSwap) 3374e8d8bef9SDimitry Andric MIB.buildBSwap(Dst, LoadDst); 3375e8d8bef9SDimitry Andric }; 3376e8d8bef9SDimitry Andric return true; 3377e8d8bef9SDimitry Andric } 3378e8d8bef9SDimitry Andric 3379*349cc55cSDimitry Andric /// Check if the store \p Store is a truncstore that can be merged. That is, 3380*349cc55cSDimitry Andric /// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty 3381*349cc55cSDimitry Andric /// Register then it does not need to match and SrcVal is set to the source 3382*349cc55cSDimitry Andric /// value found. 3383*349cc55cSDimitry Andric /// On match, returns the start byte offset of the \p SrcVal that is being 3384*349cc55cSDimitry Andric /// stored. 3385*349cc55cSDimitry Andric static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal, 3386*349cc55cSDimitry Andric MachineRegisterInfo &MRI) { 3387*349cc55cSDimitry Andric Register TruncVal; 3388*349cc55cSDimitry Andric if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) 3389*349cc55cSDimitry Andric return None; 3390*349cc55cSDimitry Andric 3391*349cc55cSDimitry Andric // The shift amount must be a constant multiple of the narrow type. 3392*349cc55cSDimitry Andric // It is translated to the offset address in the wide source value "y". 3393*349cc55cSDimitry Andric // 3394*349cc55cSDimitry Andric // x = G_LSHR y, ShiftAmtC 3395*349cc55cSDimitry Andric // s8 z = G_TRUNC x 3396*349cc55cSDimitry Andric // store z, ... 3397*349cc55cSDimitry Andric Register FoundSrcVal; 3398*349cc55cSDimitry Andric int64_t ShiftAmt; 3399*349cc55cSDimitry Andric if (!mi_match(TruncVal, MRI, 3400*349cc55cSDimitry Andric m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)), 3401*349cc55cSDimitry Andric m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) { 3402*349cc55cSDimitry Andric if (!SrcVal.isValid() || TruncVal == SrcVal) { 3403*349cc55cSDimitry Andric if (!SrcVal.isValid()) 3404*349cc55cSDimitry Andric SrcVal = TruncVal; 3405*349cc55cSDimitry Andric return 0; // If it's the lowest index store. 3406*349cc55cSDimitry Andric } 3407*349cc55cSDimitry Andric return None; 3408*349cc55cSDimitry Andric } 3409*349cc55cSDimitry Andric 3410*349cc55cSDimitry Andric unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); 3411*349cc55cSDimitry Andric if (ShiftAmt % NarrowBits!= 0) 3412*349cc55cSDimitry Andric return None; 3413*349cc55cSDimitry Andric const unsigned Offset = ShiftAmt / NarrowBits; 3414*349cc55cSDimitry Andric 3415*349cc55cSDimitry Andric if (SrcVal.isValid() && FoundSrcVal != SrcVal) 3416*349cc55cSDimitry Andric return None; 3417*349cc55cSDimitry Andric 3418*349cc55cSDimitry Andric if (!SrcVal.isValid()) 3419*349cc55cSDimitry Andric SrcVal = FoundSrcVal; 3420*349cc55cSDimitry Andric else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) 3421*349cc55cSDimitry Andric return None; 3422*349cc55cSDimitry Andric return Offset; 3423*349cc55cSDimitry Andric } 3424*349cc55cSDimitry Andric 3425*349cc55cSDimitry Andric /// Match a pattern where a wide type scalar value is stored by several narrow 3426*349cc55cSDimitry Andric /// stores. Fold it into a single store or a BSWAP and a store if the targets 3427*349cc55cSDimitry Andric /// supports it. 3428*349cc55cSDimitry Andric /// 3429*349cc55cSDimitry Andric /// Assuming little endian target: 3430*349cc55cSDimitry Andric /// i8 *p = ... 3431*349cc55cSDimitry Andric /// i32 val = ... 3432*349cc55cSDimitry Andric /// p[0] = (val >> 0) & 0xFF; 3433*349cc55cSDimitry Andric /// p[1] = (val >> 8) & 0xFF; 3434*349cc55cSDimitry Andric /// p[2] = (val >> 16) & 0xFF; 3435*349cc55cSDimitry Andric /// p[3] = (val >> 24) & 0xFF; 3436*349cc55cSDimitry Andric /// => 3437*349cc55cSDimitry Andric /// *((i32)p) = val; 3438*349cc55cSDimitry Andric /// 3439*349cc55cSDimitry Andric /// i8 *p = ... 3440*349cc55cSDimitry Andric /// i32 val = ... 3441*349cc55cSDimitry Andric /// p[0] = (val >> 24) & 0xFF; 3442*349cc55cSDimitry Andric /// p[1] = (val >> 16) & 0xFF; 3443*349cc55cSDimitry Andric /// p[2] = (val >> 8) & 0xFF; 3444*349cc55cSDimitry Andric /// p[3] = (val >> 0) & 0xFF; 3445*349cc55cSDimitry Andric /// => 3446*349cc55cSDimitry Andric /// *((i32)p) = BSWAP(val); 3447*349cc55cSDimitry Andric bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI, 3448*349cc55cSDimitry Andric MergeTruncStoresInfo &MatchInfo) { 3449*349cc55cSDimitry Andric auto &StoreMI = cast<GStore>(MI); 3450*349cc55cSDimitry Andric LLT MemTy = StoreMI.getMMO().getMemoryType(); 3451*349cc55cSDimitry Andric 3452*349cc55cSDimitry Andric // We only handle merging simple stores of 1-4 bytes. 3453*349cc55cSDimitry Andric if (!MemTy.isScalar()) 3454*349cc55cSDimitry Andric return false; 3455*349cc55cSDimitry Andric switch (MemTy.getSizeInBits()) { 3456*349cc55cSDimitry Andric case 8: 3457*349cc55cSDimitry Andric case 16: 3458*349cc55cSDimitry Andric case 32: 3459*349cc55cSDimitry Andric break; 3460*349cc55cSDimitry Andric default: 3461*349cc55cSDimitry Andric return false; 3462*349cc55cSDimitry Andric } 3463*349cc55cSDimitry Andric if (!StoreMI.isSimple()) 3464*349cc55cSDimitry Andric return false; 3465*349cc55cSDimitry Andric 3466*349cc55cSDimitry Andric // We do a simple search for mergeable stores prior to this one. 3467*349cc55cSDimitry Andric // Any potential alias hazard along the way terminates the search. 3468*349cc55cSDimitry Andric SmallVector<GStore *> FoundStores; 3469*349cc55cSDimitry Andric 3470*349cc55cSDimitry Andric // We're looking for: 3471*349cc55cSDimitry Andric // 1) a (store(trunc(...))) 3472*349cc55cSDimitry Andric // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get 3473*349cc55cSDimitry Andric // the partial value stored. 3474*349cc55cSDimitry Andric // 3) where the offsets form either a little or big-endian sequence. 3475*349cc55cSDimitry Andric 3476*349cc55cSDimitry Andric auto &LastStore = StoreMI; 3477*349cc55cSDimitry Andric 3478*349cc55cSDimitry Andric // The single base pointer that all stores must use. 3479*349cc55cSDimitry Andric Register BaseReg; 3480*349cc55cSDimitry Andric int64_t LastOffset; 3481*349cc55cSDimitry Andric if (!mi_match(LastStore.getPointerReg(), MRI, 3482*349cc55cSDimitry Andric m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) { 3483*349cc55cSDimitry Andric BaseReg = LastStore.getPointerReg(); 3484*349cc55cSDimitry Andric LastOffset = 0; 3485*349cc55cSDimitry Andric } 3486*349cc55cSDimitry Andric 3487*349cc55cSDimitry Andric GStore *LowestIdxStore = &LastStore; 3488*349cc55cSDimitry Andric int64_t LowestIdxOffset = LastOffset; 3489*349cc55cSDimitry Andric 3490*349cc55cSDimitry Andric Register WideSrcVal; 3491*349cc55cSDimitry Andric auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI); 3492*349cc55cSDimitry Andric if (!LowestShiftAmt) 3493*349cc55cSDimitry Andric return false; // Didn't match a trunc. 3494*349cc55cSDimitry Andric assert(WideSrcVal.isValid()); 3495*349cc55cSDimitry Andric 3496*349cc55cSDimitry Andric LLT WideStoreTy = MRI.getType(WideSrcVal); 3497*349cc55cSDimitry Andric // The wide type might not be a multiple of the memory type, e.g. s48 and s32. 3498*349cc55cSDimitry Andric if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0) 3499*349cc55cSDimitry Andric return false; 3500*349cc55cSDimitry Andric const unsigned NumStoresRequired = 3501*349cc55cSDimitry Andric WideStoreTy.getSizeInBits() / MemTy.getSizeInBits(); 3502*349cc55cSDimitry Andric 3503*349cc55cSDimitry Andric SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX); 3504*349cc55cSDimitry Andric OffsetMap[*LowestShiftAmt] = LastOffset; 3505*349cc55cSDimitry Andric FoundStores.emplace_back(&LastStore); 3506*349cc55cSDimitry Andric 3507*349cc55cSDimitry Andric // Search the block up for more stores. 3508*349cc55cSDimitry Andric // We use a search threshold of 10 instructions here because the combiner 3509*349cc55cSDimitry Andric // works top-down within a block, and we don't want to search an unbounded 3510*349cc55cSDimitry Andric // number of predecessor instructions trying to find matching stores. 3511*349cc55cSDimitry Andric // If we moved this optimization into a separate pass then we could probably 3512*349cc55cSDimitry Andric // use a more efficient search without having a hard-coded threshold. 3513*349cc55cSDimitry Andric const int MaxInstsToCheck = 10; 3514*349cc55cSDimitry Andric int NumInstsChecked = 0; 3515*349cc55cSDimitry Andric for (auto II = ++LastStore.getReverseIterator(); 3516*349cc55cSDimitry Andric II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck; 3517*349cc55cSDimitry Andric ++II) { 3518*349cc55cSDimitry Andric NumInstsChecked++; 3519*349cc55cSDimitry Andric GStore *NewStore; 3520*349cc55cSDimitry Andric if ((NewStore = dyn_cast<GStore>(&*II))) { 3521*349cc55cSDimitry Andric if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple()) 3522*349cc55cSDimitry Andric break; 3523*349cc55cSDimitry Andric } else if (II->isLoadFoldBarrier() || II->mayLoad()) { 3524*349cc55cSDimitry Andric break; 3525*349cc55cSDimitry Andric } else { 3526*349cc55cSDimitry Andric continue; // This is a safe instruction we can look past. 3527*349cc55cSDimitry Andric } 3528*349cc55cSDimitry Andric 3529*349cc55cSDimitry Andric Register NewBaseReg; 3530*349cc55cSDimitry Andric int64_t MemOffset; 3531*349cc55cSDimitry Andric // Check we're storing to the same base + some offset. 3532*349cc55cSDimitry Andric if (!mi_match(NewStore->getPointerReg(), MRI, 3533*349cc55cSDimitry Andric m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) { 3534*349cc55cSDimitry Andric NewBaseReg = NewStore->getPointerReg(); 3535*349cc55cSDimitry Andric MemOffset = 0; 3536*349cc55cSDimitry Andric } 3537*349cc55cSDimitry Andric if (BaseReg != NewBaseReg) 3538*349cc55cSDimitry Andric break; 3539*349cc55cSDimitry Andric 3540*349cc55cSDimitry Andric auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI); 3541*349cc55cSDimitry Andric if (!ShiftByteOffset) 3542*349cc55cSDimitry Andric break; 3543*349cc55cSDimitry Andric if (MemOffset < LowestIdxOffset) { 3544*349cc55cSDimitry Andric LowestIdxOffset = MemOffset; 3545*349cc55cSDimitry Andric LowestIdxStore = NewStore; 3546*349cc55cSDimitry Andric } 3547*349cc55cSDimitry Andric 3548*349cc55cSDimitry Andric // Map the offset in the store and the offset in the combined value, and 3549*349cc55cSDimitry Andric // early return if it has been set before. 3550*349cc55cSDimitry Andric if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired || 3551*349cc55cSDimitry Andric OffsetMap[*ShiftByteOffset] != INT64_MAX) 3552*349cc55cSDimitry Andric break; 3553*349cc55cSDimitry Andric OffsetMap[*ShiftByteOffset] = MemOffset; 3554*349cc55cSDimitry Andric 3555*349cc55cSDimitry Andric FoundStores.emplace_back(NewStore); 3556*349cc55cSDimitry Andric // Reset counter since we've found a matching inst. 3557*349cc55cSDimitry Andric NumInstsChecked = 0; 3558*349cc55cSDimitry Andric if (FoundStores.size() == NumStoresRequired) 3559*349cc55cSDimitry Andric break; 3560*349cc55cSDimitry Andric } 3561*349cc55cSDimitry Andric 3562*349cc55cSDimitry Andric if (FoundStores.size() != NumStoresRequired) { 3563*349cc55cSDimitry Andric return false; 3564*349cc55cSDimitry Andric } 3565*349cc55cSDimitry Andric 3566*349cc55cSDimitry Andric const auto &DL = LastStore.getMF()->getDataLayout(); 3567*349cc55cSDimitry Andric auto &C = LastStore.getMF()->getFunction().getContext(); 3568*349cc55cSDimitry Andric // Check that a store of the wide type is both allowed and fast on the target 3569*349cc55cSDimitry Andric bool Fast = false; 3570*349cc55cSDimitry Andric bool Allowed = getTargetLowering().allowsMemoryAccess( 3571*349cc55cSDimitry Andric C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); 3572*349cc55cSDimitry Andric if (!Allowed || !Fast) 3573*349cc55cSDimitry Andric return false; 3574*349cc55cSDimitry Andric 3575*349cc55cSDimitry Andric // Check if the pieces of the value are going to the expected places in memory 3576*349cc55cSDimitry Andric // to merge the stores. 3577*349cc55cSDimitry Andric unsigned NarrowBits = MemTy.getScalarSizeInBits(); 3578*349cc55cSDimitry Andric auto checkOffsets = [&](bool MatchLittleEndian) { 3579*349cc55cSDimitry Andric if (MatchLittleEndian) { 3580*349cc55cSDimitry Andric for (unsigned i = 0; i != NumStoresRequired; ++i) 3581*349cc55cSDimitry Andric if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset) 3582*349cc55cSDimitry Andric return false; 3583*349cc55cSDimitry Andric } else { // MatchBigEndian by reversing loop counter. 3584*349cc55cSDimitry Andric for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired; 3585*349cc55cSDimitry Andric ++i, --j) 3586*349cc55cSDimitry Andric if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset) 3587*349cc55cSDimitry Andric return false; 3588*349cc55cSDimitry Andric } 3589*349cc55cSDimitry Andric return true; 3590*349cc55cSDimitry Andric }; 3591*349cc55cSDimitry Andric 3592*349cc55cSDimitry Andric // Check if the offsets line up for the native data layout of this target. 3593*349cc55cSDimitry Andric bool NeedBswap = false; 3594*349cc55cSDimitry Andric bool NeedRotate = false; 3595*349cc55cSDimitry Andric if (!checkOffsets(DL.isLittleEndian())) { 3596*349cc55cSDimitry Andric // Special-case: check if byte offsets line up for the opposite endian. 3597*349cc55cSDimitry Andric if (NarrowBits == 8 && checkOffsets(DL.isBigEndian())) 3598*349cc55cSDimitry Andric NeedBswap = true; 3599*349cc55cSDimitry Andric else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian())) 3600*349cc55cSDimitry Andric NeedRotate = true; 3601*349cc55cSDimitry Andric else 3602*349cc55cSDimitry Andric return false; 3603*349cc55cSDimitry Andric } 3604*349cc55cSDimitry Andric 3605*349cc55cSDimitry Andric if (NeedBswap && 3606*349cc55cSDimitry Andric !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}})) 3607*349cc55cSDimitry Andric return false; 3608*349cc55cSDimitry Andric if (NeedRotate && 3609*349cc55cSDimitry Andric !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}})) 3610*349cc55cSDimitry Andric return false; 3611*349cc55cSDimitry Andric 3612*349cc55cSDimitry Andric MatchInfo.NeedBSwap = NeedBswap; 3613*349cc55cSDimitry Andric MatchInfo.NeedRotate = NeedRotate; 3614*349cc55cSDimitry Andric MatchInfo.LowestIdxStore = LowestIdxStore; 3615*349cc55cSDimitry Andric MatchInfo.WideSrcVal = WideSrcVal; 3616*349cc55cSDimitry Andric MatchInfo.FoundStores = std::move(FoundStores); 3617*349cc55cSDimitry Andric return true; 3618*349cc55cSDimitry Andric } 3619*349cc55cSDimitry Andric 3620*349cc55cSDimitry Andric void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI, 3621*349cc55cSDimitry Andric MergeTruncStoresInfo &MatchInfo) { 3622*349cc55cSDimitry Andric 3623*349cc55cSDimitry Andric Builder.setInstrAndDebugLoc(MI); 3624*349cc55cSDimitry Andric Register WideSrcVal = MatchInfo.WideSrcVal; 3625*349cc55cSDimitry Andric LLT WideStoreTy = MRI.getType(WideSrcVal); 3626*349cc55cSDimitry Andric 3627*349cc55cSDimitry Andric if (MatchInfo.NeedBSwap) { 3628*349cc55cSDimitry Andric WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0); 3629*349cc55cSDimitry Andric } else if (MatchInfo.NeedRotate) { 3630*349cc55cSDimitry Andric assert(WideStoreTy.getSizeInBits() % 2 == 0 && 3631*349cc55cSDimitry Andric "Unexpected type for rotate"); 3632*349cc55cSDimitry Andric auto RotAmt = 3633*349cc55cSDimitry Andric Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2); 3634*349cc55cSDimitry Andric WideSrcVal = 3635*349cc55cSDimitry Andric Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0); 3636*349cc55cSDimitry Andric } 3637*349cc55cSDimitry Andric 3638*349cc55cSDimitry Andric Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(), 3639*349cc55cSDimitry Andric MatchInfo.LowestIdxStore->getMMO().getPointerInfo(), 3640*349cc55cSDimitry Andric MatchInfo.LowestIdxStore->getMMO().getAlign()); 3641*349cc55cSDimitry Andric 3642*349cc55cSDimitry Andric // Erase the old stores. 3643*349cc55cSDimitry Andric for (auto *ST : MatchInfo.FoundStores) 3644*349cc55cSDimitry Andric ST->eraseFromParent(); 3645*349cc55cSDimitry Andric } 3646*349cc55cSDimitry Andric 3647fe6060f1SDimitry Andric bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, 3648fe6060f1SDimitry Andric MachineInstr *&ExtMI) { 3649fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PHI); 3650fe6060f1SDimitry Andric 3651fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3652fe6060f1SDimitry Andric 3653fe6060f1SDimitry Andric // TODO: Extending a vector may be expensive, don't do this until heuristics 3654fe6060f1SDimitry Andric // are better. 3655fe6060f1SDimitry Andric if (MRI.getType(DstReg).isVector()) 3656fe6060f1SDimitry Andric return false; 3657fe6060f1SDimitry Andric 3658fe6060f1SDimitry Andric // Try to match a phi, whose only use is an extend. 3659fe6060f1SDimitry Andric if (!MRI.hasOneNonDBGUse(DstReg)) 3660fe6060f1SDimitry Andric return false; 3661fe6060f1SDimitry Andric ExtMI = &*MRI.use_instr_nodbg_begin(DstReg); 3662fe6060f1SDimitry Andric switch (ExtMI->getOpcode()) { 3663fe6060f1SDimitry Andric case TargetOpcode::G_ANYEXT: 3664fe6060f1SDimitry Andric return true; // G_ANYEXT is usually free. 3665fe6060f1SDimitry Andric case TargetOpcode::G_ZEXT: 3666fe6060f1SDimitry Andric case TargetOpcode::G_SEXT: 3667fe6060f1SDimitry Andric break; 3668fe6060f1SDimitry Andric default: 3669fe6060f1SDimitry Andric return false; 3670fe6060f1SDimitry Andric } 3671fe6060f1SDimitry Andric 3672fe6060f1SDimitry Andric // If the target is likely to fold this extend away, don't propagate. 3673fe6060f1SDimitry Andric if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI)) 3674fe6060f1SDimitry Andric return false; 3675fe6060f1SDimitry Andric 3676fe6060f1SDimitry Andric // We don't want to propagate the extends unless there's a good chance that 3677fe6060f1SDimitry Andric // they'll be optimized in some way. 3678fe6060f1SDimitry Andric // Collect the unique incoming values. 3679fe6060f1SDimitry Andric SmallPtrSet<MachineInstr *, 4> InSrcs; 3680fe6060f1SDimitry Andric for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 3681fe6060f1SDimitry Andric auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI); 3682fe6060f1SDimitry Andric switch (DefMI->getOpcode()) { 3683fe6060f1SDimitry Andric case TargetOpcode::G_LOAD: 3684fe6060f1SDimitry Andric case TargetOpcode::G_TRUNC: 3685fe6060f1SDimitry Andric case TargetOpcode::G_SEXT: 3686fe6060f1SDimitry Andric case TargetOpcode::G_ZEXT: 3687fe6060f1SDimitry Andric case TargetOpcode::G_ANYEXT: 3688fe6060f1SDimitry Andric case TargetOpcode::G_CONSTANT: 3689fe6060f1SDimitry Andric InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI)); 3690fe6060f1SDimitry Andric // Don't try to propagate if there are too many places to create new 3691fe6060f1SDimitry Andric // extends, chances are it'll increase code size. 3692fe6060f1SDimitry Andric if (InSrcs.size() > 2) 3693fe6060f1SDimitry Andric return false; 3694fe6060f1SDimitry Andric break; 3695fe6060f1SDimitry Andric default: 3696fe6060f1SDimitry Andric return false; 3697fe6060f1SDimitry Andric } 3698fe6060f1SDimitry Andric } 3699fe6060f1SDimitry Andric return true; 3700fe6060f1SDimitry Andric } 3701fe6060f1SDimitry Andric 3702fe6060f1SDimitry Andric void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, 3703fe6060f1SDimitry Andric MachineInstr *&ExtMI) { 3704fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_PHI); 3705fe6060f1SDimitry Andric Register DstReg = ExtMI->getOperand(0).getReg(); 3706fe6060f1SDimitry Andric LLT ExtTy = MRI.getType(DstReg); 3707fe6060f1SDimitry Andric 3708fe6060f1SDimitry Andric // Propagate the extension into the block of each incoming reg's block. 3709fe6060f1SDimitry Andric // Use a SetVector here because PHIs can have duplicate edges, and we want 3710fe6060f1SDimitry Andric // deterministic iteration order. 3711fe6060f1SDimitry Andric SmallSetVector<MachineInstr *, 8> SrcMIs; 3712fe6060f1SDimitry Andric SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap; 3713fe6060f1SDimitry Andric for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) { 3714fe6060f1SDimitry Andric auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg()); 3715fe6060f1SDimitry Andric if (!SrcMIs.insert(SrcMI)) 3716fe6060f1SDimitry Andric continue; 3717fe6060f1SDimitry Andric 3718fe6060f1SDimitry Andric // Build an extend after each src inst. 3719fe6060f1SDimitry Andric auto *MBB = SrcMI->getParent(); 3720fe6060f1SDimitry Andric MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator(); 3721fe6060f1SDimitry Andric if (InsertPt != MBB->end() && InsertPt->isPHI()) 3722fe6060f1SDimitry Andric InsertPt = MBB->getFirstNonPHI(); 3723fe6060f1SDimitry Andric 3724fe6060f1SDimitry Andric Builder.setInsertPt(*SrcMI->getParent(), InsertPt); 3725fe6060f1SDimitry Andric Builder.setDebugLoc(MI.getDebugLoc()); 3726fe6060f1SDimitry Andric auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, 3727fe6060f1SDimitry Andric SrcMI->getOperand(0).getReg()); 3728fe6060f1SDimitry Andric OldToNewSrcMap[SrcMI] = NewExt; 3729fe6060f1SDimitry Andric } 3730fe6060f1SDimitry Andric 3731fe6060f1SDimitry Andric // Create a new phi with the extended inputs. 3732fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3733fe6060f1SDimitry Andric auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI); 3734fe6060f1SDimitry Andric NewPhi.addDef(DstReg); 3735fe6060f1SDimitry Andric for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) { 3736fe6060f1SDimitry Andric auto &MO = MI.getOperand(SrcIdx); 3737fe6060f1SDimitry Andric if (!MO.isReg()) { 3738fe6060f1SDimitry Andric NewPhi.addMBB(MO.getMBB()); 3739fe6060f1SDimitry Andric continue; 3740fe6060f1SDimitry Andric } 3741fe6060f1SDimitry Andric auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())]; 3742fe6060f1SDimitry Andric NewPhi.addUse(NewSrc->getOperand(0).getReg()); 3743fe6060f1SDimitry Andric } 3744fe6060f1SDimitry Andric Builder.insertInstr(NewPhi); 3745fe6060f1SDimitry Andric ExtMI->eraseFromParent(); 3746fe6060f1SDimitry Andric } 3747fe6060f1SDimitry Andric 3748fe6060f1SDimitry Andric bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, 3749fe6060f1SDimitry Andric Register &Reg) { 3750fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); 3751fe6060f1SDimitry Andric // If we have a constant index, look for a G_BUILD_VECTOR source 3752fe6060f1SDimitry Andric // and find the source register that the index maps to. 3753fe6060f1SDimitry Andric Register SrcVec = MI.getOperand(1).getReg(); 3754fe6060f1SDimitry Andric LLT SrcTy = MRI.getType(SrcVec); 3755fe6060f1SDimitry Andric if (!isLegalOrBeforeLegalizer( 3756fe6060f1SDimitry Andric {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}})) 3757fe6060f1SDimitry Andric return false; 3758fe6060f1SDimitry Andric 3759*349cc55cSDimitry Andric auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 3760fe6060f1SDimitry Andric if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements()) 3761fe6060f1SDimitry Andric return false; 3762fe6060f1SDimitry Andric 3763fe6060f1SDimitry Andric unsigned VecIdx = Cst->Value.getZExtValue(); 3764fe6060f1SDimitry Andric MachineInstr *BuildVecMI = 3765fe6060f1SDimitry Andric getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI); 3766fe6060f1SDimitry Andric if (!BuildVecMI) { 3767fe6060f1SDimitry Andric BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI); 3768fe6060f1SDimitry Andric if (!BuildVecMI) 3769fe6060f1SDimitry Andric return false; 3770fe6060f1SDimitry Andric LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg()); 3771fe6060f1SDimitry Andric if (!isLegalOrBeforeLegalizer( 3772fe6060f1SDimitry Andric {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}})) 3773fe6060f1SDimitry Andric return false; 3774fe6060f1SDimitry Andric } 3775fe6060f1SDimitry Andric 3776fe6060f1SDimitry Andric EVT Ty(getMVTForLLT(SrcTy)); 3777fe6060f1SDimitry Andric if (!MRI.hasOneNonDBGUse(SrcVec) && 3778fe6060f1SDimitry Andric !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) 3779fe6060f1SDimitry Andric return false; 3780fe6060f1SDimitry Andric 3781fe6060f1SDimitry Andric Reg = BuildVecMI->getOperand(VecIdx + 1).getReg(); 3782fe6060f1SDimitry Andric return true; 3783fe6060f1SDimitry Andric } 3784fe6060f1SDimitry Andric 3785fe6060f1SDimitry Andric void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI, 3786fe6060f1SDimitry Andric Register &Reg) { 3787fe6060f1SDimitry Andric // Check the type of the register, since it may have come from a 3788fe6060f1SDimitry Andric // G_BUILD_VECTOR_TRUNC. 3789fe6060f1SDimitry Andric LLT ScalarTy = MRI.getType(Reg); 3790fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3791fe6060f1SDimitry Andric LLT DstTy = MRI.getType(DstReg); 3792fe6060f1SDimitry Andric 3793fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3794fe6060f1SDimitry Andric if (ScalarTy != DstTy) { 3795fe6060f1SDimitry Andric assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits()); 3796fe6060f1SDimitry Andric Builder.buildTrunc(DstReg, Reg); 3797fe6060f1SDimitry Andric MI.eraseFromParent(); 3798fe6060f1SDimitry Andric return; 3799fe6060f1SDimitry Andric } 3800fe6060f1SDimitry Andric replaceSingleDefInstWithReg(MI, Reg); 3801fe6060f1SDimitry Andric } 3802fe6060f1SDimitry Andric 3803fe6060f1SDimitry Andric bool CombinerHelper::matchExtractAllEltsFromBuildVector( 3804fe6060f1SDimitry Andric MachineInstr &MI, 3805fe6060f1SDimitry Andric SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { 3806fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 3807fe6060f1SDimitry Andric // This combine tries to find build_vector's which have every source element 3808fe6060f1SDimitry Andric // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like 3809fe6060f1SDimitry Andric // the masked load scalarization is run late in the pipeline. There's already 3810fe6060f1SDimitry Andric // a combine for a similar pattern starting from the extract, but that 3811fe6060f1SDimitry Andric // doesn't attempt to do it if there are multiple uses of the build_vector, 3812fe6060f1SDimitry Andric // which in this case is true. Starting the combine from the build_vector 3813fe6060f1SDimitry Andric // feels more natural than trying to find sibling nodes of extracts. 3814fe6060f1SDimitry Andric // E.g. 3815fe6060f1SDimitry Andric // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4 3816fe6060f1SDimitry Andric // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0 3817fe6060f1SDimitry Andric // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1 3818fe6060f1SDimitry Andric // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2 3819fe6060f1SDimitry Andric // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3 3820fe6060f1SDimitry Andric // ==> 3821fe6060f1SDimitry Andric // replace ext{1,2,3,4} with %s{1,2,3,4} 3822fe6060f1SDimitry Andric 3823fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3824fe6060f1SDimitry Andric LLT DstTy = MRI.getType(DstReg); 3825fe6060f1SDimitry Andric unsigned NumElts = DstTy.getNumElements(); 3826fe6060f1SDimitry Andric 3827fe6060f1SDimitry Andric SmallBitVector ExtractedElts(NumElts); 3828fe6060f1SDimitry Andric for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg), 3829fe6060f1SDimitry Andric MRI.use_instr_nodbg_end())) { 3830fe6060f1SDimitry Andric if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) 3831fe6060f1SDimitry Andric return false; 3832*349cc55cSDimitry Andric auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI); 3833fe6060f1SDimitry Andric if (!Cst) 3834fe6060f1SDimitry Andric return false; 3835fe6060f1SDimitry Andric unsigned Idx = Cst.getValue().getZExtValue(); 3836fe6060f1SDimitry Andric if (Idx >= NumElts) 3837fe6060f1SDimitry Andric return false; // Out of range. 3838fe6060f1SDimitry Andric ExtractedElts.set(Idx); 3839fe6060f1SDimitry Andric SrcDstPairs.emplace_back( 3840fe6060f1SDimitry Andric std::make_pair(MI.getOperand(Idx + 1).getReg(), &II)); 3841fe6060f1SDimitry Andric } 3842fe6060f1SDimitry Andric // Match if every element was extracted. 3843fe6060f1SDimitry Andric return ExtractedElts.all(); 3844fe6060f1SDimitry Andric } 3845fe6060f1SDimitry Andric 3846fe6060f1SDimitry Andric void CombinerHelper::applyExtractAllEltsFromBuildVector( 3847fe6060f1SDimitry Andric MachineInstr &MI, 3848fe6060f1SDimitry Andric SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { 3849fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 3850fe6060f1SDimitry Andric for (auto &Pair : SrcDstPairs) { 3851fe6060f1SDimitry Andric auto *ExtMI = Pair.second; 3852fe6060f1SDimitry Andric replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first); 3853fe6060f1SDimitry Andric ExtMI->eraseFromParent(); 3854fe6060f1SDimitry Andric } 3855fe6060f1SDimitry Andric MI.eraseFromParent(); 3856fe6060f1SDimitry Andric } 3857fe6060f1SDimitry Andric 3858fe6060f1SDimitry Andric void CombinerHelper::applyBuildFn( 3859e8d8bef9SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3860e8d8bef9SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3861e8d8bef9SDimitry Andric MatchInfo(Builder); 3862e8d8bef9SDimitry Andric MI.eraseFromParent(); 3863fe6060f1SDimitry Andric } 3864fe6060f1SDimitry Andric 3865fe6060f1SDimitry Andric void CombinerHelper::applyBuildFnNoErase( 3866fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3867fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3868fe6060f1SDimitry Andric MatchInfo(Builder); 3869fe6060f1SDimitry Andric } 3870fe6060f1SDimitry Andric 3871fe6060f1SDimitry Andric /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. 3872fe6060f1SDimitry Andric bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { 3873fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 3874fe6060f1SDimitry Andric assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); 3875fe6060f1SDimitry Andric Register X = MI.getOperand(1).getReg(); 3876fe6060f1SDimitry Andric Register Y = MI.getOperand(2).getReg(); 3877fe6060f1SDimitry Andric if (X != Y) 3878fe6060f1SDimitry Andric return false; 3879fe6060f1SDimitry Andric unsigned RotateOpc = 3880fe6060f1SDimitry Andric Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR; 3881fe6060f1SDimitry Andric return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}}); 3882fe6060f1SDimitry Andric } 3883fe6060f1SDimitry Andric 3884fe6060f1SDimitry Andric void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) { 3885fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 3886fe6060f1SDimitry Andric assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); 3887fe6060f1SDimitry Andric bool IsFSHL = Opc == TargetOpcode::G_FSHL; 3888fe6060f1SDimitry Andric Observer.changingInstr(MI); 3889fe6060f1SDimitry Andric MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL 3890fe6060f1SDimitry Andric : TargetOpcode::G_ROTR)); 3891fe6060f1SDimitry Andric MI.RemoveOperand(2); 3892fe6060f1SDimitry Andric Observer.changedInstr(MI); 3893fe6060f1SDimitry Andric } 3894fe6060f1SDimitry Andric 3895fe6060f1SDimitry Andric // Fold (rot x, c) -> (rot x, c % BitSize) 3896fe6060f1SDimitry Andric bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) { 3897fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ROTL || 3898fe6060f1SDimitry Andric MI.getOpcode() == TargetOpcode::G_ROTR); 3899fe6060f1SDimitry Andric unsigned Bitsize = 3900fe6060f1SDimitry Andric MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits(); 3901fe6060f1SDimitry Andric Register AmtReg = MI.getOperand(2).getReg(); 3902fe6060f1SDimitry Andric bool OutOfRange = false; 3903fe6060f1SDimitry Andric auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) { 3904fe6060f1SDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(C)) 3905fe6060f1SDimitry Andric OutOfRange |= CI->getValue().uge(Bitsize); 3906fe6060f1SDimitry Andric return true; 3907fe6060f1SDimitry Andric }; 3908fe6060f1SDimitry Andric return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange; 3909fe6060f1SDimitry Andric } 3910fe6060f1SDimitry Andric 3911fe6060f1SDimitry Andric void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) { 3912fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ROTL || 3913fe6060f1SDimitry Andric MI.getOpcode() == TargetOpcode::G_ROTR); 3914fe6060f1SDimitry Andric unsigned Bitsize = 3915fe6060f1SDimitry Andric MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits(); 3916fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI); 3917fe6060f1SDimitry Andric Register Amt = MI.getOperand(2).getReg(); 3918fe6060f1SDimitry Andric LLT AmtTy = MRI.getType(Amt); 3919fe6060f1SDimitry Andric auto Bits = Builder.buildConstant(AmtTy, Bitsize); 3920fe6060f1SDimitry Andric Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0); 3921fe6060f1SDimitry Andric Observer.changingInstr(MI); 3922fe6060f1SDimitry Andric MI.getOperand(2).setReg(Amt); 3923fe6060f1SDimitry Andric Observer.changedInstr(MI); 3924fe6060f1SDimitry Andric } 3925fe6060f1SDimitry Andric 3926fe6060f1SDimitry Andric bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, 3927fe6060f1SDimitry Andric int64_t &MatchInfo) { 3928fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ICMP); 3929fe6060f1SDimitry Andric auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 3930fe6060f1SDimitry Andric auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg()); 3931fe6060f1SDimitry Andric auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg()); 3932fe6060f1SDimitry Andric Optional<bool> KnownVal; 3933fe6060f1SDimitry Andric switch (Pred) { 3934fe6060f1SDimitry Andric default: 3935fe6060f1SDimitry Andric llvm_unreachable("Unexpected G_ICMP predicate?"); 3936fe6060f1SDimitry Andric case CmpInst::ICMP_EQ: 3937fe6060f1SDimitry Andric KnownVal = KnownBits::eq(KnownLHS, KnownRHS); 3938fe6060f1SDimitry Andric break; 3939fe6060f1SDimitry Andric case CmpInst::ICMP_NE: 3940fe6060f1SDimitry Andric KnownVal = KnownBits::ne(KnownLHS, KnownRHS); 3941fe6060f1SDimitry Andric break; 3942fe6060f1SDimitry Andric case CmpInst::ICMP_SGE: 3943fe6060f1SDimitry Andric KnownVal = KnownBits::sge(KnownLHS, KnownRHS); 3944fe6060f1SDimitry Andric break; 3945fe6060f1SDimitry Andric case CmpInst::ICMP_SGT: 3946fe6060f1SDimitry Andric KnownVal = KnownBits::sgt(KnownLHS, KnownRHS); 3947fe6060f1SDimitry Andric break; 3948fe6060f1SDimitry Andric case CmpInst::ICMP_SLE: 3949fe6060f1SDimitry Andric KnownVal = KnownBits::sle(KnownLHS, KnownRHS); 3950fe6060f1SDimitry Andric break; 3951fe6060f1SDimitry Andric case CmpInst::ICMP_SLT: 3952fe6060f1SDimitry Andric KnownVal = KnownBits::slt(KnownLHS, KnownRHS); 3953fe6060f1SDimitry Andric break; 3954fe6060f1SDimitry Andric case CmpInst::ICMP_UGE: 3955fe6060f1SDimitry Andric KnownVal = KnownBits::uge(KnownLHS, KnownRHS); 3956fe6060f1SDimitry Andric break; 3957fe6060f1SDimitry Andric case CmpInst::ICMP_UGT: 3958fe6060f1SDimitry Andric KnownVal = KnownBits::ugt(KnownLHS, KnownRHS); 3959fe6060f1SDimitry Andric break; 3960fe6060f1SDimitry Andric case CmpInst::ICMP_ULE: 3961fe6060f1SDimitry Andric KnownVal = KnownBits::ule(KnownLHS, KnownRHS); 3962fe6060f1SDimitry Andric break; 3963fe6060f1SDimitry Andric case CmpInst::ICMP_ULT: 3964fe6060f1SDimitry Andric KnownVal = KnownBits::ult(KnownLHS, KnownRHS); 3965fe6060f1SDimitry Andric break; 3966fe6060f1SDimitry Andric } 3967fe6060f1SDimitry Andric if (!KnownVal) 3968fe6060f1SDimitry Andric return false; 3969fe6060f1SDimitry Andric MatchInfo = 3970fe6060f1SDimitry Andric *KnownVal 3971fe6060f1SDimitry Andric ? getICmpTrueVal(getTargetLowering(), 3972fe6060f1SDimitry Andric /*IsVector = */ 3973fe6060f1SDimitry Andric MRI.getType(MI.getOperand(0).getReg()).isVector(), 3974fe6060f1SDimitry Andric /* IsFP = */ false) 3975fe6060f1SDimitry Andric : 0; 3976fe6060f1SDimitry Andric return true; 3977fe6060f1SDimitry Andric } 3978fe6060f1SDimitry Andric 3979*349cc55cSDimitry Andric bool CombinerHelper::matchICmpToLHSKnownBits( 3980*349cc55cSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 3981*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ICMP); 3982*349cc55cSDimitry Andric // Given: 3983*349cc55cSDimitry Andric // 3984*349cc55cSDimitry Andric // %x = G_WHATEVER (... x is known to be 0 or 1 ...) 3985*349cc55cSDimitry Andric // %cmp = G_ICMP ne %x, 0 3986*349cc55cSDimitry Andric // 3987*349cc55cSDimitry Andric // Or: 3988*349cc55cSDimitry Andric // 3989*349cc55cSDimitry Andric // %x = G_WHATEVER (... x is known to be 0 or 1 ...) 3990*349cc55cSDimitry Andric // %cmp = G_ICMP eq %x, 1 3991*349cc55cSDimitry Andric // 3992*349cc55cSDimitry Andric // We can replace %cmp with %x assuming true is 1 on the target. 3993*349cc55cSDimitry Andric auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 3994*349cc55cSDimitry Andric if (!CmpInst::isEquality(Pred)) 3995*349cc55cSDimitry Andric return false; 3996*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3997*349cc55cSDimitry Andric LLT DstTy = MRI.getType(Dst); 3998*349cc55cSDimitry Andric if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(), 3999*349cc55cSDimitry Andric /* IsFP = */ false) != 1) 4000*349cc55cSDimitry Andric return false; 4001*349cc55cSDimitry Andric int64_t OneOrZero = Pred == CmpInst::ICMP_EQ; 4002*349cc55cSDimitry Andric if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero))) 4003*349cc55cSDimitry Andric return false; 4004*349cc55cSDimitry Andric Register LHS = MI.getOperand(2).getReg(); 4005*349cc55cSDimitry Andric auto KnownLHS = KB->getKnownBits(LHS); 4006*349cc55cSDimitry Andric if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1) 4007*349cc55cSDimitry Andric return false; 4008*349cc55cSDimitry Andric // Make sure replacing Dst with the LHS is a legal operation. 4009*349cc55cSDimitry Andric LLT LHSTy = MRI.getType(LHS); 4010*349cc55cSDimitry Andric unsigned LHSSize = LHSTy.getSizeInBits(); 4011*349cc55cSDimitry Andric unsigned DstSize = DstTy.getSizeInBits(); 4012*349cc55cSDimitry Andric unsigned Op = TargetOpcode::COPY; 4013*349cc55cSDimitry Andric if (DstSize != LHSSize) 4014*349cc55cSDimitry Andric Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT; 4015*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}})) 4016*349cc55cSDimitry Andric return false; 4017*349cc55cSDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); }; 4018*349cc55cSDimitry Andric return true; 4019*349cc55cSDimitry Andric } 4020*349cc55cSDimitry Andric 4021*349cc55cSDimitry Andric // Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0 4022*349cc55cSDimitry Andric bool CombinerHelper::matchAndOrDisjointMask( 4023*349cc55cSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4024*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 4025*349cc55cSDimitry Andric 4026*349cc55cSDimitry Andric // Ignore vector types to simplify matching the two constants. 4027*349cc55cSDimitry Andric // TODO: do this for vectors and scalars via a demanded bits analysis. 4028*349cc55cSDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 4029*349cc55cSDimitry Andric if (Ty.isVector()) 4030*349cc55cSDimitry Andric return false; 4031*349cc55cSDimitry Andric 4032*349cc55cSDimitry Andric Register Src; 4033*349cc55cSDimitry Andric int64_t MaskAnd; 4034*349cc55cSDimitry Andric int64_t MaskOr; 4035*349cc55cSDimitry Andric if (!mi_match(MI, MRI, 4036*349cc55cSDimitry Andric m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd)))) 4037*349cc55cSDimitry Andric return false; 4038*349cc55cSDimitry Andric 4039*349cc55cSDimitry Andric // Check if MaskOr could turn on any bits in Src. 4040*349cc55cSDimitry Andric if (MaskAnd & MaskOr) 4041*349cc55cSDimitry Andric return false; 4042*349cc55cSDimitry Andric 4043*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4044*349cc55cSDimitry Andric Observer.changingInstr(MI); 4045*349cc55cSDimitry Andric MI.getOperand(1).setReg(Src); 4046*349cc55cSDimitry Andric Observer.changedInstr(MI); 4047*349cc55cSDimitry Andric }; 4048*349cc55cSDimitry Andric return true; 4049*349cc55cSDimitry Andric } 4050*349cc55cSDimitry Andric 4051fe6060f1SDimitry Andric /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. 4052fe6060f1SDimitry Andric bool CombinerHelper::matchBitfieldExtractFromSExtInReg( 4053fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4054fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); 4055fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4056fe6060f1SDimitry Andric Register Src = MI.getOperand(1).getReg(); 4057fe6060f1SDimitry Andric LLT Ty = MRI.getType(Src); 4058fe6060f1SDimitry Andric LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4059fe6060f1SDimitry Andric if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}})) 4060fe6060f1SDimitry Andric return false; 4061fe6060f1SDimitry Andric int64_t Width = MI.getOperand(2).getImm(); 4062fe6060f1SDimitry Andric Register ShiftSrc; 4063fe6060f1SDimitry Andric int64_t ShiftImm; 4064fe6060f1SDimitry Andric if (!mi_match( 4065fe6060f1SDimitry Andric Src, MRI, 4066fe6060f1SDimitry Andric m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)), 4067fe6060f1SDimitry Andric m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)))))) 4068fe6060f1SDimitry Andric return false; 4069fe6060f1SDimitry Andric if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits()) 4070fe6060f1SDimitry Andric return false; 4071fe6060f1SDimitry Andric 4072fe6060f1SDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 4073fe6060f1SDimitry Andric auto Cst1 = B.buildConstant(ExtractTy, ShiftImm); 4074fe6060f1SDimitry Andric auto Cst2 = B.buildConstant(ExtractTy, Width); 4075fe6060f1SDimitry Andric B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2); 4076fe6060f1SDimitry Andric }; 4077fe6060f1SDimitry Andric return true; 4078fe6060f1SDimitry Andric } 4079fe6060f1SDimitry Andric 4080fe6060f1SDimitry Andric /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. 4081fe6060f1SDimitry Andric bool CombinerHelper::matchBitfieldExtractFromAnd( 4082fe6060f1SDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4083fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 4084fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4085fe6060f1SDimitry Andric LLT Ty = MRI.getType(Dst); 4086fe6060f1SDimitry Andric if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( 4087fe6060f1SDimitry Andric TargetOpcode::G_UBFX, Ty, Ty)) 4088fe6060f1SDimitry Andric return false; 4089fe6060f1SDimitry Andric 4090fe6060f1SDimitry Andric int64_t AndImm, LSBImm; 4091fe6060f1SDimitry Andric Register ShiftSrc; 4092fe6060f1SDimitry Andric const unsigned Size = Ty.getScalarSizeInBits(); 4093fe6060f1SDimitry Andric if (!mi_match(MI.getOperand(0).getReg(), MRI, 4094fe6060f1SDimitry Andric m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))), 4095fe6060f1SDimitry Andric m_ICst(AndImm)))) 4096fe6060f1SDimitry Andric return false; 4097fe6060f1SDimitry Andric 4098fe6060f1SDimitry Andric // The mask is a mask of the low bits iff imm & (imm+1) == 0. 4099fe6060f1SDimitry Andric auto MaybeMask = static_cast<uint64_t>(AndImm); 4100fe6060f1SDimitry Andric if (MaybeMask & (MaybeMask + 1)) 4101fe6060f1SDimitry Andric return false; 4102fe6060f1SDimitry Andric 4103fe6060f1SDimitry Andric // LSB must fit within the register. 4104fe6060f1SDimitry Andric if (static_cast<uint64_t>(LSBImm) >= Size) 4105fe6060f1SDimitry Andric return false; 4106fe6060f1SDimitry Andric 4107fe6060f1SDimitry Andric LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4108fe6060f1SDimitry Andric uint64_t Width = APInt(Size, AndImm).countTrailingOnes(); 4109fe6060f1SDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 4110fe6060f1SDimitry Andric auto WidthCst = B.buildConstant(ExtractTy, Width); 4111fe6060f1SDimitry Andric auto LSBCst = B.buildConstant(ExtractTy, LSBImm); 4112fe6060f1SDimitry Andric B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst}); 4113fe6060f1SDimitry Andric }; 4114fe6060f1SDimitry Andric return true; 4115fe6060f1SDimitry Andric } 4116fe6060f1SDimitry Andric 4117*349cc55cSDimitry Andric bool CombinerHelper::matchBitfieldExtractFromShr( 4118*349cc55cSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4119*349cc55cSDimitry Andric const unsigned Opcode = MI.getOpcode(); 4120*349cc55cSDimitry Andric assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR); 4121*349cc55cSDimitry Andric 4122*349cc55cSDimitry Andric const Register Dst = MI.getOperand(0).getReg(); 4123*349cc55cSDimitry Andric 4124*349cc55cSDimitry Andric const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR 4125*349cc55cSDimitry Andric ? TargetOpcode::G_SBFX 4126*349cc55cSDimitry Andric : TargetOpcode::G_UBFX; 4127*349cc55cSDimitry Andric 4128*349cc55cSDimitry Andric // Check if the type we would use for the extract is legal 4129*349cc55cSDimitry Andric LLT Ty = MRI.getType(Dst); 4130*349cc55cSDimitry Andric LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4131*349cc55cSDimitry Andric if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}})) 4132*349cc55cSDimitry Andric return false; 4133*349cc55cSDimitry Andric 4134*349cc55cSDimitry Andric Register ShlSrc; 4135*349cc55cSDimitry Andric int64_t ShrAmt; 4136*349cc55cSDimitry Andric int64_t ShlAmt; 4137*349cc55cSDimitry Andric const unsigned Size = Ty.getScalarSizeInBits(); 4138*349cc55cSDimitry Andric 4139*349cc55cSDimitry Andric // Try to match shr (shl x, c1), c2 4140*349cc55cSDimitry Andric if (!mi_match(Dst, MRI, 4141*349cc55cSDimitry Andric m_BinOp(Opcode, 4142*349cc55cSDimitry Andric m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))), 4143*349cc55cSDimitry Andric m_ICst(ShrAmt)))) 4144*349cc55cSDimitry Andric return false; 4145*349cc55cSDimitry Andric 4146*349cc55cSDimitry Andric // Make sure that the shift sizes can fit a bitfield extract 4147*349cc55cSDimitry Andric if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size) 4148*349cc55cSDimitry Andric return false; 4149*349cc55cSDimitry Andric 4150*349cc55cSDimitry Andric // Skip this combine if the G_SEXT_INREG combine could handle it 4151*349cc55cSDimitry Andric if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt) 4152*349cc55cSDimitry Andric return false; 4153*349cc55cSDimitry Andric 4154*349cc55cSDimitry Andric // Calculate start position and width of the extract 4155*349cc55cSDimitry Andric const int64_t Pos = ShrAmt - ShlAmt; 4156*349cc55cSDimitry Andric const int64_t Width = Size - ShrAmt; 4157*349cc55cSDimitry Andric 4158*349cc55cSDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 4159*349cc55cSDimitry Andric auto WidthCst = B.buildConstant(ExtractTy, Width); 4160*349cc55cSDimitry Andric auto PosCst = B.buildConstant(ExtractTy, Pos); 4161*349cc55cSDimitry Andric B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst}); 4162*349cc55cSDimitry Andric }; 4163*349cc55cSDimitry Andric return true; 4164*349cc55cSDimitry Andric } 4165*349cc55cSDimitry Andric 4166*349cc55cSDimitry Andric bool CombinerHelper::matchBitfieldExtractFromShrAnd( 4167*349cc55cSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4168*349cc55cSDimitry Andric const unsigned Opcode = MI.getOpcode(); 4169*349cc55cSDimitry Andric assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR); 4170*349cc55cSDimitry Andric 4171*349cc55cSDimitry Andric const Register Dst = MI.getOperand(0).getReg(); 4172*349cc55cSDimitry Andric LLT Ty = MRI.getType(Dst); 4173*349cc55cSDimitry Andric if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal( 4174*349cc55cSDimitry Andric TargetOpcode::G_UBFX, Ty, Ty)) 4175*349cc55cSDimitry Andric return false; 4176*349cc55cSDimitry Andric 4177*349cc55cSDimitry Andric // Try to match shr (and x, c1), c2 4178*349cc55cSDimitry Andric Register AndSrc; 4179*349cc55cSDimitry Andric int64_t ShrAmt; 4180*349cc55cSDimitry Andric int64_t SMask; 4181*349cc55cSDimitry Andric if (!mi_match(Dst, MRI, 4182*349cc55cSDimitry Andric m_BinOp(Opcode, 4183*349cc55cSDimitry Andric m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))), 4184*349cc55cSDimitry Andric m_ICst(ShrAmt)))) 4185*349cc55cSDimitry Andric return false; 4186*349cc55cSDimitry Andric 4187*349cc55cSDimitry Andric const unsigned Size = Ty.getScalarSizeInBits(); 4188*349cc55cSDimitry Andric if (ShrAmt < 0 || ShrAmt >= Size) 4189*349cc55cSDimitry Andric return false; 4190*349cc55cSDimitry Andric 4191*349cc55cSDimitry Andric // Check that ubfx can do the extraction, with no holes in the mask. 4192*349cc55cSDimitry Andric uint64_t UMask = SMask; 4193*349cc55cSDimitry Andric UMask |= maskTrailingOnes<uint64_t>(ShrAmt); 4194*349cc55cSDimitry Andric UMask &= maskTrailingOnes<uint64_t>(Size); 4195*349cc55cSDimitry Andric if (!isMask_64(UMask)) 4196*349cc55cSDimitry Andric return false; 4197*349cc55cSDimitry Andric 4198*349cc55cSDimitry Andric // Calculate start position and width of the extract. 4199*349cc55cSDimitry Andric const int64_t Pos = ShrAmt; 4200*349cc55cSDimitry Andric const int64_t Width = countTrailingOnes(UMask) - ShrAmt; 4201*349cc55cSDimitry Andric 4202*349cc55cSDimitry Andric // It's preferable to keep the shift, rather than form G_SBFX. 4203*349cc55cSDimitry Andric // TODO: remove the G_AND via demanded bits analysis. 4204*349cc55cSDimitry Andric if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size) 4205*349cc55cSDimitry Andric return false; 4206*349cc55cSDimitry Andric 4207*349cc55cSDimitry Andric MatchInfo = [=](MachineIRBuilder &B) { 4208*349cc55cSDimitry Andric auto WidthCst = B.buildConstant(Ty, Width); 4209*349cc55cSDimitry Andric auto PosCst = B.buildConstant(Ty, Pos); 4210*349cc55cSDimitry Andric B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst}); 4211*349cc55cSDimitry Andric }; 4212*349cc55cSDimitry Andric return true; 4213*349cc55cSDimitry Andric } 4214*349cc55cSDimitry Andric 4215fe6060f1SDimitry Andric bool CombinerHelper::reassociationCanBreakAddressingModePattern( 4216fe6060f1SDimitry Andric MachineInstr &PtrAdd) { 4217fe6060f1SDimitry Andric assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD); 4218fe6060f1SDimitry Andric 4219fe6060f1SDimitry Andric Register Src1Reg = PtrAdd.getOperand(1).getReg(); 4220fe6060f1SDimitry Andric MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI); 4221fe6060f1SDimitry Andric if (!Src1Def) 4222fe6060f1SDimitry Andric return false; 4223fe6060f1SDimitry Andric 4224fe6060f1SDimitry Andric Register Src2Reg = PtrAdd.getOperand(2).getReg(); 4225fe6060f1SDimitry Andric 4226fe6060f1SDimitry Andric if (MRI.hasOneNonDBGUse(Src1Reg)) 4227fe6060f1SDimitry Andric return false; 4228fe6060f1SDimitry Andric 4229*349cc55cSDimitry Andric auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI); 4230fe6060f1SDimitry Andric if (!C1) 4231fe6060f1SDimitry Andric return false; 4232*349cc55cSDimitry Andric auto C2 = getIConstantVRegVal(Src2Reg, MRI); 4233fe6060f1SDimitry Andric if (!C2) 4234fe6060f1SDimitry Andric return false; 4235fe6060f1SDimitry Andric 4236fe6060f1SDimitry Andric const APInt &C1APIntVal = *C1; 4237fe6060f1SDimitry Andric const APInt &C2APIntVal = *C2; 4238fe6060f1SDimitry Andric const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue(); 4239fe6060f1SDimitry Andric 4240fe6060f1SDimitry Andric for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) { 4241fe6060f1SDimitry Andric // This combine may end up running before ptrtoint/inttoptr combines 4242fe6060f1SDimitry Andric // manage to eliminate redundant conversions, so try to look through them. 4243fe6060f1SDimitry Andric MachineInstr *ConvUseMI = &UseMI; 4244fe6060f1SDimitry Andric unsigned ConvUseOpc = ConvUseMI->getOpcode(); 4245fe6060f1SDimitry Andric while (ConvUseOpc == TargetOpcode::G_INTTOPTR || 4246fe6060f1SDimitry Andric ConvUseOpc == TargetOpcode::G_PTRTOINT) { 4247fe6060f1SDimitry Andric Register DefReg = ConvUseMI->getOperand(0).getReg(); 4248fe6060f1SDimitry Andric if (!MRI.hasOneNonDBGUse(DefReg)) 4249fe6060f1SDimitry Andric break; 4250fe6060f1SDimitry Andric ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg); 4251fe6060f1SDimitry Andric ConvUseOpc = ConvUseMI->getOpcode(); 4252fe6060f1SDimitry Andric } 4253fe6060f1SDimitry Andric auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD || 4254fe6060f1SDimitry Andric ConvUseOpc == TargetOpcode::G_STORE; 4255fe6060f1SDimitry Andric if (!LoadStore) 4256fe6060f1SDimitry Andric continue; 4257fe6060f1SDimitry Andric // Is x[offset2] already not a legal addressing mode? If so then 4258fe6060f1SDimitry Andric // reassociating the constants breaks nothing (we test offset2 because 4259fe6060f1SDimitry Andric // that's the one we hope to fold into the load or store). 4260fe6060f1SDimitry Andric TargetLoweringBase::AddrMode AM; 4261fe6060f1SDimitry Andric AM.HasBaseReg = true; 4262fe6060f1SDimitry Andric AM.BaseOffs = C2APIntVal.getSExtValue(); 4263fe6060f1SDimitry Andric unsigned AS = 4264fe6060f1SDimitry Andric MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace(); 4265fe6060f1SDimitry Andric Type *AccessTy = 4266fe6060f1SDimitry Andric getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()), 4267fe6060f1SDimitry Andric PtrAdd.getMF()->getFunction().getContext()); 4268fe6060f1SDimitry Andric const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering(); 4269fe6060f1SDimitry Andric if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, 4270fe6060f1SDimitry Andric AccessTy, AS)) 4271fe6060f1SDimitry Andric continue; 4272fe6060f1SDimitry Andric 4273fe6060f1SDimitry Andric // Would x[offset1+offset2] still be a legal addressing mode? 4274fe6060f1SDimitry Andric AM.BaseOffs = CombinedValue; 4275fe6060f1SDimitry Andric if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM, 4276fe6060f1SDimitry Andric AccessTy, AS)) 4277fe6060f1SDimitry Andric return true; 4278fe6060f1SDimitry Andric } 4279fe6060f1SDimitry Andric 4280fe6060f1SDimitry Andric return false; 4281fe6060f1SDimitry Andric } 4282fe6060f1SDimitry Andric 4283*349cc55cSDimitry Andric bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, 4284*349cc55cSDimitry Andric MachineInstr *RHS, 4285*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 4286fe6060f1SDimitry Andric // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) 4287fe6060f1SDimitry Andric Register Src1Reg = MI.getOperand(1).getReg(); 4288fe6060f1SDimitry Andric if (RHS->getOpcode() != TargetOpcode::G_ADD) 4289fe6060f1SDimitry Andric return false; 4290*349cc55cSDimitry Andric auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI); 4291fe6060f1SDimitry Andric if (!C2) 4292fe6060f1SDimitry Andric return false; 4293fe6060f1SDimitry Andric 4294fe6060f1SDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4295fe6060f1SDimitry Andric LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); 4296fe6060f1SDimitry Andric 4297fe6060f1SDimitry Andric auto NewBase = 4298fe6060f1SDimitry Andric Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); 4299fe6060f1SDimitry Andric Observer.changingInstr(MI); 4300fe6060f1SDimitry Andric MI.getOperand(1).setReg(NewBase.getReg(0)); 4301fe6060f1SDimitry Andric MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); 4302fe6060f1SDimitry Andric Observer.changedInstr(MI); 4303fe6060f1SDimitry Andric }; 4304*349cc55cSDimitry Andric return !reassociationCanBreakAddressingModePattern(MI); 4305*349cc55cSDimitry Andric } 4306*349cc55cSDimitry Andric 4307*349cc55cSDimitry Andric bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, 4308*349cc55cSDimitry Andric MachineInstr *LHS, 4309*349cc55cSDimitry Andric MachineInstr *RHS, 4310*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 4311*349cc55cSDimitry Andric // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C) 4312*349cc55cSDimitry Andric // if and only if (G_PTR_ADD X, C) has one use. 4313*349cc55cSDimitry Andric Register LHSBase; 4314*349cc55cSDimitry Andric Optional<ValueAndVReg> LHSCstOff; 4315*349cc55cSDimitry Andric if (!mi_match(MI.getBaseReg(), MRI, 4316*349cc55cSDimitry Andric m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff))))) 4317*349cc55cSDimitry Andric return false; 4318*349cc55cSDimitry Andric 4319*349cc55cSDimitry Andric auto *LHSPtrAdd = cast<GPtrAdd>(LHS); 4320*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4321*349cc55cSDimitry Andric // When we change LHSPtrAdd's offset register we might cause it to use a reg 4322*349cc55cSDimitry Andric // before its def. Sink the instruction so the outer PTR_ADD to ensure this 4323*349cc55cSDimitry Andric // doesn't happen. 4324*349cc55cSDimitry Andric LHSPtrAdd->moveBefore(&MI); 4325*349cc55cSDimitry Andric Register RHSReg = MI.getOffsetReg(); 4326*349cc55cSDimitry Andric Observer.changingInstr(MI); 4327*349cc55cSDimitry Andric MI.getOperand(2).setReg(LHSCstOff->VReg); 4328*349cc55cSDimitry Andric Observer.changedInstr(MI); 4329*349cc55cSDimitry Andric Observer.changingInstr(*LHSPtrAdd); 4330*349cc55cSDimitry Andric LHSPtrAdd->getOperand(2).setReg(RHSReg); 4331*349cc55cSDimitry Andric Observer.changedInstr(*LHSPtrAdd); 4332*349cc55cSDimitry Andric }; 4333*349cc55cSDimitry Andric return !reassociationCanBreakAddressingModePattern(MI); 4334*349cc55cSDimitry Andric } 4335*349cc55cSDimitry Andric 4336*349cc55cSDimitry Andric bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, 4337*349cc55cSDimitry Andric MachineInstr *LHS, 4338*349cc55cSDimitry Andric MachineInstr *RHS, 4339*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 4340*349cc55cSDimitry Andric // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) 4341*349cc55cSDimitry Andric auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS); 4342*349cc55cSDimitry Andric if (!LHSPtrAdd) 4343*349cc55cSDimitry Andric return false; 4344*349cc55cSDimitry Andric 4345*349cc55cSDimitry Andric Register Src2Reg = MI.getOperand(2).getReg(); 4346*349cc55cSDimitry Andric Register LHSSrc1 = LHSPtrAdd->getBaseReg(); 4347*349cc55cSDimitry Andric Register LHSSrc2 = LHSPtrAdd->getOffsetReg(); 4348*349cc55cSDimitry Andric auto C1 = getIConstantVRegVal(LHSSrc2, MRI); 4349fe6060f1SDimitry Andric if (!C1) 4350fe6060f1SDimitry Andric return false; 4351*349cc55cSDimitry Andric auto C2 = getIConstantVRegVal(Src2Reg, MRI); 4352fe6060f1SDimitry Andric if (!C2) 4353fe6060f1SDimitry Andric return false; 4354fe6060f1SDimitry Andric 4355fe6060f1SDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4356fe6060f1SDimitry Andric auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); 4357fe6060f1SDimitry Andric Observer.changingInstr(MI); 4358fe6060f1SDimitry Andric MI.getOperand(1).setReg(LHSSrc1); 4359fe6060f1SDimitry Andric MI.getOperand(2).setReg(NewCst.getReg(0)); 4360fe6060f1SDimitry Andric Observer.changedInstr(MI); 4361fe6060f1SDimitry Andric }; 4362fe6060f1SDimitry Andric return !reassociationCanBreakAddressingModePattern(MI); 4363fe6060f1SDimitry Andric } 4364fe6060f1SDimitry Andric 4365*349cc55cSDimitry Andric bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, 4366*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 4367*349cc55cSDimitry Andric auto &PtrAdd = cast<GPtrAdd>(MI); 4368*349cc55cSDimitry Andric // We're trying to match a few pointer computation patterns here for 4369*349cc55cSDimitry Andric // re-association opportunities. 4370*349cc55cSDimitry Andric // 1) Isolating a constant operand to be on the RHS, e.g.: 4371*349cc55cSDimitry Andric // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) 4372*349cc55cSDimitry Andric // 4373*349cc55cSDimitry Andric // 2) Folding two constants in each sub-tree as long as such folding 4374*349cc55cSDimitry Andric // doesn't break a legal addressing mode. 4375*349cc55cSDimitry Andric // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) 4376*349cc55cSDimitry Andric // 4377*349cc55cSDimitry Andric // 3) Move a constant from the LHS of an inner op to the RHS of the outer. 4378*349cc55cSDimitry Andric // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C) 4379*349cc55cSDimitry Andric // iif (G_PTR_ADD X, C) has one use. 4380*349cc55cSDimitry Andric MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg()); 4381*349cc55cSDimitry Andric MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg()); 4382*349cc55cSDimitry Andric 4383*349cc55cSDimitry Andric // Try to match example 2. 4384*349cc55cSDimitry Andric if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo)) 4385*349cc55cSDimitry Andric return true; 4386*349cc55cSDimitry Andric 4387*349cc55cSDimitry Andric // Try to match example 3. 4388*349cc55cSDimitry Andric if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo)) 4389*349cc55cSDimitry Andric return true; 4390*349cc55cSDimitry Andric 4391*349cc55cSDimitry Andric // Try to match example 1. 4392*349cc55cSDimitry Andric if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo)) 4393*349cc55cSDimitry Andric return true; 4394*349cc55cSDimitry Andric 4395*349cc55cSDimitry Andric return false; 4396*349cc55cSDimitry Andric } 4397*349cc55cSDimitry Andric 4398fe6060f1SDimitry Andric bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { 4399fe6060f1SDimitry Andric Register Op1 = MI.getOperand(1).getReg(); 4400fe6060f1SDimitry Andric Register Op2 = MI.getOperand(2).getReg(); 4401fe6060f1SDimitry Andric auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI); 4402fe6060f1SDimitry Andric if (!MaybeCst) 4403fe6060f1SDimitry Andric return false; 4404fe6060f1SDimitry Andric MatchInfo = *MaybeCst; 4405e8d8bef9SDimitry Andric return true; 4406e8d8bef9SDimitry Andric } 4407e8d8bef9SDimitry Andric 4408*349cc55cSDimitry Andric bool CombinerHelper::matchNarrowBinopFeedingAnd( 4409*349cc55cSDimitry Andric MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { 4410*349cc55cSDimitry Andric // Look for a binop feeding into an AND with a mask: 4411*349cc55cSDimitry Andric // 4412*349cc55cSDimitry Andric // %add = G_ADD %lhs, %rhs 4413*349cc55cSDimitry Andric // %and = G_AND %add, 000...11111111 4414*349cc55cSDimitry Andric // 4415*349cc55cSDimitry Andric // Check if it's possible to perform the binop at a narrower width and zext 4416*349cc55cSDimitry Andric // back to the original width like so: 4417*349cc55cSDimitry Andric // 4418*349cc55cSDimitry Andric // %narrow_lhs = G_TRUNC %lhs 4419*349cc55cSDimitry Andric // %narrow_rhs = G_TRUNC %rhs 4420*349cc55cSDimitry Andric // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs 4421*349cc55cSDimitry Andric // %new_add = G_ZEXT %narrow_add 4422*349cc55cSDimitry Andric // %and = G_AND %new_add, 000...11111111 4423*349cc55cSDimitry Andric // 4424*349cc55cSDimitry Andric // This can allow later combines to eliminate the G_AND if it turns out 4425*349cc55cSDimitry Andric // that the mask is irrelevant. 4426*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 4427*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4428*349cc55cSDimitry Andric Register AndLHS = MI.getOperand(1).getReg(); 4429*349cc55cSDimitry Andric Register AndRHS = MI.getOperand(2).getReg(); 4430*349cc55cSDimitry Andric LLT WideTy = MRI.getType(Dst); 4431*349cc55cSDimitry Andric 4432*349cc55cSDimitry Andric // If the potential binop has more than one use, then it's possible that one 4433*349cc55cSDimitry Andric // of those uses will need its full width. 4434*349cc55cSDimitry Andric if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS)) 4435*349cc55cSDimitry Andric return false; 4436*349cc55cSDimitry Andric 4437*349cc55cSDimitry Andric // Check if the LHS feeding the AND is impacted by the high bits that we're 4438*349cc55cSDimitry Andric // masking out. 4439*349cc55cSDimitry Andric // 4440*349cc55cSDimitry Andric // e.g. for 64-bit x, y: 4441*349cc55cSDimitry Andric // 4442*349cc55cSDimitry Andric // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535 4443*349cc55cSDimitry Andric MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI); 4444*349cc55cSDimitry Andric if (!LHSInst) 4445*349cc55cSDimitry Andric return false; 4446*349cc55cSDimitry Andric unsigned LHSOpc = LHSInst->getOpcode(); 4447*349cc55cSDimitry Andric switch (LHSOpc) { 4448*349cc55cSDimitry Andric default: 4449*349cc55cSDimitry Andric return false; 4450*349cc55cSDimitry Andric case TargetOpcode::G_ADD: 4451*349cc55cSDimitry Andric case TargetOpcode::G_SUB: 4452*349cc55cSDimitry Andric case TargetOpcode::G_MUL: 4453*349cc55cSDimitry Andric case TargetOpcode::G_AND: 4454*349cc55cSDimitry Andric case TargetOpcode::G_OR: 4455*349cc55cSDimitry Andric case TargetOpcode::G_XOR: 4456*349cc55cSDimitry Andric break; 4457*349cc55cSDimitry Andric } 4458*349cc55cSDimitry Andric 4459*349cc55cSDimitry Andric // Find the mask on the RHS. 4460*349cc55cSDimitry Andric auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI); 4461*349cc55cSDimitry Andric if (!Cst) 4462*349cc55cSDimitry Andric return false; 4463*349cc55cSDimitry Andric auto Mask = Cst->Value; 4464*349cc55cSDimitry Andric if (!Mask.isMask()) 4465*349cc55cSDimitry Andric return false; 4466*349cc55cSDimitry Andric 4467*349cc55cSDimitry Andric // No point in combining if there's nothing to truncate. 4468*349cc55cSDimitry Andric unsigned NarrowWidth = Mask.countTrailingOnes(); 4469*349cc55cSDimitry Andric if (NarrowWidth == WideTy.getSizeInBits()) 4470*349cc55cSDimitry Andric return false; 4471*349cc55cSDimitry Andric LLT NarrowTy = LLT::scalar(NarrowWidth); 4472*349cc55cSDimitry Andric 4473*349cc55cSDimitry Andric // Check if adding the zext + truncates could be harmful. 4474*349cc55cSDimitry Andric auto &MF = *MI.getMF(); 4475*349cc55cSDimitry Andric const auto &TLI = getTargetLowering(); 4476*349cc55cSDimitry Andric LLVMContext &Ctx = MF.getFunction().getContext(); 4477*349cc55cSDimitry Andric auto &DL = MF.getDataLayout(); 4478*349cc55cSDimitry Andric if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) || 4479*349cc55cSDimitry Andric !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx)) 4480*349cc55cSDimitry Andric return false; 4481*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) || 4482*349cc55cSDimitry Andric !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}})) 4483*349cc55cSDimitry Andric return false; 4484*349cc55cSDimitry Andric Register BinOpLHS = LHSInst->getOperand(1).getReg(); 4485*349cc55cSDimitry Andric Register BinOpRHS = LHSInst->getOperand(2).getReg(); 4486*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4487*349cc55cSDimitry Andric auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS); 4488*349cc55cSDimitry Andric auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS); 4489*349cc55cSDimitry Andric auto NarrowBinOp = 4490*349cc55cSDimitry Andric Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS}); 4491*349cc55cSDimitry Andric auto Ext = Builder.buildZExt(WideTy, NarrowBinOp); 4492*349cc55cSDimitry Andric Observer.changingInstr(MI); 4493*349cc55cSDimitry Andric MI.getOperand(1).setReg(Ext.getReg(0)); 4494*349cc55cSDimitry Andric Observer.changedInstr(MI); 4495*349cc55cSDimitry Andric }; 4496*349cc55cSDimitry Andric return true; 4497*349cc55cSDimitry Andric } 4498*349cc55cSDimitry Andric 4499*349cc55cSDimitry Andric bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { 4500*349cc55cSDimitry Andric unsigned Opc = MI.getOpcode(); 4501*349cc55cSDimitry Andric assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); 4502*349cc55cSDimitry Andric // Check for a constant 2 or a splat of 2 on the RHS. 4503*349cc55cSDimitry Andric auto RHS = MI.getOperand(3).getReg(); 4504*349cc55cSDimitry Andric bool IsVector = MRI.getType(RHS).isVector(); 4505*349cc55cSDimitry Andric if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) 4506*349cc55cSDimitry Andric return false; 4507*349cc55cSDimitry Andric if (IsVector) { 4508*349cc55cSDimitry Andric // FIXME: There's no mi_match pattern for this yet. 4509*349cc55cSDimitry Andric auto *RHSDef = getDefIgnoringCopies(RHS, MRI); 4510*349cc55cSDimitry Andric if (!RHSDef) 4511*349cc55cSDimitry Andric return false; 4512*349cc55cSDimitry Andric auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); 4513*349cc55cSDimitry Andric if (!Splat || *Splat != 2) 4514*349cc55cSDimitry Andric return false; 4515*349cc55cSDimitry Andric } 4516*349cc55cSDimitry Andric 4517*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4518*349cc55cSDimitry Andric Observer.changingInstr(MI); 4519*349cc55cSDimitry Andric unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO 4520*349cc55cSDimitry Andric : TargetOpcode::G_SADDO; 4521*349cc55cSDimitry Andric MI.setDesc(Builder.getTII().get(NewOpc)); 4522*349cc55cSDimitry Andric MI.getOperand(3).setReg(MI.getOperand(2).getReg()); 4523*349cc55cSDimitry Andric Observer.changedInstr(MI); 4524*349cc55cSDimitry Andric }; 4525*349cc55cSDimitry Andric return true; 4526*349cc55cSDimitry Andric } 4527*349cc55cSDimitry Andric 4528*349cc55cSDimitry Andric MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { 4529*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UDIV); 4530*349cc55cSDimitry Andric auto &UDiv = cast<GenericMachineInstr>(MI); 4531*349cc55cSDimitry Andric Register Dst = UDiv.getReg(0); 4532*349cc55cSDimitry Andric Register LHS = UDiv.getReg(1); 4533*349cc55cSDimitry Andric Register RHS = UDiv.getReg(2); 4534*349cc55cSDimitry Andric LLT Ty = MRI.getType(Dst); 4535*349cc55cSDimitry Andric LLT ScalarTy = Ty.getScalarType(); 4536*349cc55cSDimitry Andric const unsigned EltBits = ScalarTy.getScalarSizeInBits(); 4537*349cc55cSDimitry Andric LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4538*349cc55cSDimitry Andric LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); 4539*349cc55cSDimitry Andric auto &MIB = Builder; 4540*349cc55cSDimitry Andric MIB.setInstrAndDebugLoc(MI); 4541*349cc55cSDimitry Andric 4542*349cc55cSDimitry Andric bool UseNPQ = false; 4543*349cc55cSDimitry Andric SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; 4544*349cc55cSDimitry Andric 4545*349cc55cSDimitry Andric auto BuildUDIVPattern = [&](const Constant *C) { 4546*349cc55cSDimitry Andric auto *CI = cast<ConstantInt>(C); 4547*349cc55cSDimitry Andric const APInt &Divisor = CI->getValue(); 4548*349cc55cSDimitry Andric UnsignedDivisonByConstantInfo magics = 4549*349cc55cSDimitry Andric UnsignedDivisonByConstantInfo::get(Divisor); 4550*349cc55cSDimitry Andric unsigned PreShift = 0, PostShift = 0; 4551*349cc55cSDimitry Andric 4552*349cc55cSDimitry Andric // If the divisor is even, we can avoid using the expensive fixup by 4553*349cc55cSDimitry Andric // shifting the divided value upfront. 4554*349cc55cSDimitry Andric if (magics.IsAdd != 0 && !Divisor[0]) { 4555*349cc55cSDimitry Andric PreShift = Divisor.countTrailingZeros(); 4556*349cc55cSDimitry Andric // Get magic number for the shifted divisor. 4557*349cc55cSDimitry Andric magics = 4558*349cc55cSDimitry Andric UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); 4559*349cc55cSDimitry Andric assert(magics.IsAdd == 0 && "Should use cheap fixup now"); 4560*349cc55cSDimitry Andric } 4561*349cc55cSDimitry Andric 4562*349cc55cSDimitry Andric APInt Magic = magics.Magic; 4563*349cc55cSDimitry Andric 4564*349cc55cSDimitry Andric unsigned SelNPQ; 4565*349cc55cSDimitry Andric if (magics.IsAdd == 0 || Divisor.isOneValue()) { 4566*349cc55cSDimitry Andric assert(magics.ShiftAmount < Divisor.getBitWidth() && 4567*349cc55cSDimitry Andric "We shouldn't generate an undefined shift!"); 4568*349cc55cSDimitry Andric PostShift = magics.ShiftAmount; 4569*349cc55cSDimitry Andric SelNPQ = false; 4570*349cc55cSDimitry Andric } else { 4571*349cc55cSDimitry Andric PostShift = magics.ShiftAmount - 1; 4572*349cc55cSDimitry Andric SelNPQ = true; 4573*349cc55cSDimitry Andric } 4574*349cc55cSDimitry Andric 4575*349cc55cSDimitry Andric PreShifts.push_back( 4576*349cc55cSDimitry Andric MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); 4577*349cc55cSDimitry Andric MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); 4578*349cc55cSDimitry Andric NPQFactors.push_back( 4579*349cc55cSDimitry Andric MIB.buildConstant(ScalarTy, 4580*349cc55cSDimitry Andric SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) 4581*349cc55cSDimitry Andric : APInt::getZero(EltBits)) 4582*349cc55cSDimitry Andric .getReg(0)); 4583*349cc55cSDimitry Andric PostShifts.push_back( 4584*349cc55cSDimitry Andric MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0)); 4585*349cc55cSDimitry Andric UseNPQ |= SelNPQ; 4586*349cc55cSDimitry Andric return true; 4587*349cc55cSDimitry Andric }; 4588*349cc55cSDimitry Andric 4589*349cc55cSDimitry Andric // Collect the shifts/magic values from each element. 4590*349cc55cSDimitry Andric bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern); 4591*349cc55cSDimitry Andric (void)Matched; 4592*349cc55cSDimitry Andric assert(Matched && "Expected unary predicate match to succeed"); 4593*349cc55cSDimitry Andric 4594*349cc55cSDimitry Andric Register PreShift, PostShift, MagicFactor, NPQFactor; 4595*349cc55cSDimitry Andric auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI); 4596*349cc55cSDimitry Andric if (RHSDef) { 4597*349cc55cSDimitry Andric PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0); 4598*349cc55cSDimitry Andric MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0); 4599*349cc55cSDimitry Andric NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0); 4600*349cc55cSDimitry Andric PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0); 4601*349cc55cSDimitry Andric } else { 4602*349cc55cSDimitry Andric assert(MRI.getType(RHS).isScalar() && 4603*349cc55cSDimitry Andric "Non-build_vector operation should have been a scalar"); 4604*349cc55cSDimitry Andric PreShift = PreShifts[0]; 4605*349cc55cSDimitry Andric MagicFactor = MagicFactors[0]; 4606*349cc55cSDimitry Andric PostShift = PostShifts[0]; 4607*349cc55cSDimitry Andric } 4608*349cc55cSDimitry Andric 4609*349cc55cSDimitry Andric Register Q = LHS; 4610*349cc55cSDimitry Andric Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0); 4611*349cc55cSDimitry Andric 4612*349cc55cSDimitry Andric // Multiply the numerator (operand 0) by the magic value. 4613*349cc55cSDimitry Andric Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0); 4614*349cc55cSDimitry Andric 4615*349cc55cSDimitry Andric if (UseNPQ) { 4616*349cc55cSDimitry Andric Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0); 4617*349cc55cSDimitry Andric 4618*349cc55cSDimitry Andric // For vectors we might have a mix of non-NPQ/NPQ paths, so use 4619*349cc55cSDimitry Andric // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero. 4620*349cc55cSDimitry Andric if (Ty.isVector()) 4621*349cc55cSDimitry Andric NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0); 4622*349cc55cSDimitry Andric else 4623*349cc55cSDimitry Andric NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0); 4624*349cc55cSDimitry Andric 4625*349cc55cSDimitry Andric Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0); 4626*349cc55cSDimitry Andric } 4627*349cc55cSDimitry Andric 4628*349cc55cSDimitry Andric Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0); 4629*349cc55cSDimitry Andric auto One = MIB.buildConstant(Ty, 1); 4630*349cc55cSDimitry Andric auto IsOne = MIB.buildICmp( 4631*349cc55cSDimitry Andric CmpInst::Predicate::ICMP_EQ, 4632*349cc55cSDimitry Andric Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One); 4633*349cc55cSDimitry Andric return MIB.buildSelect(Ty, IsOne, LHS, Q); 4634*349cc55cSDimitry Andric } 4635*349cc55cSDimitry Andric 4636*349cc55cSDimitry Andric bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { 4637*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UDIV); 4638*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4639*349cc55cSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 4640*349cc55cSDimitry Andric LLT DstTy = MRI.getType(Dst); 4641*349cc55cSDimitry Andric auto *RHSDef = MRI.getVRegDef(RHS); 4642*349cc55cSDimitry Andric if (!isConstantOrConstantVector(*RHSDef, MRI)) 4643*349cc55cSDimitry Andric return false; 4644*349cc55cSDimitry Andric 4645*349cc55cSDimitry Andric auto &MF = *MI.getMF(); 4646*349cc55cSDimitry Andric AttributeList Attr = MF.getFunction().getAttributes(); 4647*349cc55cSDimitry Andric const auto &TLI = getTargetLowering(); 4648*349cc55cSDimitry Andric LLVMContext &Ctx = MF.getFunction().getContext(); 4649*349cc55cSDimitry Andric auto &DL = MF.getDataLayout(); 4650*349cc55cSDimitry Andric if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) 4651*349cc55cSDimitry Andric return false; 4652*349cc55cSDimitry Andric 4653*349cc55cSDimitry Andric // Don't do this for minsize because the instruction sequence is usually 4654*349cc55cSDimitry Andric // larger. 4655*349cc55cSDimitry Andric if (MF.getFunction().hasMinSize()) 4656*349cc55cSDimitry Andric return false; 4657*349cc55cSDimitry Andric 4658*349cc55cSDimitry Andric // Don't do this if the types are not going to be legal. 4659*349cc55cSDimitry Andric if (LI) { 4660*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}})) 4661*349cc55cSDimitry Andric return false; 4662*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}})) 4663*349cc55cSDimitry Andric return false; 4664*349cc55cSDimitry Andric if (!isLegalOrBeforeLegalizer( 4665*349cc55cSDimitry Andric {TargetOpcode::G_ICMP, 4666*349cc55cSDimitry Andric {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1), 4667*349cc55cSDimitry Andric DstTy}})) 4668*349cc55cSDimitry Andric return false; 4669*349cc55cSDimitry Andric } 4670*349cc55cSDimitry Andric 4671*349cc55cSDimitry Andric auto CheckEltValue = [&](const Constant *C) { 4672*349cc55cSDimitry Andric if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) 4673*349cc55cSDimitry Andric return !CI->isZero(); 4674*349cc55cSDimitry Andric return false; 4675*349cc55cSDimitry Andric }; 4676*349cc55cSDimitry Andric return matchUnaryPredicate(MRI, RHS, CheckEltValue); 4677*349cc55cSDimitry Andric } 4678*349cc55cSDimitry Andric 4679*349cc55cSDimitry Andric void CombinerHelper::applyUDivByConst(MachineInstr &MI) { 4680*349cc55cSDimitry Andric auto *NewMI = buildUDivUsingMul(MI); 4681*349cc55cSDimitry Andric replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); 4682*349cc55cSDimitry Andric } 4683*349cc55cSDimitry Andric 4684*349cc55cSDimitry Andric bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { 4685*349cc55cSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_UMULH); 4686*349cc55cSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 4687*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4688*349cc55cSDimitry Andric LLT Ty = MRI.getType(Dst); 4689*349cc55cSDimitry Andric LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4690*349cc55cSDimitry Andric auto MatchPow2ExceptOne = [&](const Constant *C) { 4691*349cc55cSDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(C)) 4692*349cc55cSDimitry Andric return CI->getValue().isPowerOf2() && !CI->getValue().isOne(); 4693*349cc55cSDimitry Andric return false; 4694*349cc55cSDimitry Andric }; 4695*349cc55cSDimitry Andric if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false)) 4696*349cc55cSDimitry Andric return false; 4697*349cc55cSDimitry Andric return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}); 4698*349cc55cSDimitry Andric } 4699*349cc55cSDimitry Andric 4700*349cc55cSDimitry Andric void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { 4701*349cc55cSDimitry Andric Register LHS = MI.getOperand(1).getReg(); 4702*349cc55cSDimitry Andric Register RHS = MI.getOperand(2).getReg(); 4703*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4704*349cc55cSDimitry Andric LLT Ty = MRI.getType(Dst); 4705*349cc55cSDimitry Andric LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); 4706*349cc55cSDimitry Andric unsigned NumEltBits = Ty.getScalarSizeInBits(); 4707*349cc55cSDimitry Andric 4708*349cc55cSDimitry Andric Builder.setInstrAndDebugLoc(MI); 4709*349cc55cSDimitry Andric auto LogBase2 = buildLogBase2(RHS, Builder); 4710*349cc55cSDimitry Andric auto ShiftAmt = 4711*349cc55cSDimitry Andric Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2); 4712*349cc55cSDimitry Andric auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt); 4713*349cc55cSDimitry Andric Builder.buildLShr(Dst, LHS, Trunc); 4714*349cc55cSDimitry Andric MI.eraseFromParent(); 4715*349cc55cSDimitry Andric } 4716*349cc55cSDimitry Andric 4717*349cc55cSDimitry Andric bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, 4718*349cc55cSDimitry Andric BuildFnTy &MatchInfo) { 4719*349cc55cSDimitry Andric unsigned Opc = MI.getOpcode(); 4720*349cc55cSDimitry Andric assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB || 4721*349cc55cSDimitry Andric Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || 4722*349cc55cSDimitry Andric Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA); 4723*349cc55cSDimitry Andric 4724*349cc55cSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 4725*349cc55cSDimitry Andric Register X = MI.getOperand(1).getReg(); 4726*349cc55cSDimitry Andric Register Y = MI.getOperand(2).getReg(); 4727*349cc55cSDimitry Andric LLT Type = MRI.getType(Dst); 4728*349cc55cSDimitry Andric 4729*349cc55cSDimitry Andric // fold (fadd x, fneg(y)) -> (fsub x, y) 4730*349cc55cSDimitry Andric // fold (fadd fneg(y), x) -> (fsub x, y) 4731*349cc55cSDimitry Andric // G_ADD is commutative so both cases are checked by m_GFAdd 4732*349cc55cSDimitry Andric if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) && 4733*349cc55cSDimitry Andric isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) { 4734*349cc55cSDimitry Andric Opc = TargetOpcode::G_FSUB; 4735*349cc55cSDimitry Andric } 4736*349cc55cSDimitry Andric /// fold (fsub x, fneg(y)) -> (fadd x, y) 4737*349cc55cSDimitry Andric else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) && 4738*349cc55cSDimitry Andric isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) { 4739*349cc55cSDimitry Andric Opc = TargetOpcode::G_FADD; 4740*349cc55cSDimitry Andric } 4741*349cc55cSDimitry Andric // fold (fmul fneg(x), fneg(y)) -> (fmul x, y) 4742*349cc55cSDimitry Andric // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y) 4743*349cc55cSDimitry Andric // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) 4744*349cc55cSDimitry Andric // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z) 4745*349cc55cSDimitry Andric else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || 4746*349cc55cSDimitry Andric Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) && 4747*349cc55cSDimitry Andric mi_match(X, MRI, m_GFNeg(m_Reg(X))) && 4748*349cc55cSDimitry Andric mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) { 4749*349cc55cSDimitry Andric // no opcode change 4750*349cc55cSDimitry Andric } else 4751*349cc55cSDimitry Andric return false; 4752*349cc55cSDimitry Andric 4753*349cc55cSDimitry Andric MatchInfo = [=, &MI](MachineIRBuilder &B) { 4754*349cc55cSDimitry Andric Observer.changingInstr(MI); 4755*349cc55cSDimitry Andric MI.setDesc(B.getTII().get(Opc)); 4756*349cc55cSDimitry Andric MI.getOperand(1).setReg(X); 4757*349cc55cSDimitry Andric MI.getOperand(2).setReg(Y); 4758*349cc55cSDimitry Andric Observer.changedInstr(MI); 4759*349cc55cSDimitry Andric }; 4760*349cc55cSDimitry Andric return true; 4761*349cc55cSDimitry Andric } 4762*349cc55cSDimitry Andric 47630b57cec5SDimitry Andric bool CombinerHelper::tryCombine(MachineInstr &MI) { 47640b57cec5SDimitry Andric if (tryCombineCopy(MI)) 47650b57cec5SDimitry Andric return true; 47668bcb0991SDimitry Andric if (tryCombineExtendingLoads(MI)) 47678bcb0991SDimitry Andric return true; 47688bcb0991SDimitry Andric if (tryCombineIndexedLoadStore(MI)) 47698bcb0991SDimitry Andric return true; 47708bcb0991SDimitry Andric return false; 47710b57cec5SDimitry Andric } 4772