xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (revision 74626c16ff489c0d64cf2843dfd522e7c544f3ce)
10b57cec5SDimitry Andric //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This file implements the LegalizerHelper class to legalize
100b57cec5SDimitry Andric /// individual instructions and the LegalizeMachineIR wrapper pass for the
110b57cec5SDimitry Andric /// primary legalization.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CallLowering.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1806c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1981ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
22e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
2506c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
2681ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
281db9f3b2SDimitry Andric #include "llvm/CodeGen/RuntimeLibcalls.h"
298bcb0991SDimitry Andric #include "llvm/CodeGen/TargetFrameLowering.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
32fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
34fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h"
350b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
360b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
370b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
38349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h"
39bdd1243dSDimitry Andric #include <numeric>
40bdd1243dSDimitry Andric #include <optional>
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric #define DEBUG_TYPE "legalizer"
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric using namespace llvm;
450b57cec5SDimitry Andric using namespace LegalizeActions;
46e8d8bef9SDimitry Andric using namespace MIPatternMatch;
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
490b57cec5SDimitry Andric ///
500b57cec5SDimitry Andric /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
510b57cec5SDimitry Andric /// with any leftover piece as type \p LeftoverTy
520b57cec5SDimitry Andric ///
530b57cec5SDimitry Andric /// Returns -1 in the first element of the pair if the breakdown is not
540b57cec5SDimitry Andric /// satisfiable.
550b57cec5SDimitry Andric static std::pair<int, int>
560b57cec5SDimitry Andric getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
570b57cec5SDimitry Andric   assert(!LeftoverTy.isValid() && "this is an out argument");
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   unsigned Size = OrigTy.getSizeInBits();
600b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
610b57cec5SDimitry Andric   unsigned NumParts = Size / NarrowSize;
620b57cec5SDimitry Andric   unsigned LeftoverSize = Size - NumParts * NarrowSize;
630b57cec5SDimitry Andric   assert(Size > NarrowSize);
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   if (LeftoverSize == 0)
660b57cec5SDimitry Andric     return {NumParts, 0};
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   if (NarrowTy.isVector()) {
690b57cec5SDimitry Andric     unsigned EltSize = OrigTy.getScalarSizeInBits();
700b57cec5SDimitry Andric     if (LeftoverSize % EltSize != 0)
710b57cec5SDimitry Andric       return {-1, -1};
72fe6060f1SDimitry Andric     LeftoverTy = LLT::scalarOrVector(
73fe6060f1SDimitry Andric         ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
740b57cec5SDimitry Andric   } else {
750b57cec5SDimitry Andric     LeftoverTy = LLT::scalar(LeftoverSize);
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
790b57cec5SDimitry Andric   return std::make_pair(NumParts, NumLeftover);
800b57cec5SDimitry Andric }
810b57cec5SDimitry Andric 
825ffd83dbSDimitry Andric static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
835ffd83dbSDimitry Andric 
845ffd83dbSDimitry Andric   if (!Ty.isScalar())
855ffd83dbSDimitry Andric     return nullptr;
865ffd83dbSDimitry Andric 
875ffd83dbSDimitry Andric   switch (Ty.getSizeInBits()) {
885ffd83dbSDimitry Andric   case 16:
895ffd83dbSDimitry Andric     return Type::getHalfTy(Ctx);
905ffd83dbSDimitry Andric   case 32:
915ffd83dbSDimitry Andric     return Type::getFloatTy(Ctx);
925ffd83dbSDimitry Andric   case 64:
935ffd83dbSDimitry Andric     return Type::getDoubleTy(Ctx);
94e8d8bef9SDimitry Andric   case 80:
95e8d8bef9SDimitry Andric     return Type::getX86_FP80Ty(Ctx);
965ffd83dbSDimitry Andric   case 128:
975ffd83dbSDimitry Andric     return Type::getFP128Ty(Ctx);
985ffd83dbSDimitry Andric   default:
995ffd83dbSDimitry Andric     return nullptr;
1005ffd83dbSDimitry Andric   }
1015ffd83dbSDimitry Andric }
1025ffd83dbSDimitry Andric 
1030b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF,
1040b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
1050b57cec5SDimitry Andric                                  MachineIRBuilder &Builder)
1065ffd83dbSDimitry Andric     : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107e8d8bef9SDimitry Andric       LI(*MF.getSubtarget().getLegalizerInfo()),
10806c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
1110b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
11206c3fb27SDimitry Andric                                  MachineIRBuilder &B, GISelKnownBits *KB)
113e8d8bef9SDimitry Andric     : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
11406c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115e8d8bef9SDimitry Andric 
1160b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
117fe6060f1SDimitry Andric LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118fe6060f1SDimitry Andric                                    LostDebugLocObserver &LocObserver) {
1195ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
1205ffd83dbSDimitry Andric 
1215ffd83dbSDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
1220b57cec5SDimitry Andric 
1235f757f3fSDimitry Andric   if (isa<GIntrinsic>(MI))
1245ffd83dbSDimitry Andric     return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
1250b57cec5SDimitry Andric   auto Step = LI.getAction(MI, MRI);
1260b57cec5SDimitry Andric   switch (Step.Action) {
1270b57cec5SDimitry Andric   case Legal:
1280b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Already legal\n");
1290b57cec5SDimitry Andric     return AlreadyLegal;
1300b57cec5SDimitry Andric   case Libcall:
1310b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132fe6060f1SDimitry Andric     return libcall(MI, LocObserver);
1330b57cec5SDimitry Andric   case NarrowScalar:
1340b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
1350b57cec5SDimitry Andric     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
1360b57cec5SDimitry Andric   case WidenScalar:
1370b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
1380b57cec5SDimitry Andric     return widenScalar(MI, Step.TypeIdx, Step.NewType);
1395ffd83dbSDimitry Andric   case Bitcast:
1405ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
1415ffd83dbSDimitry Andric     return bitcast(MI, Step.TypeIdx, Step.NewType);
1420b57cec5SDimitry Andric   case Lower:
1430b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Lower\n");
1440b57cec5SDimitry Andric     return lower(MI, Step.TypeIdx, Step.NewType);
1450b57cec5SDimitry Andric   case FewerElements:
1460b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
1470b57cec5SDimitry Andric     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
1480b57cec5SDimitry Andric   case MoreElements:
1490b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
1500b57cec5SDimitry Andric     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
1510b57cec5SDimitry Andric   case Custom:
1520b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
1531db9f3b2SDimitry Andric     return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
1541db9f3b2SDimitry Andric                                                      : UnableToLegalize;
1550b57cec5SDimitry Andric   default:
1560b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
1570b57cec5SDimitry Andric     return UnableToLegalize;
1580b57cec5SDimitry Andric   }
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric void LegalizerHelper::insertParts(Register DstReg,
1620b57cec5SDimitry Andric                                   LLT ResultTy, LLT PartTy,
1630b57cec5SDimitry Andric                                   ArrayRef<Register> PartRegs,
1640b57cec5SDimitry Andric                                   LLT LeftoverTy,
1650b57cec5SDimitry Andric                                   ArrayRef<Register> LeftoverRegs) {
1660b57cec5SDimitry Andric   if (!LeftoverTy.isValid()) {
1670b57cec5SDimitry Andric     assert(LeftoverRegs.empty());
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric     if (!ResultTy.isVector()) {
170bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
1710b57cec5SDimitry Andric       return;
1720b57cec5SDimitry Andric     }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric     if (PartTy.isVector())
1750b57cec5SDimitry Andric       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
1760b57cec5SDimitry Andric     else
1770b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, PartRegs);
1780b57cec5SDimitry Andric     return;
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric 
1810eae32dcSDimitry Andric   // Merge sub-vectors with different number of elements and insert into DstReg.
1820eae32dcSDimitry Andric   if (ResultTy.isVector()) {
1830eae32dcSDimitry Andric     assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
1840eae32dcSDimitry Andric     SmallVector<Register, 8> AllRegs;
1850eae32dcSDimitry Andric     for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
1860eae32dcSDimitry Andric       AllRegs.push_back(Reg);
1870eae32dcSDimitry Andric     return mergeMixedSubvectors(DstReg, AllRegs);
1880eae32dcSDimitry Andric   }
1890eae32dcSDimitry Andric 
190fe6060f1SDimitry Andric   SmallVector<Register> GCDRegs;
191fe6060f1SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192fe6060f1SDimitry Andric   for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193fe6060f1SDimitry Andric     extractGCDType(GCDRegs, GCDTy, PartReg);
194fe6060f1SDimitry Andric   LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195fe6060f1SDimitry Andric   buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric 
1980eae32dcSDimitry Andric void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
1990eae32dcSDimitry Andric                                        Register Reg) {
2000eae32dcSDimitry Andric   LLT Ty = MRI.getType(Reg);
2010eae32dcSDimitry Andric   SmallVector<Register, 8> RegElts;
2027a6dacacSDimitry Andric   extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
2037a6dacacSDimitry Andric                MIRBuilder, MRI);
2040eae32dcSDimitry Andric   Elts.append(RegElts);
2050eae32dcSDimitry Andric }
2060eae32dcSDimitry Andric 
2070eae32dcSDimitry Andric /// Merge \p PartRegs with different types into \p DstReg.
2080eae32dcSDimitry Andric void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
2090eae32dcSDimitry Andric                                            ArrayRef<Register> PartRegs) {
2100eae32dcSDimitry Andric   SmallVector<Register, 8> AllElts;
2110eae32dcSDimitry Andric   for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
2120eae32dcSDimitry Andric     appendVectorElts(AllElts, PartRegs[i]);
2130eae32dcSDimitry Andric 
2140eae32dcSDimitry Andric   Register Leftover = PartRegs[PartRegs.size() - 1];
2150eae32dcSDimitry Andric   if (MRI.getType(Leftover).isScalar())
2160eae32dcSDimitry Andric     AllElts.push_back(Leftover);
2170eae32dcSDimitry Andric   else
2180eae32dcSDimitry Andric     appendVectorElts(AllElts, Leftover);
2190eae32dcSDimitry Andric 
220bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
2210eae32dcSDimitry Andric }
2220eae32dcSDimitry Andric 
223e8d8bef9SDimitry Andric /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
2245ffd83dbSDimitry Andric static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
2255ffd83dbSDimitry Andric                               const MachineInstr &MI) {
2265ffd83dbSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
2275ffd83dbSDimitry Andric 
228e8d8bef9SDimitry Andric   const int StartIdx = Regs.size();
2295ffd83dbSDimitry Andric   const int NumResults = MI.getNumOperands() - 1;
230e8d8bef9SDimitry Andric   Regs.resize(Regs.size() + NumResults);
2315ffd83dbSDimitry Andric   for (int I = 0; I != NumResults; ++I)
232e8d8bef9SDimitry Andric     Regs[StartIdx + I] = MI.getOperand(I).getReg();
2335ffd83dbSDimitry Andric }
2345ffd83dbSDimitry Andric 
235e8d8bef9SDimitry Andric void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236e8d8bef9SDimitry Andric                                      LLT GCDTy, Register SrcReg) {
2375ffd83dbSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
2385ffd83dbSDimitry Andric   if (SrcTy == GCDTy) {
2395ffd83dbSDimitry Andric     // If the source already evenly divides the result type, we don't need to do
2405ffd83dbSDimitry Andric     // anything.
2415ffd83dbSDimitry Andric     Parts.push_back(SrcReg);
2425ffd83dbSDimitry Andric   } else {
2435ffd83dbSDimitry Andric     // Need to split into common type sized pieces.
2445ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2455ffd83dbSDimitry Andric     getUnmergeResults(Parts, *Unmerge);
2465ffd83dbSDimitry Andric   }
247e8d8bef9SDimitry Andric }
2485ffd83dbSDimitry Andric 
249e8d8bef9SDimitry Andric LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250e8d8bef9SDimitry Andric                                     LLT NarrowTy, Register SrcReg) {
251e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
252e8d8bef9SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253e8d8bef9SDimitry Andric   extractGCDType(Parts, GCDTy, SrcReg);
2545ffd83dbSDimitry Andric   return GCDTy;
2555ffd83dbSDimitry Andric }
2565ffd83dbSDimitry Andric 
2575ffd83dbSDimitry Andric LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
2585ffd83dbSDimitry Andric                                          SmallVectorImpl<Register> &VRegs,
2595ffd83dbSDimitry Andric                                          unsigned PadStrategy) {
2605ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(DstTy, NarrowTy);
2615ffd83dbSDimitry Andric 
2625ffd83dbSDimitry Andric   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
2635ffd83dbSDimitry Andric   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
2645ffd83dbSDimitry Andric   int NumOrigSrc = VRegs.size();
2655ffd83dbSDimitry Andric 
2665ffd83dbSDimitry Andric   Register PadReg;
2675ffd83dbSDimitry Andric 
2685ffd83dbSDimitry Andric   // Get a value we can use to pad the source value if the sources won't evenly
2695ffd83dbSDimitry Andric   // cover the result type.
2705ffd83dbSDimitry Andric   if (NumOrigSrc < NumParts * NumSubParts) {
2715ffd83dbSDimitry Andric     if (PadStrategy == TargetOpcode::G_ZEXT)
2725ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
2735ffd83dbSDimitry Andric     else if (PadStrategy == TargetOpcode::G_ANYEXT)
2745ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2755ffd83dbSDimitry Andric     else {
2765ffd83dbSDimitry Andric       assert(PadStrategy == TargetOpcode::G_SEXT);
2775ffd83dbSDimitry Andric 
2785ffd83dbSDimitry Andric       // Shift the sign bit of the low register through the high register.
2795ffd83dbSDimitry Andric       auto ShiftAmt =
2805ffd83dbSDimitry Andric         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
2815ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
2825ffd83dbSDimitry Andric     }
2835ffd83dbSDimitry Andric   }
2845ffd83dbSDimitry Andric 
2855ffd83dbSDimitry Andric   // Registers for the final merge to be produced.
2865ffd83dbSDimitry Andric   SmallVector<Register, 4> Remerge(NumParts);
2875ffd83dbSDimitry Andric 
2885ffd83dbSDimitry Andric   // Registers needed for intermediate merges, which will be merged into a
2895ffd83dbSDimitry Andric   // source for Remerge.
2905ffd83dbSDimitry Andric   SmallVector<Register, 4> SubMerge(NumSubParts);
2915ffd83dbSDimitry Andric 
2925ffd83dbSDimitry Andric   // Once we've fully read off the end of the original source bits, we can reuse
2935ffd83dbSDimitry Andric   // the same high bits for remaining padding elements.
2945ffd83dbSDimitry Andric   Register AllPadReg;
2955ffd83dbSDimitry Andric 
2965ffd83dbSDimitry Andric   // Build merges to the LCM type to cover the original result type.
2975ffd83dbSDimitry Andric   for (int I = 0; I != NumParts; ++I) {
2985ffd83dbSDimitry Andric     bool AllMergePartsArePadding = true;
2995ffd83dbSDimitry Andric 
3005ffd83dbSDimitry Andric     // Build the requested merges to the requested type.
3015ffd83dbSDimitry Andric     for (int J = 0; J != NumSubParts; ++J) {
3025ffd83dbSDimitry Andric       int Idx = I * NumSubParts + J;
3035ffd83dbSDimitry Andric       if (Idx >= NumOrigSrc) {
3045ffd83dbSDimitry Andric         SubMerge[J] = PadReg;
3055ffd83dbSDimitry Andric         continue;
3065ffd83dbSDimitry Andric       }
3075ffd83dbSDimitry Andric 
3085ffd83dbSDimitry Andric       SubMerge[J] = VRegs[Idx];
3095ffd83dbSDimitry Andric 
3105ffd83dbSDimitry Andric       // There are meaningful bits here we can't reuse later.
3115ffd83dbSDimitry Andric       AllMergePartsArePadding = false;
3125ffd83dbSDimitry Andric     }
3135ffd83dbSDimitry Andric 
3145ffd83dbSDimitry Andric     // If we've filled up a complete piece with padding bits, we can directly
3155ffd83dbSDimitry Andric     // emit the natural sized constant if applicable, rather than a merge of
3165ffd83dbSDimitry Andric     // smaller constants.
3175ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg) {
3185ffd83dbSDimitry Andric       if (PadStrategy == TargetOpcode::G_ANYEXT)
3195ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
3205ffd83dbSDimitry Andric       else if (PadStrategy == TargetOpcode::G_ZEXT)
3215ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
3225ffd83dbSDimitry Andric 
3235ffd83dbSDimitry Andric       // If this is a sign extension, we can't materialize a trivial constant
3245ffd83dbSDimitry Andric       // with the right type and have to produce a merge.
3255ffd83dbSDimitry Andric     }
3265ffd83dbSDimitry Andric 
3275ffd83dbSDimitry Andric     if (AllPadReg) {
3285ffd83dbSDimitry Andric       // Avoid creating additional instructions if we're just adding additional
3295ffd83dbSDimitry Andric       // copies of padding bits.
3305ffd83dbSDimitry Andric       Remerge[I] = AllPadReg;
3315ffd83dbSDimitry Andric       continue;
3325ffd83dbSDimitry Andric     }
3335ffd83dbSDimitry Andric 
3345ffd83dbSDimitry Andric     if (NumSubParts == 1)
3355ffd83dbSDimitry Andric       Remerge[I] = SubMerge[0];
3365ffd83dbSDimitry Andric     else
337bdd1243dSDimitry Andric       Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
3385ffd83dbSDimitry Andric 
3395ffd83dbSDimitry Andric     // In the sign extend padding case, re-use the first all-signbit merge.
3405ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg)
3415ffd83dbSDimitry Andric       AllPadReg = Remerge[I];
3425ffd83dbSDimitry Andric   }
3435ffd83dbSDimitry Andric 
3445ffd83dbSDimitry Andric   VRegs = std::move(Remerge);
3455ffd83dbSDimitry Andric   return LCMTy;
3465ffd83dbSDimitry Andric }
3475ffd83dbSDimitry Andric 
3485ffd83dbSDimitry Andric void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
3495ffd83dbSDimitry Andric                                                ArrayRef<Register> RemergeRegs) {
3505ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
3515ffd83dbSDimitry Andric 
3525ffd83dbSDimitry Andric   // Create the merge to the widened source, and extract the relevant bits into
3535ffd83dbSDimitry Andric   // the result.
3545ffd83dbSDimitry Andric 
3555ffd83dbSDimitry Andric   if (DstTy == LCMTy) {
356bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
3575ffd83dbSDimitry Andric     return;
3585ffd83dbSDimitry Andric   }
3595ffd83dbSDimitry Andric 
360bdd1243dSDimitry Andric   auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
3615ffd83dbSDimitry Andric   if (DstTy.isScalar() && LCMTy.isScalar()) {
3625ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(DstReg, Remerge);
3635ffd83dbSDimitry Andric     return;
3645ffd83dbSDimitry Andric   }
3655ffd83dbSDimitry Andric 
3665ffd83dbSDimitry Andric   if (LCMTy.isVector()) {
367e8d8bef9SDimitry Andric     unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368e8d8bef9SDimitry Andric     SmallVector<Register, 8> UnmergeDefs(NumDefs);
369e8d8bef9SDimitry Andric     UnmergeDefs[0] = DstReg;
370e8d8bef9SDimitry Andric     for (unsigned I = 1; I != NumDefs; ++I)
371e8d8bef9SDimitry Andric       UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372e8d8bef9SDimitry Andric 
373e8d8bef9SDimitry Andric     MIRBuilder.buildUnmerge(UnmergeDefs,
374bdd1243dSDimitry Andric                             MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
3755ffd83dbSDimitry Andric     return;
3765ffd83dbSDimitry Andric   }
3775ffd83dbSDimitry Andric 
3785ffd83dbSDimitry Andric   llvm_unreachable("unhandled case");
3795ffd83dbSDimitry Andric }
3805ffd83dbSDimitry Andric 
3810b57cec5SDimitry Andric static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382e8d8bef9SDimitry Andric #define RTLIBCASE_INT(LibcallPrefix)                                           \
3835ffd83dbSDimitry Andric   do {                                                                         \
3845ffd83dbSDimitry Andric     switch (Size) {                                                            \
3855ffd83dbSDimitry Andric     case 32:                                                                   \
3865ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
3875ffd83dbSDimitry Andric     case 64:                                                                   \
3885ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
3895ffd83dbSDimitry Andric     case 128:                                                                  \
3905ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
3915ffd83dbSDimitry Andric     default:                                                                   \
3925ffd83dbSDimitry Andric       llvm_unreachable("unexpected size");                                     \
3935ffd83dbSDimitry Andric     }                                                                          \
3945ffd83dbSDimitry Andric   } while (0)
3955ffd83dbSDimitry Andric 
396e8d8bef9SDimitry Andric #define RTLIBCASE(LibcallPrefix)                                               \
397e8d8bef9SDimitry Andric   do {                                                                         \
398e8d8bef9SDimitry Andric     switch (Size) {                                                            \
399e8d8bef9SDimitry Andric     case 32:                                                                   \
400e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
401e8d8bef9SDimitry Andric     case 64:                                                                   \
402e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
403e8d8bef9SDimitry Andric     case 80:                                                                   \
404e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##80;                                         \
405e8d8bef9SDimitry Andric     case 128:                                                                  \
406e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
407e8d8bef9SDimitry Andric     default:                                                                   \
408e8d8bef9SDimitry Andric       llvm_unreachable("unexpected size");                                     \
409e8d8bef9SDimitry Andric     }                                                                          \
410e8d8bef9SDimitry Andric   } while (0)
4115ffd83dbSDimitry Andric 
4120b57cec5SDimitry Andric   switch (Opcode) {
413bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
414bdd1243dSDimitry Andric     RTLIBCASE_INT(MUL_I);
4150b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
416e8d8bef9SDimitry Andric     RTLIBCASE_INT(SDIV_I);
4170b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
418e8d8bef9SDimitry Andric     RTLIBCASE_INT(UDIV_I);
4190b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
420e8d8bef9SDimitry Andric     RTLIBCASE_INT(SREM_I);
4210b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
422e8d8bef9SDimitry Andric     RTLIBCASE_INT(UREM_I);
4230b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424e8d8bef9SDimitry Andric     RTLIBCASE_INT(CTLZ_I);
4250b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
4265ffd83dbSDimitry Andric     RTLIBCASE(ADD_F);
4270b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
4285ffd83dbSDimitry Andric     RTLIBCASE(SUB_F);
4290b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
4305ffd83dbSDimitry Andric     RTLIBCASE(MUL_F);
4310b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
4325ffd83dbSDimitry Andric     RTLIBCASE(DIV_F);
4330b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
4345ffd83dbSDimitry Andric     RTLIBCASE(EXP_F);
4350b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
4365ffd83dbSDimitry Andric     RTLIBCASE(EXP2_F);
4375f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
4385f757f3fSDimitry Andric     RTLIBCASE(EXP10_F);
4390b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
4405ffd83dbSDimitry Andric     RTLIBCASE(REM_F);
4410b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
4425ffd83dbSDimitry Andric     RTLIBCASE(POW_F);
4431db9f3b2SDimitry Andric   case TargetOpcode::G_FPOWI:
4441db9f3b2SDimitry Andric     RTLIBCASE(POWI_F);
4450b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
4465ffd83dbSDimitry Andric     RTLIBCASE(FMA_F);
4470b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
4485ffd83dbSDimitry Andric     RTLIBCASE(SIN_F);
4490b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
4505ffd83dbSDimitry Andric     RTLIBCASE(COS_F);
4510b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
4525ffd83dbSDimitry Andric     RTLIBCASE(LOG10_F);
4530b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
4545ffd83dbSDimitry Andric     RTLIBCASE(LOG_F);
4550b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
4565ffd83dbSDimitry Andric     RTLIBCASE(LOG2_F);
45706c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
45806c3fb27SDimitry Andric     RTLIBCASE(LDEXP_F);
4590b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
4605ffd83dbSDimitry Andric     RTLIBCASE(CEIL_F);
4610b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
4625ffd83dbSDimitry Andric     RTLIBCASE(FLOOR_F);
4635ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
4645ffd83dbSDimitry Andric     RTLIBCASE(FMIN_F);
4655ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
4665ffd83dbSDimitry Andric     RTLIBCASE(FMAX_F);
4675ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
4685ffd83dbSDimitry Andric     RTLIBCASE(SQRT_F);
4695ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
4705ffd83dbSDimitry Andric     RTLIBCASE(RINT_F);
4715ffd83dbSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
4725ffd83dbSDimitry Andric     RTLIBCASE(NEARBYINT_F);
473e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
474e8d8bef9SDimitry Andric     RTLIBCASE(ROUNDEVEN_F);
4750b57cec5SDimitry Andric   }
4760b57cec5SDimitry Andric   llvm_unreachable("Unknown libcall function");
4770b57cec5SDimitry Andric }
4780b57cec5SDimitry Andric 
4798bcb0991SDimitry Andric /// True if an instruction is in tail position in its caller. Intended for
4808bcb0991SDimitry Andric /// legalizing libcalls as tail calls when possible.
4811db9f3b2SDimitry Andric static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
4821db9f3b2SDimitry Andric                                     MachineInstr &MI,
483fe6060f1SDimitry Andric                                     const TargetInstrInfo &TII,
484fe6060f1SDimitry Andric                                     MachineRegisterInfo &MRI) {
4855ffd83dbSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
4865ffd83dbSDimitry Andric   const Function &F = MBB.getParent()->getFunction();
4878bcb0991SDimitry Andric 
4888bcb0991SDimitry Andric   // Conservatively require the attributes of the call to match those of
4898bcb0991SDimitry Andric   // the return. Ignore NoAlias and NonNull because they don't affect the
4908bcb0991SDimitry Andric   // call sequence.
4918bcb0991SDimitry Andric   AttributeList CallerAttrs = F.getAttributes();
49204eeddc0SDimitry Andric   if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
4938bcb0991SDimitry Andric           .removeAttribute(Attribute::NoAlias)
4948bcb0991SDimitry Andric           .removeAttribute(Attribute::NonNull)
4958bcb0991SDimitry Andric           .hasAttributes())
4968bcb0991SDimitry Andric     return false;
4978bcb0991SDimitry Andric 
4988bcb0991SDimitry Andric   // It's not safe to eliminate the sign / zero extension of the return value.
499349cc55cSDimitry Andric   if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
500349cc55cSDimitry Andric       CallerAttrs.hasRetAttr(Attribute::SExt))
5018bcb0991SDimitry Andric     return false;
5028bcb0991SDimitry Andric 
503fe6060f1SDimitry Andric   // Only tail call if the following instruction is a standard return or if we
504fe6060f1SDimitry Andric   // have a `thisreturn` callee, and a sequence like:
505fe6060f1SDimitry Andric   //
506fe6060f1SDimitry Andric   //   G_MEMCPY %0, %1, %2
507fe6060f1SDimitry Andric   //   $x0 = COPY %0
508fe6060f1SDimitry Andric   //   RET_ReallyLR implicit $x0
5095ffd83dbSDimitry Andric   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
510fe6060f1SDimitry Andric   if (Next != MBB.instr_end() && Next->isCopy()) {
5111db9f3b2SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_BZERO)
512fe6060f1SDimitry Andric       return false;
513fe6060f1SDimitry Andric 
5141db9f3b2SDimitry Andric     // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the
5151db9f3b2SDimitry Andric     // mempy/etc routines return the same parameter. For other it will be the
5161db9f3b2SDimitry Andric     // returned value.
517fe6060f1SDimitry Andric     Register VReg = MI.getOperand(0).getReg();
518fe6060f1SDimitry Andric     if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
519fe6060f1SDimitry Andric       return false;
520fe6060f1SDimitry Andric 
521fe6060f1SDimitry Andric     Register PReg = Next->getOperand(0).getReg();
522fe6060f1SDimitry Andric     if (!PReg.isPhysical())
523fe6060f1SDimitry Andric       return false;
524fe6060f1SDimitry Andric 
525fe6060f1SDimitry Andric     auto Ret = next_nodbg(Next, MBB.instr_end());
526fe6060f1SDimitry Andric     if (Ret == MBB.instr_end() || !Ret->isReturn())
527fe6060f1SDimitry Andric       return false;
528fe6060f1SDimitry Andric 
529fe6060f1SDimitry Andric     if (Ret->getNumImplicitOperands() != 1)
530fe6060f1SDimitry Andric       return false;
531fe6060f1SDimitry Andric 
5321db9f3b2SDimitry Andric     if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
533fe6060f1SDimitry Andric       return false;
534fe6060f1SDimitry Andric 
535fe6060f1SDimitry Andric     // Skip over the COPY that we just validated.
536fe6060f1SDimitry Andric     Next = Ret;
537fe6060f1SDimitry Andric   }
538fe6060f1SDimitry Andric 
5395ffd83dbSDimitry Andric   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
5408bcb0991SDimitry Andric     return false;
5418bcb0991SDimitry Andric 
5428bcb0991SDimitry Andric   return true;
5438bcb0991SDimitry Andric }
5448bcb0991SDimitry Andric 
5450b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
5465ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
5470b57cec5SDimitry Andric                     const CallLowering::ArgInfo &Result,
5485ffd83dbSDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args,
5491db9f3b2SDimitry Andric                     const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
5501db9f3b2SDimitry Andric                     MachineInstr *MI) {
5510b57cec5SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
5520b57cec5SDimitry Andric 
5538bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
5545ffd83dbSDimitry Andric   Info.CallConv = CC;
5558bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
5568bcb0991SDimitry Andric   Info.OrigRet = Result;
5571db9f3b2SDimitry Andric   if (MI)
5581db9f3b2SDimitry Andric     Info.IsTailCall =
5591db9f3b2SDimitry Andric         (Result.Ty->isVoidTy() ||
5601db9f3b2SDimitry Andric          Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
5611db9f3b2SDimitry Andric         isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
5621db9f3b2SDimitry Andric                                 *MIRBuilder.getMRI());
5631db9f3b2SDimitry Andric 
5648bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
5658bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
5660b57cec5SDimitry Andric     return LegalizerHelper::UnableToLegalize;
5670b57cec5SDimitry Andric 
5681db9f3b2SDimitry Andric   if (MI && Info.LoweredTailCall) {
5691db9f3b2SDimitry Andric     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
5701db9f3b2SDimitry Andric 
5711db9f3b2SDimitry Andric     // Check debug locations before removing the return.
5721db9f3b2SDimitry Andric     LocObserver.checkpoint(true);
5731db9f3b2SDimitry Andric 
5741db9f3b2SDimitry Andric     // We must have a return following the call (or debug insts) to get past
5751db9f3b2SDimitry Andric     // isLibCallInTailPosition.
5761db9f3b2SDimitry Andric     do {
5771db9f3b2SDimitry Andric       MachineInstr *Next = MI->getNextNode();
5781db9f3b2SDimitry Andric       assert(Next &&
5791db9f3b2SDimitry Andric              (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
5801db9f3b2SDimitry Andric              "Expected instr following MI to be return or debug inst?");
5811db9f3b2SDimitry Andric       // We lowered a tail call, so the call is now the return from the block.
5821db9f3b2SDimitry Andric       // Delete the old return.
5831db9f3b2SDimitry Andric       Next->eraseFromParent();
5841db9f3b2SDimitry Andric     } while (MI->getNextNode());
5851db9f3b2SDimitry Andric 
5861db9f3b2SDimitry Andric     // We expect to lose the debug location from the return.
5871db9f3b2SDimitry Andric     LocObserver.checkpoint(false);
5881db9f3b2SDimitry Andric   }
5890b57cec5SDimitry Andric   return LegalizerHelper::Legalized;
5900b57cec5SDimitry Andric }
5910b57cec5SDimitry Andric 
5925ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
5935ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
5945ffd83dbSDimitry Andric                     const CallLowering::ArgInfo &Result,
5951db9f3b2SDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args,
5961db9f3b2SDimitry Andric                     LostDebugLocObserver &LocObserver, MachineInstr *MI) {
5975ffd83dbSDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
5985ffd83dbSDimitry Andric   const char *Name = TLI.getLibcallName(Libcall);
599*74626c16SDimitry Andric   if (!Name)
600*74626c16SDimitry Andric     return LegalizerHelper::UnableToLegalize;
6015ffd83dbSDimitry Andric   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
6021db9f3b2SDimitry Andric   return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
6035ffd83dbSDimitry Andric }
6045ffd83dbSDimitry Andric 
6050b57cec5SDimitry Andric // Useful for libcalls where all operands have the same type.
6060b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
6070b57cec5SDimitry Andric simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
6081db9f3b2SDimitry Andric               Type *OpType, LostDebugLocObserver &LocObserver) {
6090b57cec5SDimitry Andric   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
6100b57cec5SDimitry Andric 
611fe6060f1SDimitry Andric   // FIXME: What does the original arg index mean here?
6120b57cec5SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
6134824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
6144824e7fdSDimitry Andric     Args.push_back({MO.getReg(), OpType, 0});
615fe6060f1SDimitry Andric   return createLibcall(MIRBuilder, Libcall,
6161db9f3b2SDimitry Andric                        {MI.getOperand(0).getReg(), OpType, 0}, Args,
6171db9f3b2SDimitry Andric                        LocObserver, &MI);
6180b57cec5SDimitry Andric }
6190b57cec5SDimitry Andric 
6208bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
6218bcb0991SDimitry Andric llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
622fe6060f1SDimitry Andric                        MachineInstr &MI, LostDebugLocObserver &LocObserver) {
6238bcb0991SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
6248bcb0991SDimitry Andric 
6258bcb0991SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
6268bcb0991SDimitry Andric   // Add all the args, except for the last which is an imm denoting 'tail'.
627e8d8bef9SDimitry Andric   for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
6288bcb0991SDimitry Andric     Register Reg = MI.getOperand(i).getReg();
6298bcb0991SDimitry Andric 
6308bcb0991SDimitry Andric     // Need derive an IR type for call lowering.
6318bcb0991SDimitry Andric     LLT OpLLT = MRI.getType(Reg);
6328bcb0991SDimitry Andric     Type *OpTy = nullptr;
6338bcb0991SDimitry Andric     if (OpLLT.isPointer())
6345f757f3fSDimitry Andric       OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
6358bcb0991SDimitry Andric     else
6368bcb0991SDimitry Andric       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
637fe6060f1SDimitry Andric     Args.push_back({Reg, OpTy, 0});
6388bcb0991SDimitry Andric   }
6398bcb0991SDimitry Andric 
6408bcb0991SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
6418bcb0991SDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
6428bcb0991SDimitry Andric   RTLIB::Libcall RTLibcall;
643fe6060f1SDimitry Andric   unsigned Opc = MI.getOpcode();
644fe6060f1SDimitry Andric   switch (Opc) {
645fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
646fe6060f1SDimitry Andric     RTLibcall = RTLIB::BZERO;
647fe6060f1SDimitry Andric     break;
648e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
6498bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMCPY;
650fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
6518bcb0991SDimitry Andric     break;
652e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
6538bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMMOVE;
654fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
6558bcb0991SDimitry Andric     break;
656e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET:
657e8d8bef9SDimitry Andric     RTLibcall = RTLIB::MEMSET;
658fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
659e8d8bef9SDimitry Andric     break;
6608bcb0991SDimitry Andric   default:
661fe6060f1SDimitry Andric     llvm_unreachable("unsupported opcode");
6628bcb0991SDimitry Andric   }
6638bcb0991SDimitry Andric   const char *Name = TLI.getLibcallName(RTLibcall);
6648bcb0991SDimitry Andric 
665fe6060f1SDimitry Andric   // Unsupported libcall on the target.
666fe6060f1SDimitry Andric   if (!Name) {
667fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
668fe6060f1SDimitry Andric                       << MIRBuilder.getTII().getName(Opc) << "\n");
669fe6060f1SDimitry Andric     return LegalizerHelper::UnableToLegalize;
670fe6060f1SDimitry Andric   }
671fe6060f1SDimitry Andric 
6728bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
6738bcb0991SDimitry Andric   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
6748bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
675fe6060f1SDimitry Andric   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
6761db9f3b2SDimitry Andric   Info.IsTailCall =
6771db9f3b2SDimitry Andric       MI.getOperand(MI.getNumOperands() - 1).getImm() &&
6781db9f3b2SDimitry Andric       isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
6798bcb0991SDimitry Andric 
6808bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
6818bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
6828bcb0991SDimitry Andric     return LegalizerHelper::UnableToLegalize;
6838bcb0991SDimitry Andric 
6848bcb0991SDimitry Andric   if (Info.LoweredTailCall) {
6858bcb0991SDimitry Andric     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
686fe6060f1SDimitry Andric 
687fe6060f1SDimitry Andric     // Check debug locations before removing the return.
688fe6060f1SDimitry Andric     LocObserver.checkpoint(true);
689fe6060f1SDimitry Andric 
6905ffd83dbSDimitry Andric     // We must have a return following the call (or debug insts) to get past
6918bcb0991SDimitry Andric     // isLibCallInTailPosition.
6925ffd83dbSDimitry Andric     do {
6935ffd83dbSDimitry Andric       MachineInstr *Next = MI.getNextNode();
694fe6060f1SDimitry Andric       assert(Next &&
695fe6060f1SDimitry Andric              (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
6965ffd83dbSDimitry Andric              "Expected instr following MI to be return or debug inst?");
6978bcb0991SDimitry Andric       // We lowered a tail call, so the call is now the return from the block.
6988bcb0991SDimitry Andric       // Delete the old return.
6995ffd83dbSDimitry Andric       Next->eraseFromParent();
7005ffd83dbSDimitry Andric     } while (MI.getNextNode());
701fe6060f1SDimitry Andric 
702fe6060f1SDimitry Andric     // We expect to lose the debug location from the return.
703fe6060f1SDimitry Andric     LocObserver.checkpoint(false);
7048bcb0991SDimitry Andric   }
7058bcb0991SDimitry Andric 
7068bcb0991SDimitry Andric   return LegalizerHelper::Legalized;
7078bcb0991SDimitry Andric }
7088bcb0991SDimitry Andric 
7091db9f3b2SDimitry Andric static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
7101db9f3b2SDimitry Andric   unsigned Opc = MI.getOpcode();
7111db9f3b2SDimitry Andric   auto &AtomicMI = cast<GMemOperation>(MI);
7121db9f3b2SDimitry Andric   auto &MMO = AtomicMI.getMMO();
7131db9f3b2SDimitry Andric   auto Ordering = MMO.getMergedOrdering();
7141db9f3b2SDimitry Andric   LLT MemType = MMO.getMemoryType();
7151db9f3b2SDimitry Andric   uint64_t MemSize = MemType.getSizeInBytes();
7161db9f3b2SDimitry Andric   if (MemType.isVector())
7171db9f3b2SDimitry Andric     return RTLIB::UNKNOWN_LIBCALL;
7181db9f3b2SDimitry Andric 
7191db9f3b2SDimitry Andric #define LCALLS(A, B)                                                           \
7201db9f3b2SDimitry Andric   { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
7211db9f3b2SDimitry Andric #define LCALL5(A)                                                              \
7221db9f3b2SDimitry Andric   LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
7231db9f3b2SDimitry Andric   switch (Opc) {
7241db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
7251db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
7261db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
7271db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7281db9f3b2SDimitry Andric   }
7291db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG: {
7301db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
7311db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7321db9f3b2SDimitry Andric   }
7331db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
7341db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB: {
7351db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
7361db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7371db9f3b2SDimitry Andric   }
7381db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND: {
7391db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
7401db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7411db9f3b2SDimitry Andric   }
7421db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR: {
7431db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
7441db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7451db9f3b2SDimitry Andric   }
7461db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR: {
7471db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
7481db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
7491db9f3b2SDimitry Andric   }
7501db9f3b2SDimitry Andric   default:
7511db9f3b2SDimitry Andric     return RTLIB::UNKNOWN_LIBCALL;
7521db9f3b2SDimitry Andric   }
7531db9f3b2SDimitry Andric #undef LCALLS
7541db9f3b2SDimitry Andric #undef LCALL5
7551db9f3b2SDimitry Andric }
7561db9f3b2SDimitry Andric 
7571db9f3b2SDimitry Andric static LegalizerHelper::LegalizeResult
7581db9f3b2SDimitry Andric createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
7591db9f3b2SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
7601db9f3b2SDimitry Andric 
7611db9f3b2SDimitry Andric   Type *RetTy;
7621db9f3b2SDimitry Andric   SmallVector<Register> RetRegs;
7631db9f3b2SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
7641db9f3b2SDimitry Andric   unsigned Opc = MI.getOpcode();
7651db9f3b2SDimitry Andric   switch (Opc) {
7661db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
7671db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
7681db9f3b2SDimitry Andric     Register Success;
7691db9f3b2SDimitry Andric     LLT SuccessLLT;
7701db9f3b2SDimitry Andric     auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
7711db9f3b2SDimitry Andric         MI.getFirst4RegLLTs();
7721db9f3b2SDimitry Andric     RetRegs.push_back(Ret);
7731db9f3b2SDimitry Andric     RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
7741db9f3b2SDimitry Andric     if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
7751db9f3b2SDimitry Andric       std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
7761db9f3b2SDimitry Andric                NewLLT) = MI.getFirst5RegLLTs();
7771db9f3b2SDimitry Andric       RetRegs.push_back(Success);
7781db9f3b2SDimitry Andric       RetTy = StructType::get(
7791db9f3b2SDimitry Andric           Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
7801db9f3b2SDimitry Andric     }
7811db9f3b2SDimitry Andric     Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
7821db9f3b2SDimitry Andric     Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
7831db9f3b2SDimitry Andric     Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
7841db9f3b2SDimitry Andric     break;
7851db9f3b2SDimitry Andric   }
7861db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
7871db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
7881db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
7891db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
7901db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
7911db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR: {
7921db9f3b2SDimitry Andric     auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
7931db9f3b2SDimitry Andric     RetRegs.push_back(Ret);
7941db9f3b2SDimitry Andric     RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
7951db9f3b2SDimitry Andric     if (Opc == TargetOpcode::G_ATOMICRMW_AND)
7961db9f3b2SDimitry Andric       Val =
7971db9f3b2SDimitry Andric           MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
7981db9f3b2SDimitry Andric               .getReg(0);
7991db9f3b2SDimitry Andric     else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
8001db9f3b2SDimitry Andric       Val =
8011db9f3b2SDimitry Andric           MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
8021db9f3b2SDimitry Andric               .getReg(0);
8031db9f3b2SDimitry Andric     Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
8041db9f3b2SDimitry Andric     Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
8051db9f3b2SDimitry Andric     break;
8061db9f3b2SDimitry Andric   }
8071db9f3b2SDimitry Andric   default:
8081db9f3b2SDimitry Andric     llvm_unreachable("unsupported opcode");
8091db9f3b2SDimitry Andric   }
8101db9f3b2SDimitry Andric 
8111db9f3b2SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
8121db9f3b2SDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
8131db9f3b2SDimitry Andric   RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
8141db9f3b2SDimitry Andric   const char *Name = TLI.getLibcallName(RTLibcall);
8151db9f3b2SDimitry Andric 
8161db9f3b2SDimitry Andric   // Unsupported libcall on the target.
8171db9f3b2SDimitry Andric   if (!Name) {
8181db9f3b2SDimitry Andric     LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
8191db9f3b2SDimitry Andric                       << MIRBuilder.getTII().getName(Opc) << "\n");
8201db9f3b2SDimitry Andric     return LegalizerHelper::UnableToLegalize;
8211db9f3b2SDimitry Andric   }
8221db9f3b2SDimitry Andric 
8231db9f3b2SDimitry Andric   CallLowering::CallLoweringInfo Info;
8241db9f3b2SDimitry Andric   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
8251db9f3b2SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
8261db9f3b2SDimitry Andric   Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
8271db9f3b2SDimitry Andric 
8281db9f3b2SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
8291db9f3b2SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
8301db9f3b2SDimitry Andric     return LegalizerHelper::UnableToLegalize;
8311db9f3b2SDimitry Andric 
8321db9f3b2SDimitry Andric   return LegalizerHelper::Legalized;
8331db9f3b2SDimitry Andric }
8341db9f3b2SDimitry Andric 
8350b57cec5SDimitry Andric static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
8360b57cec5SDimitry Andric                                        Type *FromType) {
8370b57cec5SDimitry Andric   auto ToMVT = MVT::getVT(ToType);
8380b57cec5SDimitry Andric   auto FromMVT = MVT::getVT(FromType);
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric   switch (Opcode) {
8410b57cec5SDimitry Andric   case TargetOpcode::G_FPEXT:
8420b57cec5SDimitry Andric     return RTLIB::getFPEXT(FromMVT, ToMVT);
8430b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC:
8440b57cec5SDimitry Andric     return RTLIB::getFPROUND(FromMVT, ToMVT);
8450b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
8460b57cec5SDimitry Andric     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
8470b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
8480b57cec5SDimitry Andric     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
8490b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
8500b57cec5SDimitry Andric     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
8510b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
8520b57cec5SDimitry Andric     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
8530b57cec5SDimitry Andric   }
8540b57cec5SDimitry Andric   llvm_unreachable("Unsupported libcall function");
8550b57cec5SDimitry Andric }
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
8580b57cec5SDimitry Andric conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
8591db9f3b2SDimitry Andric                   Type *FromType, LostDebugLocObserver &LocObserver) {
8600b57cec5SDimitry Andric   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
8611db9f3b2SDimitry Andric   return createLibcall(
8621db9f3b2SDimitry Andric       MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
8631db9f3b2SDimitry Andric       {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
8640b57cec5SDimitry Andric }
8650b57cec5SDimitry Andric 
8665f757f3fSDimitry Andric static RTLIB::Libcall
8675f757f3fSDimitry Andric getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
8685f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall;
8695f757f3fSDimitry Andric   switch (MI.getOpcode()) {
870297eecfbSDimitry Andric   case TargetOpcode::G_GET_FPENV:
871297eecfbSDimitry Andric     RTLibcall = RTLIB::FEGETENV;
872297eecfbSDimitry Andric     break;
873297eecfbSDimitry Andric   case TargetOpcode::G_SET_FPENV:
874297eecfbSDimitry Andric   case TargetOpcode::G_RESET_FPENV:
875297eecfbSDimitry Andric     RTLibcall = RTLIB::FESETENV;
876297eecfbSDimitry Andric     break;
8775f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE:
8785f757f3fSDimitry Andric     RTLibcall = RTLIB::FEGETMODE;
8795f757f3fSDimitry Andric     break;
8805f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE:
8815f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE:
8825f757f3fSDimitry Andric     RTLibcall = RTLIB::FESETMODE;
8835f757f3fSDimitry Andric     break;
8845f757f3fSDimitry Andric   default:
8855f757f3fSDimitry Andric     llvm_unreachable("Unexpected opcode");
8865f757f3fSDimitry Andric   }
8875f757f3fSDimitry Andric   return RTLibcall;
8885f757f3fSDimitry Andric }
8895f757f3fSDimitry Andric 
8905f757f3fSDimitry Andric // Some library functions that read FP state (fegetmode, fegetenv) write the
8915f757f3fSDimitry Andric // state into a region in memory. IR intrinsics that do the same operations
8925f757f3fSDimitry Andric // (get_fpmode, get_fpenv) return the state as integer value. To implement these
8935f757f3fSDimitry Andric // intrinsics via the library functions, we need to use temporary variable,
8945f757f3fSDimitry Andric // for example:
8955f757f3fSDimitry Andric //
8965f757f3fSDimitry Andric //     %0:_(s32) = G_GET_FPMODE
8975f757f3fSDimitry Andric //
8985f757f3fSDimitry Andric // is transformed to:
8995f757f3fSDimitry Andric //
9005f757f3fSDimitry Andric //     %1:_(p0) = G_FRAME_INDEX %stack.0
9015f757f3fSDimitry Andric //     BL &fegetmode
9025f757f3fSDimitry Andric //     %0:_(s32) = G_LOAD % 1
9035f757f3fSDimitry Andric //
9045f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
9055f757f3fSDimitry Andric LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
9061db9f3b2SDimitry Andric                                        MachineInstr &MI,
9071db9f3b2SDimitry Andric                                        LostDebugLocObserver &LocObserver) {
9085f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
9095f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
9105f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
9115f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
9125f757f3fSDimitry Andric 
9135f757f3fSDimitry Andric   // Create temporary, where library function will put the read state.
9145f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
9155f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Dst);
9165f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
9175f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
9185f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
9195f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
9205f757f3fSDimitry Andric 
9215f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
9225f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
9235f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
9245f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
9255f757f3fSDimitry Andric   auto Res =
9265f757f3fSDimitry Andric       createLibcall(MIRBuilder, RTLibcall,
9275f757f3fSDimitry Andric                     CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
9281db9f3b2SDimitry Andric                     CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
9291db9f3b2SDimitry Andric                     LocObserver, nullptr);
9305f757f3fSDimitry Andric   if (Res != LegalizerHelper::Legalized)
9315f757f3fSDimitry Andric     return Res;
9325f757f3fSDimitry Andric 
9335f757f3fSDimitry Andric   // Create a load from the temporary.
9345f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
9355f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
9365f757f3fSDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
9375f757f3fSDimitry Andric 
9385f757f3fSDimitry Andric   return LegalizerHelper::Legalized;
9395f757f3fSDimitry Andric }
9405f757f3fSDimitry Andric 
9415f757f3fSDimitry Andric // Similar to `createGetStateLibcall` the function calls a library function
9425f757f3fSDimitry Andric // using transient space in stack. In this case the library function reads
9435f757f3fSDimitry Andric // content of memory region.
9445f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
9455f757f3fSDimitry Andric LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
9461db9f3b2SDimitry Andric                                        MachineInstr &MI,
9471db9f3b2SDimitry Andric                                        LostDebugLocObserver &LocObserver) {
9485f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
9495f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
9505f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
9515f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
9525f757f3fSDimitry Andric 
9535f757f3fSDimitry Andric   // Create temporary, where library function will get the new state.
9545f757f3fSDimitry Andric   Register Src = MI.getOperand(0).getReg();
9555f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Src);
9565f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
9575f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
9585f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
9595f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
9605f757f3fSDimitry Andric 
9615f757f3fSDimitry Andric   // Put the new state into the temporary.
9625f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
9635f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
9645f757f3fSDimitry Andric   MIRBuilder.buildStore(Src, Temp, *MMO);
9655f757f3fSDimitry Andric 
9665f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
9675f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
9685f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
9695f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
9705f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
9715f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
9721db9f3b2SDimitry Andric                        CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
9731db9f3b2SDimitry Andric                        LocObserver, nullptr);
9745f757f3fSDimitry Andric }
9755f757f3fSDimitry Andric 
9765f757f3fSDimitry Andric // The function is used to legalize operations that set default environment
9775f757f3fSDimitry Andric // state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
9785f757f3fSDimitry Andric // On most targets supported in glibc FE_DFL_MODE is defined as
9795f757f3fSDimitry Andric // `((const femode_t *) -1)`. Such assumption is used here. If for some target
9805f757f3fSDimitry Andric // it is not true, the target must provide custom lowering.
9815f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
9825f757f3fSDimitry Andric LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
9831db9f3b2SDimitry Andric                                          MachineInstr &MI,
9841db9f3b2SDimitry Andric                                          LostDebugLocObserver &LocObserver) {
9855f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
9865f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
9875f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
9885f757f3fSDimitry Andric 
9895f757f3fSDimitry Andric   // Create an argument for the library function.
9905f757f3fSDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
9915f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
9925f757f3fSDimitry Andric   unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
9935f757f3fSDimitry Andric   LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
9945f757f3fSDimitry Andric   auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
9955f757f3fSDimitry Andric   DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
9965f757f3fSDimitry Andric   MIRBuilder.buildIntToPtr(Dest, DefValue);
9975f757f3fSDimitry Andric 
9985f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
9995f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
10005f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
10011db9f3b2SDimitry Andric                        CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
10021db9f3b2SDimitry Andric                        LocObserver, &MI);
10035f757f3fSDimitry Andric }
10045f757f3fSDimitry Andric 
10050b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
1006fe6060f1SDimitry Andric LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
10070b57cec5SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
10080b57cec5SDimitry Andric 
10090b57cec5SDimitry Andric   switch (MI.getOpcode()) {
10100b57cec5SDimitry Andric   default:
10110b57cec5SDimitry Andric     return UnableToLegalize;
1012bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
10130b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
10140b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
10150b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
10160b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
10170b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
10185f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
10195f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
10200b57cec5SDimitry Andric     Type *HLTy = IntegerType::get(Ctx, Size);
10211db9f3b2SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
10220b57cec5SDimitry Andric     if (Status != Legalized)
10230b57cec5SDimitry Andric       return Status;
10240b57cec5SDimitry Andric     break;
10250b57cec5SDimitry Andric   }
10260b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
10270b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
10280b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
10290b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
10300b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
10310b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
10320b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
10330b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
10340b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
10350b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
10360b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
10370b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
103806c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
10390b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
10400b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
10415f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
10420b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
10435ffd83dbSDimitry Andric   case TargetOpcode::G_FFLOOR:
10445ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
10455ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
10465ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
10475ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
1048e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
1049e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
10505f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
10515f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
10525ffd83dbSDimitry Andric     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1053e8d8bef9SDimitry Andric     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1054e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
10550b57cec5SDimitry Andric       return UnableToLegalize;
10560b57cec5SDimitry Andric     }
10571db9f3b2SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
10581db9f3b2SDimitry Andric     if (Status != Legalized)
10591db9f3b2SDimitry Andric       return Status;
10601db9f3b2SDimitry Andric     break;
10611db9f3b2SDimitry Andric   }
10621db9f3b2SDimitry Andric   case TargetOpcode::G_FPOWI: {
10631db9f3b2SDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
10641db9f3b2SDimitry Andric     unsigned Size = LLTy.getSizeInBits();
10651db9f3b2SDimitry Andric     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
10661db9f3b2SDimitry Andric     Type *ITy = IntegerType::get(
10671db9f3b2SDimitry Andric         Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
10681db9f3b2SDimitry Andric     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
10691db9f3b2SDimitry Andric       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
10701db9f3b2SDimitry Andric       return UnableToLegalize;
10711db9f3b2SDimitry Andric     }
10721db9f3b2SDimitry Andric     auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
10731db9f3b2SDimitry Andric     std::initializer_list<CallLowering::ArgInfo> Args = {
10741db9f3b2SDimitry Andric         {MI.getOperand(1).getReg(), HLTy, 0},
10751db9f3b2SDimitry Andric         {MI.getOperand(2).getReg(), ITy, 1}};
10761db9f3b2SDimitry Andric     LegalizeResult Status =
10771db9f3b2SDimitry Andric         createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
10781db9f3b2SDimitry Andric                       Args, LocObserver, &MI);
10790b57cec5SDimitry Andric     if (Status != Legalized)
10800b57cec5SDimitry Andric       return Status;
10810b57cec5SDimitry Andric     break;
10820b57cec5SDimitry Andric   }
10835ffd83dbSDimitry Andric   case TargetOpcode::G_FPEXT:
10840b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC: {
10855ffd83dbSDimitry Andric     Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
10865ffd83dbSDimitry Andric     Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
10875ffd83dbSDimitry Andric     if (!FromTy || !ToTy)
10880b57cec5SDimitry Andric       return UnableToLegalize;
10891db9f3b2SDimitry Andric     LegalizeResult Status =
10901db9f3b2SDimitry Andric         conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
10910b57cec5SDimitry Andric     if (Status != Legalized)
10920b57cec5SDimitry Andric       return Status;
10930b57cec5SDimitry Andric     break;
10940b57cec5SDimitry Andric   }
10950b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
10960b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI: {
10970b57cec5SDimitry Andric     // FIXME: Support other types
10980b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
10990b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
11000b57cec5SDimitry Andric     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
11010b57cec5SDimitry Andric       return UnableToLegalize;
11020b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
11030b57cec5SDimitry Andric         MI, MIRBuilder,
11040b57cec5SDimitry Andric         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
11051db9f3b2SDimitry Andric         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
11061db9f3b2SDimitry Andric         LocObserver);
11070b57cec5SDimitry Andric     if (Status != Legalized)
11080b57cec5SDimitry Andric       return Status;
11090b57cec5SDimitry Andric     break;
11100b57cec5SDimitry Andric   }
11110b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
11120b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP: {
11130b57cec5SDimitry Andric     // FIXME: Support other types
11140b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
11150b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
11160b57cec5SDimitry Andric     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
11170b57cec5SDimitry Andric       return UnableToLegalize;
11180b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
11190b57cec5SDimitry Andric         MI, MIRBuilder,
11200b57cec5SDimitry Andric         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
11211db9f3b2SDimitry Andric         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
11221db9f3b2SDimitry Andric         LocObserver);
11231db9f3b2SDimitry Andric     if (Status != Legalized)
11241db9f3b2SDimitry Andric       return Status;
11251db9f3b2SDimitry Andric     break;
11261db9f3b2SDimitry Andric   }
11271db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
11281db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
11291db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
11301db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
11311db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
11321db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
11331db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
11341db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
11351db9f3b2SDimitry Andric     auto Status = createAtomicLibcall(MIRBuilder, MI);
11360b57cec5SDimitry Andric     if (Status != Legalized)
11370b57cec5SDimitry Andric       return Status;
11380b57cec5SDimitry Andric     break;
11390b57cec5SDimitry Andric   }
1140fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
1141e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
1142e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
1143e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET: {
1144fe6060f1SDimitry Andric     LegalizeResult Result =
1145fe6060f1SDimitry Andric         createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1146fe6060f1SDimitry Andric     if (Result != Legalized)
1147fe6060f1SDimitry Andric       return Result;
1148e8d8bef9SDimitry Andric     MI.eraseFromParent();
1149e8d8bef9SDimitry Andric     return Result;
1150e8d8bef9SDimitry Andric   }
1151297eecfbSDimitry Andric   case TargetOpcode::G_GET_FPENV:
11525f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE: {
11531db9f3b2SDimitry Andric     LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
11545f757f3fSDimitry Andric     if (Result != Legalized)
11555f757f3fSDimitry Andric       return Result;
11565f757f3fSDimitry Andric     break;
11575f757f3fSDimitry Andric   }
1158297eecfbSDimitry Andric   case TargetOpcode::G_SET_FPENV:
11595f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE: {
11601db9f3b2SDimitry Andric     LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
11615f757f3fSDimitry Andric     if (Result != Legalized)
11625f757f3fSDimitry Andric       return Result;
11635f757f3fSDimitry Andric     break;
11645f757f3fSDimitry Andric   }
1165297eecfbSDimitry Andric   case TargetOpcode::G_RESET_FPENV:
11665f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE: {
11671db9f3b2SDimitry Andric     LegalizeResult Result =
11681db9f3b2SDimitry Andric         createResetStateLibcall(MIRBuilder, MI, LocObserver);
11695f757f3fSDimitry Andric     if (Result != Legalized)
11705f757f3fSDimitry Andric       return Result;
11715f757f3fSDimitry Andric     break;
11725f757f3fSDimitry Andric   }
11730b57cec5SDimitry Andric   }
11740b57cec5SDimitry Andric 
11750b57cec5SDimitry Andric   MI.eraseFromParent();
11760b57cec5SDimitry Andric   return Legalized;
11770b57cec5SDimitry Andric }
11780b57cec5SDimitry Andric 
11790b57cec5SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
11800b57cec5SDimitry Andric                                                               unsigned TypeIdx,
11810b57cec5SDimitry Andric                                                               LLT NarrowTy) {
11820b57cec5SDimitry Andric   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
11830b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric   switch (MI.getOpcode()) {
11860b57cec5SDimitry Andric   default:
11870b57cec5SDimitry Andric     return UnableToLegalize;
11880b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
11895ffd83dbSDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
11905ffd83dbSDimitry Andric     LLT DstTy = MRI.getType(DstReg);
11915ffd83dbSDimitry Andric 
11925ffd83dbSDimitry Andric     // If SizeOp0 is not an exact multiple of NarrowSize, emit
11935ffd83dbSDimitry Andric     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
11945ffd83dbSDimitry Andric     // FIXME: Although this would also be legal for the general case, it causes
11955ffd83dbSDimitry Andric     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
11965ffd83dbSDimitry Andric     //  combines not being hit). This seems to be a problem related to the
11975ffd83dbSDimitry Andric     //  artifact combiner.
11985ffd83dbSDimitry Andric     if (SizeOp0 % NarrowSize != 0) {
11995ffd83dbSDimitry Andric       LLT ImplicitTy = NarrowTy;
12005ffd83dbSDimitry Andric       if (DstTy.isVector())
1201fe6060f1SDimitry Andric         ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
12025ffd83dbSDimitry Andric 
12035ffd83dbSDimitry Andric       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
12045ffd83dbSDimitry Andric       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
12055ffd83dbSDimitry Andric 
12065ffd83dbSDimitry Andric       MI.eraseFromParent();
12075ffd83dbSDimitry Andric       return Legalized;
12085ffd83dbSDimitry Andric     }
12095ffd83dbSDimitry Andric 
12100b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
12110b57cec5SDimitry Andric 
12120b57cec5SDimitry Andric     SmallVector<Register, 2> DstRegs;
12130b57cec5SDimitry Andric     for (int i = 0; i < NumParts; ++i)
12145ffd83dbSDimitry Andric       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
12150b57cec5SDimitry Andric 
12165ffd83dbSDimitry Andric     if (DstTy.isVector())
12170b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, DstRegs);
12180b57cec5SDimitry Andric     else
1219bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
12200b57cec5SDimitry Andric     MI.eraseFromParent();
12210b57cec5SDimitry Andric     return Legalized;
12220b57cec5SDimitry Andric   }
12230b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
12240b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
12250b57cec5SDimitry Andric     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
12260b57cec5SDimitry Andric     unsigned TotalSize = Ty.getSizeInBits();
12270b57cec5SDimitry Andric     unsigned NarrowSize = NarrowTy.getSizeInBits();
12280b57cec5SDimitry Andric     int NumParts = TotalSize / NarrowSize;
12290b57cec5SDimitry Andric 
12300b57cec5SDimitry Andric     SmallVector<Register, 4> PartRegs;
12310b57cec5SDimitry Andric     for (int I = 0; I != NumParts; ++I) {
12320b57cec5SDimitry Andric       unsigned Offset = I * NarrowSize;
12330b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(NarrowTy,
12340b57cec5SDimitry Andric                                         Val.lshr(Offset).trunc(NarrowSize));
12350b57cec5SDimitry Andric       PartRegs.push_back(K.getReg(0));
12360b57cec5SDimitry Andric     }
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric     LLT LeftoverTy;
12390b57cec5SDimitry Andric     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
12400b57cec5SDimitry Andric     SmallVector<Register, 1> LeftoverRegs;
12410b57cec5SDimitry Andric     if (LeftoverBits != 0) {
12420b57cec5SDimitry Andric       LeftoverTy = LLT::scalar(LeftoverBits);
12430b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(
12440b57cec5SDimitry Andric         LeftoverTy,
12450b57cec5SDimitry Andric         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
12460b57cec5SDimitry Andric       LeftoverRegs.push_back(K.getReg(0));
12470b57cec5SDimitry Andric     }
12480b57cec5SDimitry Andric 
12490b57cec5SDimitry Andric     insertParts(MI.getOperand(0).getReg(),
12500b57cec5SDimitry Andric                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric     MI.eraseFromParent();
12530b57cec5SDimitry Andric     return Legalized;
12540b57cec5SDimitry Andric   }
12555ffd83dbSDimitry Andric   case TargetOpcode::G_SEXT:
12565ffd83dbSDimitry Andric   case TargetOpcode::G_ZEXT:
12575ffd83dbSDimitry Andric   case TargetOpcode::G_ANYEXT:
12585ffd83dbSDimitry Andric     return narrowScalarExt(MI, TypeIdx, NarrowTy);
12598bcb0991SDimitry Andric   case TargetOpcode::G_TRUNC: {
12608bcb0991SDimitry Andric     if (TypeIdx != 1)
12618bcb0991SDimitry Andric       return UnableToLegalize;
12628bcb0991SDimitry Andric 
12638bcb0991SDimitry Andric     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
12648bcb0991SDimitry Andric     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
12658bcb0991SDimitry Andric       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
12668bcb0991SDimitry Andric       return UnableToLegalize;
12678bcb0991SDimitry Andric     }
12688bcb0991SDimitry Andric 
12695ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
12705ffd83dbSDimitry Andric     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
12718bcb0991SDimitry Andric     MI.eraseFromParent();
12728bcb0991SDimitry Andric     return Legalized;
12738bcb0991SDimitry Andric   }
12748bcb0991SDimitry Andric 
12750eae32dcSDimitry Andric   case TargetOpcode::G_FREEZE: {
12760eae32dcSDimitry Andric     if (TypeIdx != 0)
12770eae32dcSDimitry Andric       return UnableToLegalize;
12780eae32dcSDimitry Andric 
12790eae32dcSDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
12800eae32dcSDimitry Andric     // Should widen scalar first
12810eae32dcSDimitry Andric     if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
12820eae32dcSDimitry Andric       return UnableToLegalize;
12830eae32dcSDimitry Andric 
12840eae32dcSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
12850eae32dcSDimitry Andric     SmallVector<Register, 8> Parts;
12860eae32dcSDimitry Andric     for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
12870eae32dcSDimitry Andric       Parts.push_back(
12880eae32dcSDimitry Andric           MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
12890eae32dcSDimitry Andric     }
12900eae32dcSDimitry Andric 
1291bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
12920eae32dcSDimitry Andric     MI.eraseFromParent();
12930eae32dcSDimitry Andric     return Legalized;
12940eae32dcSDimitry Andric   }
1295fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
1296fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
1297fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
1298fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
1299fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
1300fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
1301fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
1302fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
1303fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
1304fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
1305fe6060f1SDimitry Andric     return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
13060b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
13070b57cec5SDimitry Andric   case TargetOpcode::G_UMULH:
13080b57cec5SDimitry Andric     return narrowScalarMul(MI, NarrowTy);
13090b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
13100b57cec5SDimitry Andric     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
13110b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
13120b57cec5SDimitry Andric     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
13130b57cec5SDimitry Andric   case TargetOpcode::G_LOAD: {
1314fe6060f1SDimitry Andric     auto &LoadMI = cast<GLoad>(MI);
1315fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
13160b57cec5SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
13170b57cec5SDimitry Andric     if (DstTy.isVector())
13180b57cec5SDimitry Andric       return UnableToLegalize;
13190b57cec5SDimitry Andric 
1320fe6060f1SDimitry Andric     if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
13210b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1322fe6060f1SDimitry Andric       MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
13230b57cec5SDimitry Andric       MIRBuilder.buildAnyExt(DstReg, TmpReg);
1324fe6060f1SDimitry Andric       LoadMI.eraseFromParent();
13250b57cec5SDimitry Andric       return Legalized;
13260b57cec5SDimitry Andric     }
13270b57cec5SDimitry Andric 
1328fe6060f1SDimitry Andric     return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
13290b57cec5SDimitry Andric   }
13300b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
13310b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD: {
1332fe6060f1SDimitry Andric     auto &LoadMI = cast<GExtLoad>(MI);
1333fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
1334fe6060f1SDimitry Andric     Register PtrReg = LoadMI.getPointerReg();
13350b57cec5SDimitry Andric 
13360b57cec5SDimitry Andric     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1337fe6060f1SDimitry Andric     auto &MMO = LoadMI.getMMO();
1338e8d8bef9SDimitry Andric     unsigned MemSize = MMO.getSizeInBits();
1339e8d8bef9SDimitry Andric 
1340e8d8bef9SDimitry Andric     if (MemSize == NarrowSize) {
13410b57cec5SDimitry Andric       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1342e8d8bef9SDimitry Andric     } else if (MemSize < NarrowSize) {
1343fe6060f1SDimitry Andric       MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1344e8d8bef9SDimitry Andric     } else if (MemSize > NarrowSize) {
1345e8d8bef9SDimitry Andric       // FIXME: Need to split the load.
1346e8d8bef9SDimitry Andric       return UnableToLegalize;
13470b57cec5SDimitry Andric     }
13480b57cec5SDimitry Andric 
1349fe6060f1SDimitry Andric     if (isa<GZExtLoad>(LoadMI))
13500b57cec5SDimitry Andric       MIRBuilder.buildZExt(DstReg, TmpReg);
13510b57cec5SDimitry Andric     else
13520b57cec5SDimitry Andric       MIRBuilder.buildSExt(DstReg, TmpReg);
13530b57cec5SDimitry Andric 
1354fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
13550b57cec5SDimitry Andric     return Legalized;
13560b57cec5SDimitry Andric   }
13570b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
1358fe6060f1SDimitry Andric     auto &StoreMI = cast<GStore>(MI);
13590b57cec5SDimitry Andric 
1360fe6060f1SDimitry Andric     Register SrcReg = StoreMI.getValueReg();
13610b57cec5SDimitry Andric     LLT SrcTy = MRI.getType(SrcReg);
13620b57cec5SDimitry Andric     if (SrcTy.isVector())
13630b57cec5SDimitry Andric       return UnableToLegalize;
13640b57cec5SDimitry Andric 
13650b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
13660b57cec5SDimitry Andric     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
13670b57cec5SDimitry Andric     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
13680b57cec5SDimitry Andric     if (SrcTy.isVector() && LeftoverBits != 0)
13690b57cec5SDimitry Andric       return UnableToLegalize;
13700b57cec5SDimitry Andric 
1371fe6060f1SDimitry Andric     if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
13720b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
13730b57cec5SDimitry Andric       MIRBuilder.buildTrunc(TmpReg, SrcReg);
1374fe6060f1SDimitry Andric       MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1375fe6060f1SDimitry Andric       StoreMI.eraseFromParent();
13760b57cec5SDimitry Andric       return Legalized;
13770b57cec5SDimitry Andric     }
13780b57cec5SDimitry Andric 
1379fe6060f1SDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
13800b57cec5SDimitry Andric   }
13810b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
13820b57cec5SDimitry Andric     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
13830b57cec5SDimitry Andric   case TargetOpcode::G_AND:
13840b57cec5SDimitry Andric   case TargetOpcode::G_OR:
13850b57cec5SDimitry Andric   case TargetOpcode::G_XOR: {
13860b57cec5SDimitry Andric     // Legalize bitwise operation:
13870b57cec5SDimitry Andric     // A = BinOp<Ty> B, C
13880b57cec5SDimitry Andric     // into:
13890b57cec5SDimitry Andric     // B1, ..., BN = G_UNMERGE_VALUES B
13900b57cec5SDimitry Andric     // C1, ..., CN = G_UNMERGE_VALUES C
13910b57cec5SDimitry Andric     // A1 = BinOp<Ty/N> B1, C1
13920b57cec5SDimitry Andric     // ...
13930b57cec5SDimitry Andric     // AN = BinOp<Ty/N> BN, CN
13940b57cec5SDimitry Andric     // A = G_MERGE_VALUES A1, ..., AN
13950b57cec5SDimitry Andric     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
13960b57cec5SDimitry Andric   }
13970b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
13980b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
13990b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
14000b57cec5SDimitry Andric     return narrowScalarShift(MI, TypeIdx, NarrowTy);
14010b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
14020b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
14030b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
14040b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
14050b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
14065ffd83dbSDimitry Andric     if (TypeIdx == 1)
14075ffd83dbSDimitry Andric       switch (MI.getOpcode()) {
14085ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ:
14095ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
14105ffd83dbSDimitry Andric         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
14115ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ:
14125ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
14135ffd83dbSDimitry Andric         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
14145ffd83dbSDimitry Andric       case TargetOpcode::G_CTPOP:
14155ffd83dbSDimitry Andric         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
14165ffd83dbSDimitry Andric       default:
14175ffd83dbSDimitry Andric         return UnableToLegalize;
14185ffd83dbSDimitry Andric       }
14190b57cec5SDimitry Andric 
14200b57cec5SDimitry Andric     Observer.changingInstr(MI);
14210b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
14220b57cec5SDimitry Andric     Observer.changedInstr(MI);
14230b57cec5SDimitry Andric     return Legalized;
14240b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
14250b57cec5SDimitry Andric     if (TypeIdx != 1)
14260b57cec5SDimitry Andric       return UnableToLegalize;
14270b57cec5SDimitry Andric 
14280b57cec5SDimitry Andric     Observer.changingInstr(MI);
14290b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, 1);
14300b57cec5SDimitry Andric     Observer.changedInstr(MI);
14310b57cec5SDimitry Andric     return Legalized;
14320b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
14330b57cec5SDimitry Andric     if (TypeIdx != 0)
14340b57cec5SDimitry Andric       return UnableToLegalize;
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric     Observer.changingInstr(MI);
14370b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
14380b57cec5SDimitry Andric     Observer.changedInstr(MI);
14390b57cec5SDimitry Andric     return Legalized;
14400b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
1441d409305fSDimitry Andric     // FIXME: add support for when SizeOp0 isn't an exact multiple of
1442d409305fSDimitry Andric     // NarrowSize.
1443d409305fSDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1444d409305fSDimitry Andric       return UnableToLegalize;
1445d409305fSDimitry Andric 
14460b57cec5SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
14475ffd83dbSDimitry Andric     SmallVector<Register, 2> DstRegs(NumParts);
14485ffd83dbSDimitry Andric     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
14490b57cec5SDimitry Andric     Observer.changingInstr(MI);
14500b57cec5SDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
14510b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1452bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
14530b57cec5SDimitry Andric       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
14547a6dacacSDimitry Andric                    SrcRegs[i / 2], MIRBuilder, MRI);
14550b57cec5SDimitry Andric     }
14560b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
14570b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, MI);
14580b57cec5SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
14590b57cec5SDimitry Andric       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
14600b57cec5SDimitry Andric       MachineInstrBuilder MIB =
14610b57cec5SDimitry Andric           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
14620b57cec5SDimitry Andric       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
14630b57cec5SDimitry Andric         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
14640b57cec5SDimitry Andric     }
14658bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1466bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
14670b57cec5SDimitry Andric     Observer.changedInstr(MI);
14680b57cec5SDimitry Andric     MI.eraseFromParent();
14690b57cec5SDimitry Andric     return Legalized;
14700b57cec5SDimitry Andric   }
14710b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
14720b57cec5SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
14730b57cec5SDimitry Andric     if (TypeIdx != 2)
14740b57cec5SDimitry Andric       return UnableToLegalize;
14750b57cec5SDimitry Andric 
14760b57cec5SDimitry Andric     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
14770b57cec5SDimitry Andric     Observer.changingInstr(MI);
14780b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, OpIdx);
14790b57cec5SDimitry Andric     Observer.changedInstr(MI);
14800b57cec5SDimitry Andric     return Legalized;
14810b57cec5SDimitry Andric   }
14820b57cec5SDimitry Andric   case TargetOpcode::G_ICMP: {
1483fe6060f1SDimitry Andric     Register LHS = MI.getOperand(2).getReg();
1484fe6060f1SDimitry Andric     LLT SrcTy = MRI.getType(LHS);
1485fe6060f1SDimitry Andric     uint64_t SrcSize = SrcTy.getSizeInBits();
14860b57cec5SDimitry Andric     CmpInst::Predicate Pred =
14870b57cec5SDimitry Andric         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
14880b57cec5SDimitry Andric 
1489fe6060f1SDimitry Andric     // TODO: Handle the non-equality case for weird sizes.
1490fe6060f1SDimitry Andric     if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1491fe6060f1SDimitry Andric       return UnableToLegalize;
1492fe6060f1SDimitry Andric 
1493fe6060f1SDimitry Andric     LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1494fe6060f1SDimitry Andric     SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1495fe6060f1SDimitry Andric     if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
14967a6dacacSDimitry Andric                       LHSLeftoverRegs, MIRBuilder, MRI))
1497fe6060f1SDimitry Andric       return UnableToLegalize;
1498fe6060f1SDimitry Andric 
1499fe6060f1SDimitry Andric     LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1500fe6060f1SDimitry Andric     SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1501fe6060f1SDimitry Andric     if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
15027a6dacacSDimitry Andric                       RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1503fe6060f1SDimitry Andric       return UnableToLegalize;
1504fe6060f1SDimitry Andric 
1505fe6060f1SDimitry Andric     // We now have the LHS and RHS of the compare split into narrow-type
1506fe6060f1SDimitry Andric     // registers, plus potentially some leftover type.
1507fe6060f1SDimitry Andric     Register Dst = MI.getOperand(0).getReg();
1508fe6060f1SDimitry Andric     LLT ResTy = MRI.getType(Dst);
1509fe6060f1SDimitry Andric     if (ICmpInst::isEquality(Pred)) {
1510fe6060f1SDimitry Andric       // For each part on the LHS and RHS, keep track of the result of XOR-ing
1511fe6060f1SDimitry Andric       // them together. For each equal part, the result should be all 0s. For
1512fe6060f1SDimitry Andric       // each non-equal part, we'll get at least one 1.
1513fe6060f1SDimitry Andric       auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1514fe6060f1SDimitry Andric       SmallVector<Register, 4> Xors;
1515fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1516fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1517fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1518fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1519fe6060f1SDimitry Andric         Xors.push_back(Xor);
1520fe6060f1SDimitry Andric       }
1521fe6060f1SDimitry Andric 
1522fe6060f1SDimitry Andric       // Build a G_XOR for each leftover register. Each G_XOR must be widened
1523fe6060f1SDimitry Andric       // to the desired narrow type so that we can OR them together later.
1524fe6060f1SDimitry Andric       SmallVector<Register, 4> WidenedXors;
1525fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1526fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1527fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1528fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1529fe6060f1SDimitry Andric         LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1530fe6060f1SDimitry Andric         buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1531fe6060f1SDimitry Andric                             /* PadStrategy = */ TargetOpcode::G_ZEXT);
1532fe6060f1SDimitry Andric         Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1533fe6060f1SDimitry Andric       }
1534fe6060f1SDimitry Andric 
1535fe6060f1SDimitry Andric       // Now, for each part we broke up, we know if they are equal/not equal
1536fe6060f1SDimitry Andric       // based off the G_XOR. We can OR these all together and compare against
1537fe6060f1SDimitry Andric       // 0 to get the result.
1538fe6060f1SDimitry Andric       assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1539fe6060f1SDimitry Andric       auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1540fe6060f1SDimitry Andric       for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1541fe6060f1SDimitry Andric         Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1542fe6060f1SDimitry Andric       MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
15430b57cec5SDimitry Andric     } else {
1544fe6060f1SDimitry Andric       // TODO: Handle non-power-of-two types.
1545fe6060f1SDimitry Andric       assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1546fe6060f1SDimitry Andric       assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1547fe6060f1SDimitry Andric       Register LHSL = LHSPartRegs[0];
1548fe6060f1SDimitry Andric       Register LHSH = LHSPartRegs[1];
1549fe6060f1SDimitry Andric       Register RHSL = RHSPartRegs[0];
1550fe6060f1SDimitry Andric       Register RHSH = RHSPartRegs[1];
15518bcb0991SDimitry Andric       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
15520b57cec5SDimitry Andric       MachineInstrBuilder CmpHEQ =
15538bcb0991SDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
15540b57cec5SDimitry Andric       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
15558bcb0991SDimitry Andric           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1556fe6060f1SDimitry Andric       MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
15570b57cec5SDimitry Andric     }
15580b57cec5SDimitry Andric     MI.eraseFromParent();
15590b57cec5SDimitry Andric     return Legalized;
15600b57cec5SDimitry Andric   }
15618bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
15628bcb0991SDimitry Andric     if (TypeIdx != 0)
15638bcb0991SDimitry Andric       return UnableToLegalize;
15648bcb0991SDimitry Andric 
15658bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
15668bcb0991SDimitry Andric 
15678bcb0991SDimitry Andric     // So long as the new type has more bits than the bits we're extending we
15688bcb0991SDimitry Andric     // don't need to break it apart.
15695f757f3fSDimitry Andric     if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
15708bcb0991SDimitry Andric       Observer.changingInstr(MI);
15718bcb0991SDimitry Andric       // We don't lose any non-extension bits by truncating the src and
15728bcb0991SDimitry Andric       // sign-extending the dst.
15738bcb0991SDimitry Andric       MachineOperand &MO1 = MI.getOperand(1);
15745ffd83dbSDimitry Andric       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
15755ffd83dbSDimitry Andric       MO1.setReg(TruncMIB.getReg(0));
15768bcb0991SDimitry Andric 
15778bcb0991SDimitry Andric       MachineOperand &MO2 = MI.getOperand(0);
15788bcb0991SDimitry Andric       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
15798bcb0991SDimitry Andric       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
15805ffd83dbSDimitry Andric       MIRBuilder.buildSExt(MO2, DstExt);
15818bcb0991SDimitry Andric       MO2.setReg(DstExt);
15828bcb0991SDimitry Andric       Observer.changedInstr(MI);
15838bcb0991SDimitry Andric       return Legalized;
15848bcb0991SDimitry Andric     }
15858bcb0991SDimitry Andric 
15868bcb0991SDimitry Andric     // Break it apart. Components below the extension point are unmodified. The
15878bcb0991SDimitry Andric     // component containing the extension point becomes a narrower SEXT_INREG.
15888bcb0991SDimitry Andric     // Components above it are ashr'd from the component containing the
15898bcb0991SDimitry Andric     // extension point.
15908bcb0991SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
15918bcb0991SDimitry Andric       return UnableToLegalize;
15928bcb0991SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
15938bcb0991SDimitry Andric 
15948bcb0991SDimitry Andric     // List the registers where the destination will be scattered.
15958bcb0991SDimitry Andric     SmallVector<Register, 2> DstRegs;
15968bcb0991SDimitry Andric     // List the registers where the source will be split.
15978bcb0991SDimitry Andric     SmallVector<Register, 2> SrcRegs;
15988bcb0991SDimitry Andric 
15998bcb0991SDimitry Andric     // Create all the temporary registers.
16008bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
16018bcb0991SDimitry Andric       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
16028bcb0991SDimitry Andric 
16038bcb0991SDimitry Andric       SrcRegs.push_back(SrcReg);
16048bcb0991SDimitry Andric     }
16058bcb0991SDimitry Andric 
16068bcb0991SDimitry Andric     // Explode the big arguments into smaller chunks.
16075ffd83dbSDimitry Andric     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
16088bcb0991SDimitry Andric 
16098bcb0991SDimitry Andric     Register AshrCstReg =
16108bcb0991SDimitry Andric         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
16115ffd83dbSDimitry Andric             .getReg(0);
16125f757f3fSDimitry Andric     Register FullExtensionReg;
16135f757f3fSDimitry Andric     Register PartialExtensionReg;
16148bcb0991SDimitry Andric 
16158bcb0991SDimitry Andric     // Do the operation on each small part.
16168bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
16175f757f3fSDimitry Andric       if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
16188bcb0991SDimitry Andric         DstRegs.push_back(SrcRegs[i]);
16195f757f3fSDimitry Andric         PartialExtensionReg = DstRegs.back();
16205f757f3fSDimitry Andric       } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
16218bcb0991SDimitry Andric         assert(PartialExtensionReg &&
16228bcb0991SDimitry Andric                "Expected to visit partial extension before full");
16238bcb0991SDimitry Andric         if (FullExtensionReg) {
16248bcb0991SDimitry Andric           DstRegs.push_back(FullExtensionReg);
16258bcb0991SDimitry Andric           continue;
16268bcb0991SDimitry Andric         }
16275ffd83dbSDimitry Andric         DstRegs.push_back(
16285ffd83dbSDimitry Andric             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
16295ffd83dbSDimitry Andric                 .getReg(0));
16308bcb0991SDimitry Andric         FullExtensionReg = DstRegs.back();
16318bcb0991SDimitry Andric       } else {
16328bcb0991SDimitry Andric         DstRegs.push_back(
16338bcb0991SDimitry Andric             MIRBuilder
16348bcb0991SDimitry Andric                 .buildInstr(
16358bcb0991SDimitry Andric                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
16368bcb0991SDimitry Andric                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
16375ffd83dbSDimitry Andric                 .getReg(0));
16388bcb0991SDimitry Andric         PartialExtensionReg = DstRegs.back();
16398bcb0991SDimitry Andric       }
16408bcb0991SDimitry Andric     }
16418bcb0991SDimitry Andric 
16428bcb0991SDimitry Andric     // Gather the destination registers into the final destination.
16438bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
1644bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
16458bcb0991SDimitry Andric     MI.eraseFromParent();
16468bcb0991SDimitry Andric     return Legalized;
16478bcb0991SDimitry Andric   }
1648480093f4SDimitry Andric   case TargetOpcode::G_BSWAP:
1649480093f4SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
1650480093f4SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1651480093f4SDimitry Andric       return UnableToLegalize;
1652480093f4SDimitry Andric 
1653480093f4SDimitry Andric     Observer.changingInstr(MI);
1654480093f4SDimitry Andric     SmallVector<Register, 2> SrcRegs, DstRegs;
1655480093f4SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
16567a6dacacSDimitry Andric     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
16577a6dacacSDimitry Andric                  MIRBuilder, MRI);
1658480093f4SDimitry Andric 
1659480093f4SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
1660480093f4SDimitry Andric       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1661480093f4SDimitry Andric                                            {SrcRegs[NumParts - 1 - i]});
1662480093f4SDimitry Andric       DstRegs.push_back(DstPart.getReg(0));
1663480093f4SDimitry Andric     }
1664480093f4SDimitry Andric 
1665bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1666480093f4SDimitry Andric 
1667480093f4SDimitry Andric     Observer.changedInstr(MI);
1668480093f4SDimitry Andric     MI.eraseFromParent();
1669480093f4SDimitry Andric     return Legalized;
1670480093f4SDimitry Andric   }
1671e8d8bef9SDimitry Andric   case TargetOpcode::G_PTR_ADD:
16725ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
16735ffd83dbSDimitry Andric     if (TypeIdx != 1)
16745ffd83dbSDimitry Andric       return UnableToLegalize;
16755ffd83dbSDimitry Andric     Observer.changingInstr(MI);
16765ffd83dbSDimitry Andric     narrowScalarSrc(MI, NarrowTy, 2);
16775ffd83dbSDimitry Andric     Observer.changedInstr(MI);
16785ffd83dbSDimitry Andric     return Legalized;
16790b57cec5SDimitry Andric   }
168023408297SDimitry Andric   case TargetOpcode::G_FPTOUI:
168123408297SDimitry Andric   case TargetOpcode::G_FPTOSI:
168223408297SDimitry Andric     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1683e8d8bef9SDimitry Andric   case TargetOpcode::G_FPEXT:
1684e8d8bef9SDimitry Andric     if (TypeIdx != 0)
1685e8d8bef9SDimitry Andric       return UnableToLegalize;
1686e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
1687e8d8bef9SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1688e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
1689e8d8bef9SDimitry Andric     return Legalized;
169006c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
169106c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP:
169206c3fb27SDimitry Andric     return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
16930b57cec5SDimitry Andric   }
16945ffd83dbSDimitry Andric }
16955ffd83dbSDimitry Andric 
16965ffd83dbSDimitry Andric Register LegalizerHelper::coerceToScalar(Register Val) {
16975ffd83dbSDimitry Andric   LLT Ty = MRI.getType(Val);
16985ffd83dbSDimitry Andric   if (Ty.isScalar())
16995ffd83dbSDimitry Andric     return Val;
17005ffd83dbSDimitry Andric 
17015ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
17025ffd83dbSDimitry Andric   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
17035ffd83dbSDimitry Andric   if (Ty.isPointer()) {
17045ffd83dbSDimitry Andric     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
17055ffd83dbSDimitry Andric       return Register();
17065ffd83dbSDimitry Andric     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
17075ffd83dbSDimitry Andric   }
17085ffd83dbSDimitry Andric 
17095ffd83dbSDimitry Andric   Register NewVal = Val;
17105ffd83dbSDimitry Andric 
17115ffd83dbSDimitry Andric   assert(Ty.isVector());
17125ffd83dbSDimitry Andric   LLT EltTy = Ty.getElementType();
17135ffd83dbSDimitry Andric   if (EltTy.isPointer())
17145ffd83dbSDimitry Andric     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
17155ffd83dbSDimitry Andric   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
17165ffd83dbSDimitry Andric }
17170b57cec5SDimitry Andric 
17180b57cec5SDimitry Andric void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
17190b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned ExtOpcode) {
17200b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17215ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
17225ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
17230b57cec5SDimitry Andric }
17240b57cec5SDimitry Andric 
17250b57cec5SDimitry Andric void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
17260b57cec5SDimitry Andric                                       unsigned OpIdx) {
17270b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17285ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
17295ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
17300b57cec5SDimitry Andric }
17310b57cec5SDimitry Andric 
17320b57cec5SDimitry Andric void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
17330b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned TruncOpcode) {
17340b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17350b57cec5SDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
17360b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
17375ffd83dbSDimitry Andric   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
17380b57cec5SDimitry Andric   MO.setReg(DstExt);
17390b57cec5SDimitry Andric }
17400b57cec5SDimitry Andric 
17410b57cec5SDimitry Andric void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
17420b57cec5SDimitry Andric                                       unsigned OpIdx, unsigned ExtOpcode) {
17430b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17440b57cec5SDimitry Andric   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
17450b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
17465ffd83dbSDimitry Andric   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
17470b57cec5SDimitry Andric   MO.setReg(DstTrunc);
17480b57cec5SDimitry Andric }
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
17510b57cec5SDimitry Andric                                             unsigned OpIdx) {
17520b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17530b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
17540eae32dcSDimitry Andric   Register Dst = MO.getReg();
17550eae32dcSDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
17560eae32dcSDimitry Andric   MO.setReg(DstExt);
17570eae32dcSDimitry Andric   MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
17580b57cec5SDimitry Andric }
17590b57cec5SDimitry Andric 
17600b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
17610b57cec5SDimitry Andric                                             unsigned OpIdx) {
17620b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17630eae32dcSDimitry Andric   SmallVector<Register, 8> Regs;
17640eae32dcSDimitry Andric   MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
17650b57cec5SDimitry Andric }
17660b57cec5SDimitry Andric 
17675ffd83dbSDimitry Andric void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
17685ffd83dbSDimitry Andric   MachineOperand &Op = MI.getOperand(OpIdx);
17695ffd83dbSDimitry Andric   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
17705ffd83dbSDimitry Andric }
17715ffd83dbSDimitry Andric 
17725ffd83dbSDimitry Andric void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
17735ffd83dbSDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
17745ffd83dbSDimitry Andric   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
17755ffd83dbSDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
17765ffd83dbSDimitry Andric   MIRBuilder.buildBitcast(MO, CastDst);
17775ffd83dbSDimitry Andric   MO.setReg(CastDst);
17785ffd83dbSDimitry Andric }
17795ffd83dbSDimitry Andric 
17800b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
17810b57cec5SDimitry Andric LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
17820b57cec5SDimitry Andric                                         LLT WideTy) {
17830b57cec5SDimitry Andric   if (TypeIdx != 1)
17840b57cec5SDimitry Andric     return UnableToLegalize;
17850b57cec5SDimitry Andric 
178606c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
17870b57cec5SDimitry Andric   if (DstTy.isVector())
17880b57cec5SDimitry Andric     return UnableToLegalize;
17890b57cec5SDimitry Andric 
179006c3fb27SDimitry Andric   LLT SrcTy = MRI.getType(Src1Reg);
17910b57cec5SDimitry Andric   const int DstSize = DstTy.getSizeInBits();
17920b57cec5SDimitry Andric   const int SrcSize = SrcTy.getSizeInBits();
17930b57cec5SDimitry Andric   const int WideSize = WideTy.getSizeInBits();
17940b57cec5SDimitry Andric   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
17950b57cec5SDimitry Andric 
17960b57cec5SDimitry Andric   unsigned NumOps = MI.getNumOperands();
17970b57cec5SDimitry Andric   unsigned NumSrc = MI.getNumOperands() - 1;
17980b57cec5SDimitry Andric   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
17990b57cec5SDimitry Andric 
18000b57cec5SDimitry Andric   if (WideSize >= DstSize) {
18010b57cec5SDimitry Andric     // Directly pack the bits in the target type.
180206c3fb27SDimitry Andric     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
18030b57cec5SDimitry Andric 
18040b57cec5SDimitry Andric     for (unsigned I = 2; I != NumOps; ++I) {
18050b57cec5SDimitry Andric       const unsigned Offset = (I - 1) * PartSize;
18060b57cec5SDimitry Andric 
18070b57cec5SDimitry Andric       Register SrcReg = MI.getOperand(I).getReg();
18080b57cec5SDimitry Andric       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
18090b57cec5SDimitry Andric 
18100b57cec5SDimitry Andric       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
18110b57cec5SDimitry Andric 
18128bcb0991SDimitry Andric       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
18130b57cec5SDimitry Andric         MRI.createGenericVirtualRegister(WideTy);
18140b57cec5SDimitry Andric 
18150b57cec5SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
18160b57cec5SDimitry Andric       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
18170b57cec5SDimitry Andric       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
18180b57cec5SDimitry Andric       ResultReg = NextResult;
18190b57cec5SDimitry Andric     }
18200b57cec5SDimitry Andric 
18210b57cec5SDimitry Andric     if (WideSize > DstSize)
18220b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg, ResultReg);
18238bcb0991SDimitry Andric     else if (DstTy.isPointer())
18248bcb0991SDimitry Andric       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
18250b57cec5SDimitry Andric 
18260b57cec5SDimitry Andric     MI.eraseFromParent();
18270b57cec5SDimitry Andric     return Legalized;
18280b57cec5SDimitry Andric   }
18290b57cec5SDimitry Andric 
18300b57cec5SDimitry Andric   // Unmerge the original values to the GCD type, and recombine to the next
18310b57cec5SDimitry Andric   // multiple greater than the original type.
18320b57cec5SDimitry Andric   //
18330b57cec5SDimitry Andric   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
18340b57cec5SDimitry Andric   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
18350b57cec5SDimitry Andric   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
18360b57cec5SDimitry Andric   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
18370b57cec5SDimitry Andric   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
18380b57cec5SDimitry Andric   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
18390b57cec5SDimitry Andric   // %12:_(s12) = G_MERGE_VALUES %10, %11
18400b57cec5SDimitry Andric   //
18410b57cec5SDimitry Andric   // Padding with undef if necessary:
18420b57cec5SDimitry Andric   //
18430b57cec5SDimitry Andric   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
18440b57cec5SDimitry Andric   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
18450b57cec5SDimitry Andric   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
18460b57cec5SDimitry Andric   // %7:_(s2) = G_IMPLICIT_DEF
18470b57cec5SDimitry Andric   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
18480b57cec5SDimitry Andric   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
18490b57cec5SDimitry Andric   // %10:_(s12) = G_MERGE_VALUES %8, %9
18500b57cec5SDimitry Andric 
1851bdd1243dSDimitry Andric   const int GCD = std::gcd(SrcSize, WideSize);
18520b57cec5SDimitry Andric   LLT GCDTy = LLT::scalar(GCD);
18530b57cec5SDimitry Andric 
18540b57cec5SDimitry Andric   SmallVector<Register, 8> Parts;
18550b57cec5SDimitry Andric   SmallVector<Register, 8> NewMergeRegs;
18560b57cec5SDimitry Andric   SmallVector<Register, 8> Unmerges;
18570b57cec5SDimitry Andric   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
18580b57cec5SDimitry Andric 
18590b57cec5SDimitry Andric   // Decompose the original operands if they don't evenly divide.
18604824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
18614824e7fdSDimitry Andric     Register SrcReg = MO.getReg();
18620b57cec5SDimitry Andric     if (GCD == SrcSize) {
18630b57cec5SDimitry Andric       Unmerges.push_back(SrcReg);
18640b57cec5SDimitry Andric     } else {
18650b57cec5SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
18660b57cec5SDimitry Andric       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
18670b57cec5SDimitry Andric         Unmerges.push_back(Unmerge.getReg(J));
18680b57cec5SDimitry Andric     }
18690b57cec5SDimitry Andric   }
18700b57cec5SDimitry Andric 
18710b57cec5SDimitry Andric   // Pad with undef to the next size that is a multiple of the requested size.
18720b57cec5SDimitry Andric   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
18730b57cec5SDimitry Andric     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
18740b57cec5SDimitry Andric     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
18750b57cec5SDimitry Andric       Unmerges.push_back(UndefReg);
18760b57cec5SDimitry Andric   }
18770b57cec5SDimitry Andric 
18780b57cec5SDimitry Andric   const int PartsPerGCD = WideSize / GCD;
18790b57cec5SDimitry Andric 
18800b57cec5SDimitry Andric   // Build merges of each piece.
18810b57cec5SDimitry Andric   ArrayRef<Register> Slicer(Unmerges);
18820b57cec5SDimitry Andric   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1883bdd1243dSDimitry Andric     auto Merge =
1884bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
18850b57cec5SDimitry Andric     NewMergeRegs.push_back(Merge.getReg(0));
18860b57cec5SDimitry Andric   }
18870b57cec5SDimitry Andric 
18880b57cec5SDimitry Andric   // A truncate may be necessary if the requested type doesn't evenly divide the
18890b57cec5SDimitry Andric   // original result type.
18900b57cec5SDimitry Andric   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1891bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
18920b57cec5SDimitry Andric   } else {
1893bdd1243dSDimitry Andric     auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
18940b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
18950b57cec5SDimitry Andric   }
18960b57cec5SDimitry Andric 
18970b57cec5SDimitry Andric   MI.eraseFromParent();
18980b57cec5SDimitry Andric   return Legalized;
18990b57cec5SDimitry Andric }
19000b57cec5SDimitry Andric 
19010b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
19020b57cec5SDimitry Andric LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
19030b57cec5SDimitry Andric                                           LLT WideTy) {
19040b57cec5SDimitry Andric   if (TypeIdx != 0)
19050b57cec5SDimitry Andric     return UnableToLegalize;
19060b57cec5SDimitry Andric 
19075ffd83dbSDimitry Andric   int NumDst = MI.getNumOperands() - 1;
19080b57cec5SDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
19090b57cec5SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
19105ffd83dbSDimitry Andric   if (SrcTy.isVector())
19110b57cec5SDimitry Andric     return UnableToLegalize;
19120b57cec5SDimitry Andric 
19130b57cec5SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
19140b57cec5SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
19150b57cec5SDimitry Andric   if (!DstTy.isScalar())
19160b57cec5SDimitry Andric     return UnableToLegalize;
19170b57cec5SDimitry Andric 
19185ffd83dbSDimitry Andric   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
19195ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
19205ffd83dbSDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
19215ffd83dbSDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
19225ffd83dbSDimitry Andric         LLVM_DEBUG(
19235ffd83dbSDimitry Andric             dbgs() << "Not casting non-integral address space integer\n");
19245ffd83dbSDimitry Andric         return UnableToLegalize;
19250b57cec5SDimitry Andric       }
19260b57cec5SDimitry Andric 
19275ffd83dbSDimitry Andric       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
19285ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
19295ffd83dbSDimitry Andric     }
19300b57cec5SDimitry Andric 
19315ffd83dbSDimitry Andric     // Widen SrcTy to WideTy. This does not affect the result, but since the
19325ffd83dbSDimitry Andric     // user requested this size, it is probably better handled than SrcTy and
193304eeddc0SDimitry Andric     // should reduce the total number of legalization artifacts.
19345ffd83dbSDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
19355ffd83dbSDimitry Andric       SrcTy = WideTy;
19365ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
19375ffd83dbSDimitry Andric     }
19380b57cec5SDimitry Andric 
19395ffd83dbSDimitry Andric     // Theres no unmerge type to target. Directly extract the bits from the
19405ffd83dbSDimitry Andric     // source type
19415ffd83dbSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
19420b57cec5SDimitry Andric 
19435ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
19445ffd83dbSDimitry Andric     for (int I = 1; I != NumDst; ++I) {
19455ffd83dbSDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
19465ffd83dbSDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
19475ffd83dbSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
19485ffd83dbSDimitry Andric     }
19495ffd83dbSDimitry Andric 
19505ffd83dbSDimitry Andric     MI.eraseFromParent();
19515ffd83dbSDimitry Andric     return Legalized;
19525ffd83dbSDimitry Andric   }
19535ffd83dbSDimitry Andric 
19545ffd83dbSDimitry Andric   // Extend the source to a wider type.
19555ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(SrcTy, WideTy);
19565ffd83dbSDimitry Andric 
19575ffd83dbSDimitry Andric   Register WideSrc = SrcReg;
19585ffd83dbSDimitry Andric   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
19595ffd83dbSDimitry Andric     // TODO: If this is an integral address space, cast to integer and anyext.
19605ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
19615ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
19625ffd83dbSDimitry Andric       return UnableToLegalize;
19635ffd83dbSDimitry Andric     }
19645ffd83dbSDimitry Andric 
19655ffd83dbSDimitry Andric     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
19665ffd83dbSDimitry Andric   }
19675ffd83dbSDimitry Andric 
19685ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
19695ffd83dbSDimitry Andric 
1970e8d8bef9SDimitry Andric   // Create a sequence of unmerges and merges to the original results. Since we
1971e8d8bef9SDimitry Andric   // may have widened the source, we will need to pad the results with dead defs
1972e8d8bef9SDimitry Andric   // to cover the source register.
1973e8d8bef9SDimitry Andric   // e.g. widen s48 to s64:
1974e8d8bef9SDimitry Andric   // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
19755ffd83dbSDimitry Andric   //
19765ffd83dbSDimitry Andric   // =>
1977e8d8bef9SDimitry Andric   //  %4:_(s192) = G_ANYEXT %0:_(s96)
1978e8d8bef9SDimitry Andric   //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1979e8d8bef9SDimitry Andric   //  ; unpack to GCD type, with extra dead defs
1980e8d8bef9SDimitry Andric   //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1981e8d8bef9SDimitry Andric   //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1982e8d8bef9SDimitry Andric   //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1983e8d8bef9SDimitry Andric   //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
1984e8d8bef9SDimitry Andric   //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1985e8d8bef9SDimitry Andric   const LLT GCDTy = getGCDType(WideTy, DstTy);
19865ffd83dbSDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
1987e8d8bef9SDimitry Andric   const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1988e8d8bef9SDimitry Andric 
1989e8d8bef9SDimitry Andric   // Directly unmerge to the destination without going through a GCD type
1990e8d8bef9SDimitry Andric   // if possible
1991e8d8bef9SDimitry Andric   if (PartsPerRemerge == 1) {
19925ffd83dbSDimitry Andric     const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
19935ffd83dbSDimitry Andric 
19945ffd83dbSDimitry Andric     for (int I = 0; I != NumUnmerge; ++I) {
19955ffd83dbSDimitry Andric       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
19965ffd83dbSDimitry Andric 
19975ffd83dbSDimitry Andric       for (int J = 0; J != PartsPerUnmerge; ++J) {
19985ffd83dbSDimitry Andric         int Idx = I * PartsPerUnmerge + J;
19995ffd83dbSDimitry Andric         if (Idx < NumDst)
20005ffd83dbSDimitry Andric           MIB.addDef(MI.getOperand(Idx).getReg());
20015ffd83dbSDimitry Andric         else {
20025ffd83dbSDimitry Andric           // Create dead def for excess components.
20035ffd83dbSDimitry Andric           MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
20045ffd83dbSDimitry Andric         }
20055ffd83dbSDimitry Andric       }
20065ffd83dbSDimitry Andric 
20075ffd83dbSDimitry Andric       MIB.addUse(Unmerge.getReg(I));
20085ffd83dbSDimitry Andric     }
2009e8d8bef9SDimitry Andric   } else {
2010e8d8bef9SDimitry Andric     SmallVector<Register, 16> Parts;
2011e8d8bef9SDimitry Andric     for (int J = 0; J != NumUnmerge; ++J)
2012e8d8bef9SDimitry Andric       extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2013e8d8bef9SDimitry Andric 
2014e8d8bef9SDimitry Andric     SmallVector<Register, 8> RemergeParts;
2015e8d8bef9SDimitry Andric     for (int I = 0; I != NumDst; ++I) {
2016e8d8bef9SDimitry Andric       for (int J = 0; J < PartsPerRemerge; ++J) {
2017e8d8bef9SDimitry Andric         const int Idx = I * PartsPerRemerge + J;
2018e8d8bef9SDimitry Andric         RemergeParts.emplace_back(Parts[Idx]);
2019e8d8bef9SDimitry Andric       }
2020e8d8bef9SDimitry Andric 
2021bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2022e8d8bef9SDimitry Andric       RemergeParts.clear();
2023e8d8bef9SDimitry Andric     }
2024e8d8bef9SDimitry Andric   }
20255ffd83dbSDimitry Andric 
20265ffd83dbSDimitry Andric   MI.eraseFromParent();
20270b57cec5SDimitry Andric   return Legalized;
20280b57cec5SDimitry Andric }
20290b57cec5SDimitry Andric 
20300b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
20310b57cec5SDimitry Andric LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
20320b57cec5SDimitry Andric                                     LLT WideTy) {
203306c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
20340b57cec5SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
20350b57cec5SDimitry Andric 
20360b57cec5SDimitry Andric   if (TypeIdx == 0) {
20370b57cec5SDimitry Andric     if (SrcTy.isVector() || DstTy.isVector())
20380b57cec5SDimitry Andric       return UnableToLegalize;
20390b57cec5SDimitry Andric 
20400b57cec5SDimitry Andric     SrcOp Src(SrcReg);
20410b57cec5SDimitry Andric     if (SrcTy.isPointer()) {
20420b57cec5SDimitry Andric       // Extracts from pointers can be handled only if they are really just
20430b57cec5SDimitry Andric       // simple integers.
20440b57cec5SDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
20450b57cec5SDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
20460b57cec5SDimitry Andric         return UnableToLegalize;
20470b57cec5SDimitry Andric 
20480b57cec5SDimitry Andric       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
20490b57cec5SDimitry Andric       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
20500b57cec5SDimitry Andric       SrcTy = SrcAsIntTy;
20510b57cec5SDimitry Andric     }
20520b57cec5SDimitry Andric 
20530b57cec5SDimitry Andric     if (DstTy.isPointer())
20540b57cec5SDimitry Andric       return UnableToLegalize;
20550b57cec5SDimitry Andric 
20560b57cec5SDimitry Andric     if (Offset == 0) {
20570b57cec5SDimitry Andric       // Avoid a shift in the degenerate case.
20580b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg,
20590b57cec5SDimitry Andric                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
20600b57cec5SDimitry Andric       MI.eraseFromParent();
20610b57cec5SDimitry Andric       return Legalized;
20620b57cec5SDimitry Andric     }
20630b57cec5SDimitry Andric 
20640b57cec5SDimitry Andric     // Do a shift in the source type.
20650b57cec5SDimitry Andric     LLT ShiftTy = SrcTy;
20660b57cec5SDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
20670b57cec5SDimitry Andric       Src = MIRBuilder.buildAnyExt(WideTy, Src);
20680b57cec5SDimitry Andric       ShiftTy = WideTy;
2069e8d8bef9SDimitry Andric     }
20700b57cec5SDimitry Andric 
20710b57cec5SDimitry Andric     auto LShr = MIRBuilder.buildLShr(
20720b57cec5SDimitry Andric       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
20730b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, LShr);
20740b57cec5SDimitry Andric     MI.eraseFromParent();
20750b57cec5SDimitry Andric     return Legalized;
20760b57cec5SDimitry Andric   }
20770b57cec5SDimitry Andric 
20780b57cec5SDimitry Andric   if (SrcTy.isScalar()) {
20790b57cec5SDimitry Andric     Observer.changingInstr(MI);
20800b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
20810b57cec5SDimitry Andric     Observer.changedInstr(MI);
20820b57cec5SDimitry Andric     return Legalized;
20830b57cec5SDimitry Andric   }
20840b57cec5SDimitry Andric 
20850b57cec5SDimitry Andric   if (!SrcTy.isVector())
20860b57cec5SDimitry Andric     return UnableToLegalize;
20870b57cec5SDimitry Andric 
20880b57cec5SDimitry Andric   if (DstTy != SrcTy.getElementType())
20890b57cec5SDimitry Andric     return UnableToLegalize;
20900b57cec5SDimitry Andric 
20910b57cec5SDimitry Andric   if (Offset % SrcTy.getScalarSizeInBits() != 0)
20920b57cec5SDimitry Andric     return UnableToLegalize;
20930b57cec5SDimitry Andric 
20940b57cec5SDimitry Andric   Observer.changingInstr(MI);
20950b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
20960b57cec5SDimitry Andric 
20970b57cec5SDimitry Andric   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
20980b57cec5SDimitry Andric                           Offset);
20990b57cec5SDimitry Andric   widenScalarDst(MI, WideTy.getScalarType(), 0);
21000b57cec5SDimitry Andric   Observer.changedInstr(MI);
21010b57cec5SDimitry Andric   return Legalized;
21020b57cec5SDimitry Andric }
21030b57cec5SDimitry Andric 
21040b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
21050b57cec5SDimitry Andric LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
21060b57cec5SDimitry Andric                                    LLT WideTy) {
2107e8d8bef9SDimitry Andric   if (TypeIdx != 0 || WideTy.isVector())
21080b57cec5SDimitry Andric     return UnableToLegalize;
21090b57cec5SDimitry Andric   Observer.changingInstr(MI);
21100b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
21110b57cec5SDimitry Andric   widenScalarDst(MI, WideTy);
21120b57cec5SDimitry Andric   Observer.changedInstr(MI);
21130b57cec5SDimitry Andric   return Legalized;
21140b57cec5SDimitry Andric }
21150b57cec5SDimitry Andric 
21160b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
2117fe6060f1SDimitry Andric LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2118e8d8bef9SDimitry Andric                                            LLT WideTy) {
2119fe6060f1SDimitry Andric   unsigned Opcode;
2120fe6060f1SDimitry Andric   unsigned ExtOpcode;
2121bdd1243dSDimitry Andric   std::optional<Register> CarryIn;
2122fe6060f1SDimitry Andric   switch (MI.getOpcode()) {
2123fe6060f1SDimitry Andric   default:
2124fe6060f1SDimitry Andric     llvm_unreachable("Unexpected opcode!");
2125fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
2126fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2127fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2128fe6060f1SDimitry Andric     break;
2129fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
2130fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2131fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2132fe6060f1SDimitry Andric     break;
2133fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
2134fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2135fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2136fe6060f1SDimitry Andric     break;
2137fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
2138fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2139fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2140fe6060f1SDimitry Andric     break;
2141fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2142fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2143fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2144fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2145fe6060f1SDimitry Andric     break;
2146fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2147fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2148fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2149fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2150fe6060f1SDimitry Andric     break;
2151fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2152fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2153fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2154fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2155fe6060f1SDimitry Andric     break;
2156fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2157fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2158fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2159fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2160fe6060f1SDimitry Andric     break;
2161fe6060f1SDimitry Andric   }
2162fe6060f1SDimitry Andric 
216381ad6265SDimitry Andric   if (TypeIdx == 1) {
216481ad6265SDimitry Andric     unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
216581ad6265SDimitry Andric 
216681ad6265SDimitry Andric     Observer.changingInstr(MI);
216781ad6265SDimitry Andric     if (CarryIn)
216881ad6265SDimitry Andric       widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2169bdd1243dSDimitry Andric     widenScalarDst(MI, WideTy, 1);
217081ad6265SDimitry Andric 
217181ad6265SDimitry Andric     Observer.changedInstr(MI);
217281ad6265SDimitry Andric     return Legalized;
217381ad6265SDimitry Andric   }
217481ad6265SDimitry Andric 
2175e8d8bef9SDimitry Andric   auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2176e8d8bef9SDimitry Andric   auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2177e8d8bef9SDimitry Andric   // Do the arithmetic in the larger type.
2178fe6060f1SDimitry Andric   Register NewOp;
2179fe6060f1SDimitry Andric   if (CarryIn) {
2180fe6060f1SDimitry Andric     LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2181fe6060f1SDimitry Andric     NewOp = MIRBuilder
2182fe6060f1SDimitry Andric                 .buildInstr(Opcode, {WideTy, CarryOutTy},
2183fe6060f1SDimitry Andric                             {LHSExt, RHSExt, *CarryIn})
2184fe6060f1SDimitry Andric                 .getReg(0);
2185fe6060f1SDimitry Andric   } else {
2186fe6060f1SDimitry Andric     NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2187fe6060f1SDimitry Andric   }
2188e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2189e8d8bef9SDimitry Andric   auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2190e8d8bef9SDimitry Andric   auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2191e8d8bef9SDimitry Andric   // There is no overflow if the ExtOp is the same as NewOp.
2192e8d8bef9SDimitry Andric   MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2193e8d8bef9SDimitry Andric   // Now trunc the NewOp to the original result.
2194e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2195e8d8bef9SDimitry Andric   MI.eraseFromParent();
2196e8d8bef9SDimitry Andric   return Legalized;
2197e8d8bef9SDimitry Andric }
2198e8d8bef9SDimitry Andric 
2199e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
2200e8d8bef9SDimitry Andric LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
22015ffd83dbSDimitry Andric                                          LLT WideTy) {
22025ffd83dbSDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2203e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2204e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2205e8d8bef9SDimitry Andric   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2206e8d8bef9SDimitry Andric                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
22075ffd83dbSDimitry Andric   // We can convert this to:
22085ffd83dbSDimitry Andric   //   1. Any extend iN to iM
22095ffd83dbSDimitry Andric   //   2. SHL by M-N
2210e8d8bef9SDimitry Andric   //   3. [US][ADD|SUB|SHL]SAT
22115ffd83dbSDimitry Andric   //   4. L/ASHR by M-N
22125ffd83dbSDimitry Andric   //
22135ffd83dbSDimitry Andric   // It may be more efficient to lower this to a min and a max operation in
22145ffd83dbSDimitry Andric   // the higher precision arithmetic if the promoted operation isn't legal,
22155ffd83dbSDimitry Andric   // but this decision is up to the target's lowering request.
22165ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
22170b57cec5SDimitry Andric 
22185ffd83dbSDimitry Andric   unsigned NewBits = WideTy.getScalarSizeInBits();
22195ffd83dbSDimitry Andric   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
22205ffd83dbSDimitry Andric 
2221e8d8bef9SDimitry Andric   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2222e8d8bef9SDimitry Andric   // must not left shift the RHS to preserve the shift amount.
22235ffd83dbSDimitry Andric   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2224e8d8bef9SDimitry Andric   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2225e8d8bef9SDimitry Andric                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
22265ffd83dbSDimitry Andric   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
22275ffd83dbSDimitry Andric   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2228e8d8bef9SDimitry Andric   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
22295ffd83dbSDimitry Andric 
22305ffd83dbSDimitry Andric   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
22315ffd83dbSDimitry Andric                                         {ShiftL, ShiftR}, MI.getFlags());
22325ffd83dbSDimitry Andric 
22335ffd83dbSDimitry Andric   // Use a shift that will preserve the number of sign bits when the trunc is
22345ffd83dbSDimitry Andric   // folded away.
22355ffd83dbSDimitry Andric   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
22365ffd83dbSDimitry Andric                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
22375ffd83dbSDimitry Andric 
22385ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(DstReg, Result);
22395ffd83dbSDimitry Andric   MI.eraseFromParent();
22405ffd83dbSDimitry Andric   return Legalized;
22415ffd83dbSDimitry Andric }
22425ffd83dbSDimitry Andric 
22435ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
2244fe6060f1SDimitry Andric LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2245fe6060f1SDimitry Andric                                  LLT WideTy) {
224681ad6265SDimitry Andric   if (TypeIdx == 1) {
224781ad6265SDimitry Andric     Observer.changingInstr(MI);
224881ad6265SDimitry Andric     widenScalarDst(MI, WideTy, 1);
224981ad6265SDimitry Andric     Observer.changedInstr(MI);
225081ad6265SDimitry Andric     return Legalized;
225181ad6265SDimitry Andric   }
2252fe6060f1SDimitry Andric 
2253fe6060f1SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
225406c3fb27SDimitry Andric   auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2255fe6060f1SDimitry Andric   LLT SrcTy = MRI.getType(LHS);
2256fe6060f1SDimitry Andric   LLT OverflowTy = MRI.getType(OriginalOverflow);
2257fe6060f1SDimitry Andric   unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2258fe6060f1SDimitry Andric 
2259fe6060f1SDimitry Andric   // To determine if the result overflowed in the larger type, we extend the
2260fe6060f1SDimitry Andric   // input to the larger type, do the multiply (checking if it overflows),
2261fe6060f1SDimitry Andric   // then also check the high bits of the result to see if overflow happened
2262fe6060f1SDimitry Andric   // there.
2263fe6060f1SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2264fe6060f1SDimitry Andric   auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2265fe6060f1SDimitry Andric   auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2266fe6060f1SDimitry Andric 
22675f757f3fSDimitry Andric   // Multiplication cannot overflow if the WideTy is >= 2 * original width,
22685f757f3fSDimitry Andric   // so we don't need to check the overflow result of larger type Mulo.
22695f757f3fSDimitry Andric   bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
22705f757f3fSDimitry Andric 
22715f757f3fSDimitry Andric   unsigned MulOpc =
22725f757f3fSDimitry Andric       WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
22735f757f3fSDimitry Andric 
22745f757f3fSDimitry Andric   MachineInstrBuilder Mulo;
22755f757f3fSDimitry Andric   if (WideMulCanOverflow)
22765f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2277fe6060f1SDimitry Andric                                  {LeftOperand, RightOperand});
22785f757f3fSDimitry Andric   else
22795f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
22805f757f3fSDimitry Andric 
2281fe6060f1SDimitry Andric   auto Mul = Mulo->getOperand(0);
2282fe6060f1SDimitry Andric   MIRBuilder.buildTrunc(Result, Mul);
2283fe6060f1SDimitry Andric 
2284fe6060f1SDimitry Andric   MachineInstrBuilder ExtResult;
2285fe6060f1SDimitry Andric   // Overflow occurred if it occurred in the larger type, or if the high part
2286fe6060f1SDimitry Andric   // of the result does not zero/sign-extend the low part.  Check this second
2287fe6060f1SDimitry Andric   // possibility first.
2288fe6060f1SDimitry Andric   if (IsSigned) {
2289fe6060f1SDimitry Andric     // For signed, overflow occurred when the high part does not sign-extend
2290fe6060f1SDimitry Andric     // the low part.
2291fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2292fe6060f1SDimitry Andric   } else {
2293fe6060f1SDimitry Andric     // Unsigned overflow occurred when the high part does not zero-extend the
2294fe6060f1SDimitry Andric     // low part.
2295fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2296fe6060f1SDimitry Andric   }
2297fe6060f1SDimitry Andric 
22985f757f3fSDimitry Andric   if (WideMulCanOverflow) {
2299fe6060f1SDimitry Andric     auto Overflow =
2300fe6060f1SDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2301fe6060f1SDimitry Andric     // Finally check if the multiplication in the larger type itself overflowed.
2302fe6060f1SDimitry Andric     MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2303fe6060f1SDimitry Andric   } else {
2304fe6060f1SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2305fe6060f1SDimitry Andric   }
2306fe6060f1SDimitry Andric   MI.eraseFromParent();
2307fe6060f1SDimitry Andric   return Legalized;
2308fe6060f1SDimitry Andric }
2309fe6060f1SDimitry Andric 
2310fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
23115ffd83dbSDimitry Andric LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
23120b57cec5SDimitry Andric   switch (MI.getOpcode()) {
23130b57cec5SDimitry Andric   default:
23140b57cec5SDimitry Andric     return UnableToLegalize;
2315fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
2316fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
2317fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
2318fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
2319fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
2320fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
2321fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MIN:
2322fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MAX:
2323fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMIN:
2324fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMAX:
2325fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2326fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2327fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2328fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2329fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2330fe6060f1SDimitry Andric     return Legalized;
2331fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
2332fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2333fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2334fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2335fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2336fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2337fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2338fe6060f1SDimitry Andric     return Legalized;
2339fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2340fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2341fe6060f1SDimitry Andric       Observer.changingInstr(MI);
2342fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2343fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2344fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy, 0);
2345fe6060f1SDimitry Andric       Observer.changedInstr(MI);
2346fe6060f1SDimitry Andric       return Legalized;
2347fe6060f1SDimitry Andric     }
2348fe6060f1SDimitry Andric     assert(TypeIdx == 1 &&
2349fe6060f1SDimitry Andric            "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2350fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2351fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2352fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2353fe6060f1SDimitry Andric     return Legalized;
23540b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
23550b57cec5SDimitry Andric     return widenScalarExtract(MI, TypeIdx, WideTy);
23560b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
23570b57cec5SDimitry Andric     return widenScalarInsert(MI, TypeIdx, WideTy);
23580b57cec5SDimitry Andric   case TargetOpcode::G_MERGE_VALUES:
23590b57cec5SDimitry Andric     return widenScalarMergeValues(MI, TypeIdx, WideTy);
23600b57cec5SDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
23610b57cec5SDimitry Andric     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2362e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
2363e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
23640b57cec5SDimitry Andric   case TargetOpcode::G_UADDO:
2365e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBO:
2366fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2367fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2368fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2369fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2370fe6060f1SDimitry Andric     return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2371fe6060f1SDimitry Andric   case TargetOpcode::G_UMULO:
2372fe6060f1SDimitry Andric   case TargetOpcode::G_SMULO:
2373fe6060f1SDimitry Andric     return widenScalarMulo(MI, TypeIdx, WideTy);
23745ffd83dbSDimitry Andric   case TargetOpcode::G_SADDSAT:
23755ffd83dbSDimitry Andric   case TargetOpcode::G_SSUBSAT:
2376e8d8bef9SDimitry Andric   case TargetOpcode::G_SSHLSAT:
23775ffd83dbSDimitry Andric   case TargetOpcode::G_UADDSAT:
23785ffd83dbSDimitry Andric   case TargetOpcode::G_USUBSAT:
2379e8d8bef9SDimitry Andric   case TargetOpcode::G_USHLSAT:
2380e8d8bef9SDimitry Andric     return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
23810b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
23820b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
23830b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
23840b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
23850b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP: {
23860b57cec5SDimitry Andric     if (TypeIdx == 0) {
23870b57cec5SDimitry Andric       Observer.changingInstr(MI);
23880b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
23890b57cec5SDimitry Andric       Observer.changedInstr(MI);
23900b57cec5SDimitry Andric       return Legalized;
23910b57cec5SDimitry Andric     }
23920b57cec5SDimitry Andric 
23930b57cec5SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
23940b57cec5SDimitry Andric 
2395349cc55cSDimitry Andric     // First extend the input.
2396349cc55cSDimitry Andric     unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2397349cc55cSDimitry Andric                               MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2398349cc55cSDimitry Andric                           ? TargetOpcode::G_ANYEXT
2399349cc55cSDimitry Andric                           : TargetOpcode::G_ZEXT;
2400349cc55cSDimitry Andric     auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
24010b57cec5SDimitry Andric     LLT CurTy = MRI.getType(SrcReg);
2402349cc55cSDimitry Andric     unsigned NewOpc = MI.getOpcode();
2403349cc55cSDimitry Andric     if (NewOpc == TargetOpcode::G_CTTZ) {
24040b57cec5SDimitry Andric       // The count is the same in the larger type except if the original
24050b57cec5SDimitry Andric       // value was zero.  This can be handled by setting the bit just off
24060b57cec5SDimitry Andric       // the top of the original type.
24070b57cec5SDimitry Andric       auto TopBit =
24080b57cec5SDimitry Andric           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
24090b57cec5SDimitry Andric       MIBSrc = MIRBuilder.buildOr(
24100b57cec5SDimitry Andric         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2411349cc55cSDimitry Andric       // Now we know the operand is non-zero, use the more relaxed opcode.
2412349cc55cSDimitry Andric       NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
24130b57cec5SDimitry Andric     }
24140b57cec5SDimitry Andric 
24150b57cec5SDimitry Andric     // Perform the operation at the larger size.
2416349cc55cSDimitry Andric     auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
24170b57cec5SDimitry Andric     // This is already the correct result for CTPOP and CTTZs
24180b57cec5SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
24190b57cec5SDimitry Andric         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
24200b57cec5SDimitry Andric       // The correct result is NewOp - (Difference in widety and current ty).
24210b57cec5SDimitry Andric       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
24225ffd83dbSDimitry Andric       MIBNewOp = MIRBuilder.buildSub(
24235ffd83dbSDimitry Andric           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
24240b57cec5SDimitry Andric     }
24250b57cec5SDimitry Andric 
24260b57cec5SDimitry Andric     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
24270b57cec5SDimitry Andric     MI.eraseFromParent();
24280b57cec5SDimitry Andric     return Legalized;
24290b57cec5SDimitry Andric   }
24300b57cec5SDimitry Andric   case TargetOpcode::G_BSWAP: {
24310b57cec5SDimitry Andric     Observer.changingInstr(MI);
24320b57cec5SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
24350b57cec5SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
24360b57cec5SDimitry Andric     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
24370b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
24380b57cec5SDimitry Andric 
24390b57cec5SDimitry Andric     MI.getOperand(0).setReg(DstExt);
24400b57cec5SDimitry Andric 
24410b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
24420b57cec5SDimitry Andric 
24430b57cec5SDimitry Andric     LLT Ty = MRI.getType(DstReg);
24440b57cec5SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
24450b57cec5SDimitry Andric     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
24465ffd83dbSDimitry Andric     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
24470b57cec5SDimitry Andric 
24480b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, ShrReg);
24490b57cec5SDimitry Andric     Observer.changedInstr(MI);
24500b57cec5SDimitry Andric     return Legalized;
24510b57cec5SDimitry Andric   }
24528bcb0991SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
24538bcb0991SDimitry Andric     Observer.changingInstr(MI);
24548bcb0991SDimitry Andric 
24558bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
24568bcb0991SDimitry Andric     LLT Ty = MRI.getType(DstReg);
24578bcb0991SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
24588bcb0991SDimitry Andric 
24598bcb0991SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
24608bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
24618bcb0991SDimitry Andric     MI.getOperand(0).setReg(DstExt);
24628bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
24638bcb0991SDimitry Andric 
24648bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
24658bcb0991SDimitry Andric     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
24668bcb0991SDimitry Andric     MIRBuilder.buildTrunc(DstReg, Shift);
24678bcb0991SDimitry Andric     Observer.changedInstr(MI);
24688bcb0991SDimitry Andric     return Legalized;
24698bcb0991SDimitry Andric   }
24705ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
24715ffd83dbSDimitry Andric     Observer.changingInstr(MI);
24725ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
24735ffd83dbSDimitry Andric     widenScalarDst(MI, WideTy);
24745ffd83dbSDimitry Andric     Observer.changedInstr(MI);
24755ffd83dbSDimitry Andric     return Legalized;
24765ffd83dbSDimitry Andric 
2477fe6060f1SDimitry Andric   case TargetOpcode::G_ABS:
2478fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2479fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2480fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2481fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2482fe6060f1SDimitry Andric     return Legalized;
2483fe6060f1SDimitry Andric 
24840b57cec5SDimitry Andric   case TargetOpcode::G_ADD:
24850b57cec5SDimitry Andric   case TargetOpcode::G_AND:
24860b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
24870b57cec5SDimitry Andric   case TargetOpcode::G_OR:
24880b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
24890b57cec5SDimitry Andric   case TargetOpcode::G_SUB:
24910b57cec5SDimitry Andric     // Perform operation at larger width (any extension is fine here, high bits
24910b57cec5SDimitry Andric     // don't affect the result) and then truncate the result back to the
24920b57cec5SDimitry Andric     // original type.
24930b57cec5SDimitry Andric     Observer.changingInstr(MI);
24940b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
24950b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
24960b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
24970b57cec5SDimitry Andric     Observer.changedInstr(MI);
24980b57cec5SDimitry Andric     return Legalized;
24990b57cec5SDimitry Andric 
2500fe6060f1SDimitry Andric   case TargetOpcode::G_SBFX:
2501fe6060f1SDimitry Andric   case TargetOpcode::G_UBFX:
2502fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2503fe6060f1SDimitry Andric 
2504fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2505fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2506fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy);
2507fe6060f1SDimitry Andric     } else {
2508fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2509fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2510fe6060f1SDimitry Andric     }
2511fe6060f1SDimitry Andric 
2512fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2513fe6060f1SDimitry Andric     return Legalized;
2514fe6060f1SDimitry Andric 
25150b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
25160b57cec5SDimitry Andric     Observer.changingInstr(MI);
25170b57cec5SDimitry Andric 
25180b57cec5SDimitry Andric     if (TypeIdx == 0) {
25190b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
25200b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
25210b57cec5SDimitry Andric     } else {
25220b57cec5SDimitry Andric       assert(TypeIdx == 1);
25230b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
25240b57cec5SDimitry Andric       // unsigned integer:
25250b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
25260b57cec5SDimitry Andric     }
25270b57cec5SDimitry Andric 
25280b57cec5SDimitry Andric     Observer.changedInstr(MI);
25290b57cec5SDimitry Andric     return Legalized;
25300b57cec5SDimitry Andric 
25315f757f3fSDimitry Andric   case TargetOpcode::G_ROTR:
25325f757f3fSDimitry Andric   case TargetOpcode::G_ROTL:
25335f757f3fSDimitry Andric     if (TypeIdx != 1)
25345f757f3fSDimitry Andric       return UnableToLegalize;
25355f757f3fSDimitry Andric 
25365f757f3fSDimitry Andric     Observer.changingInstr(MI);
25375f757f3fSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
25385f757f3fSDimitry Andric     Observer.changedInstr(MI);
25395f757f3fSDimitry Andric     return Legalized;
25405f757f3fSDimitry Andric 
25410b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
25420b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
25430b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
25440b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
25450b57cec5SDimitry Andric     Observer.changingInstr(MI);
25460b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
25470b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
25480b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25490b57cec5SDimitry Andric     Observer.changedInstr(MI);
25500b57cec5SDimitry Andric     return Legalized;
25510b57cec5SDimitry Andric 
2552fe6060f1SDimitry Andric   case TargetOpcode::G_SDIVREM:
2553fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2554fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2555fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2556fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2557fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2558fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2559fe6060f1SDimitry Andric     return Legalized;
2560fe6060f1SDimitry Andric 
25610b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
25620b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
25630b57cec5SDimitry Andric     Observer.changingInstr(MI);
25640b57cec5SDimitry Andric 
25650b57cec5SDimitry Andric     if (TypeIdx == 0) {
25660b57cec5SDimitry Andric       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
25670b57cec5SDimitry Andric         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
25680b57cec5SDimitry Andric 
25690b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, CvtOp);
25700b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
25710b57cec5SDimitry Andric     } else {
25720b57cec5SDimitry Andric       assert(TypeIdx == 1);
25730b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
25740b57cec5SDimitry Andric       // unsigned integer:
25750b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
25760b57cec5SDimitry Andric     }
25770b57cec5SDimitry Andric 
25780b57cec5SDimitry Andric     Observer.changedInstr(MI);
25790b57cec5SDimitry Andric     return Legalized;
25800b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
25810b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
25820b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
25830b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
25840b57cec5SDimitry Andric     Observer.changingInstr(MI);
25850b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
25860b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
25870b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25880b57cec5SDimitry Andric     Observer.changedInstr(MI);
25890b57cec5SDimitry Andric     return Legalized;
25900b57cec5SDimitry Andric 
2591fe6060f1SDimitry Andric   case TargetOpcode::G_UDIVREM:
2592fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2593fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2594fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2595fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2596fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2597fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2598fe6060f1SDimitry Andric     return Legalized;
2599fe6060f1SDimitry Andric 
26000b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
26010b57cec5SDimitry Andric     Observer.changingInstr(MI);
26020b57cec5SDimitry Andric     if (TypeIdx == 0) {
26030b57cec5SDimitry Andric       // Perform operation at larger width (any extension is fine here, high
26040b57cec5SDimitry Andric       // bits don't affect the result) and then truncate the result back to the
26050b57cec5SDimitry Andric       // original type.
26060b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
26070b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
26080b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26090b57cec5SDimitry Andric     } else {
26100b57cec5SDimitry Andric       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
26110b57cec5SDimitry Andric       // Explicit extension is required here since high bits affect the result.
26120b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
26130b57cec5SDimitry Andric     }
26140b57cec5SDimitry Andric     Observer.changedInstr(MI);
26150b57cec5SDimitry Andric     return Legalized;
26160b57cec5SDimitry Andric 
26170b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
26180b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
26195f757f3fSDimitry Andric   case TargetOpcode::G_IS_FPCLASS:
26200b57cec5SDimitry Andric     Observer.changingInstr(MI);
26218bcb0991SDimitry Andric 
26228bcb0991SDimitry Andric     if (TypeIdx == 0)
26230b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26248bcb0991SDimitry Andric     else
26258bcb0991SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
26268bcb0991SDimitry Andric 
26270b57cec5SDimitry Andric     Observer.changedInstr(MI);
26280b57cec5SDimitry Andric     return Legalized;
26290b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
26300b57cec5SDimitry Andric     Observer.changingInstr(MI);
2631e8d8bef9SDimitry Andric 
2632e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2633e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2634e8d8bef9SDimitry Andric     else
26350b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2636e8d8bef9SDimitry Andric 
26370b57cec5SDimitry Andric     Observer.changedInstr(MI);
26380b57cec5SDimitry Andric     return Legalized;
26390b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
26400b57cec5SDimitry Andric     Observer.changingInstr(MI);
2641e8d8bef9SDimitry Andric 
2642e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2643e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2644e8d8bef9SDimitry Andric     else
26450b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2646e8d8bef9SDimitry Andric 
26470b57cec5SDimitry Andric     Observer.changedInstr(MI);
26480b57cec5SDimitry Andric     return Legalized;
26490b57cec5SDimitry Andric   case TargetOpcode::G_LOAD:
26500b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
26510b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
26520b57cec5SDimitry Andric     Observer.changingInstr(MI);
26530b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
26540b57cec5SDimitry Andric     Observer.changedInstr(MI);
26550b57cec5SDimitry Andric     return Legalized;
26560b57cec5SDimitry Andric 
26570b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
26580b57cec5SDimitry Andric     if (TypeIdx != 0)
26590b57cec5SDimitry Andric       return UnableToLegalize;
26600b57cec5SDimitry Andric 
26610b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2662e8d8bef9SDimitry Andric     if (!Ty.isScalar())
26630b57cec5SDimitry Andric       return UnableToLegalize;
26640b57cec5SDimitry Andric 
26650b57cec5SDimitry Andric     Observer.changingInstr(MI);
26660b57cec5SDimitry Andric 
26670b57cec5SDimitry Andric     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
26680b57cec5SDimitry Andric       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
26690b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, ExtType);
26700b57cec5SDimitry Andric 
26710b57cec5SDimitry Andric     Observer.changedInstr(MI);
26720b57cec5SDimitry Andric     return Legalized;
26730b57cec5SDimitry Andric   }
26740b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
26750b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
26760b57cec5SDimitry Andric     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2677480093f4SDimitry Andric     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2678480093f4SDimitry Andric         MRI.getType(MI.getOperand(0).getReg()));
2679480093f4SDimitry Andric     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2680480093f4SDimitry Andric             ExtOpc == TargetOpcode::G_ANYEXT) &&
2681480093f4SDimitry Andric            "Illegal Extend");
2682480093f4SDimitry Andric     const APInt &SrcVal = SrcMO.getCImm()->getValue();
2683480093f4SDimitry Andric     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2684480093f4SDimitry Andric                            ? SrcVal.sext(WideTy.getSizeInBits())
2685480093f4SDimitry Andric                            : SrcVal.zext(WideTy.getSizeInBits());
26860b57cec5SDimitry Andric     Observer.changingInstr(MI);
26870b57cec5SDimitry Andric     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
26880b57cec5SDimitry Andric 
26890b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
26900b57cec5SDimitry Andric     Observer.changedInstr(MI);
26910b57cec5SDimitry Andric     return Legalized;
26920b57cec5SDimitry Andric   }
26930b57cec5SDimitry Andric   case TargetOpcode::G_FCONSTANT: {
2694fcaf7f86SDimitry Andric     // To avoid changing the bits of the constant due to extension to a larger
2695fcaf7f86SDimitry Andric     // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
26960b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
2697fcaf7f86SDimitry Andric     APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2698fcaf7f86SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
2699fcaf7f86SDimitry Andric     auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2700fcaf7f86SDimitry Andric     widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2701fcaf7f86SDimitry Andric     MI.eraseFromParent();
27020b57cec5SDimitry Andric     return Legalized;
27030b57cec5SDimitry Andric   }
27040b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
27050b57cec5SDimitry Andric     Observer.changingInstr(MI);
27060b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
27070b57cec5SDimitry Andric     Observer.changedInstr(MI);
27080b57cec5SDimitry Andric     return Legalized;
27090b57cec5SDimitry Andric   }
27100b57cec5SDimitry Andric   case TargetOpcode::G_BRCOND:
27110b57cec5SDimitry Andric     Observer.changingInstr(MI);
27120b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
27130b57cec5SDimitry Andric     Observer.changedInstr(MI);
27140b57cec5SDimitry Andric     return Legalized;
27150b57cec5SDimitry Andric 
27160b57cec5SDimitry Andric   case TargetOpcode::G_FCMP:
27170b57cec5SDimitry Andric     Observer.changingInstr(MI);
27180b57cec5SDimitry Andric     if (TypeIdx == 0)
27190b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
27200b57cec5SDimitry Andric     else {
27210b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
27220b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
27230b57cec5SDimitry Andric     }
27240b57cec5SDimitry Andric     Observer.changedInstr(MI);
27250b57cec5SDimitry Andric     return Legalized;
27260b57cec5SDimitry Andric 
27270b57cec5SDimitry Andric   case TargetOpcode::G_ICMP:
27280b57cec5SDimitry Andric     Observer.changingInstr(MI);
27290b57cec5SDimitry Andric     if (TypeIdx == 0)
27300b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
27310b57cec5SDimitry Andric     else {
27320b57cec5SDimitry Andric       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
27330b57cec5SDimitry Andric                                MI.getOperand(1).getPredicate()))
27340b57cec5SDimitry Andric                                ? TargetOpcode::G_SEXT
27350b57cec5SDimitry Andric                                : TargetOpcode::G_ZEXT;
27360b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
27370b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
27380b57cec5SDimitry Andric     }
27390b57cec5SDimitry Andric     Observer.changedInstr(MI);
27400b57cec5SDimitry Andric     return Legalized;
27410b57cec5SDimitry Andric 
2742480093f4SDimitry Andric   case TargetOpcode::G_PTR_ADD:
2743480093f4SDimitry Andric     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
27440b57cec5SDimitry Andric     Observer.changingInstr(MI);
27450b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
27460b57cec5SDimitry Andric     Observer.changedInstr(MI);
27470b57cec5SDimitry Andric     return Legalized;
27480b57cec5SDimitry Andric 
27490b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
27500b57cec5SDimitry Andric     assert(TypeIdx == 0 && "Expecting only Idx 0");
27510b57cec5SDimitry Andric 
27520b57cec5SDimitry Andric     Observer.changingInstr(MI);
27530b57cec5SDimitry Andric     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
27540b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2755bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
27560b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
27570b57cec5SDimitry Andric     }
27580b57cec5SDimitry Andric 
27590b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
27600b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
27610b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
27620b57cec5SDimitry Andric     Observer.changedInstr(MI);
27630b57cec5SDimitry Andric     return Legalized;
27640b57cec5SDimitry Andric   }
27650b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
27660b57cec5SDimitry Andric     if (TypeIdx == 0) {
27670b57cec5SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
27680b57cec5SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
27690b57cec5SDimitry Andric       Observer.changingInstr(MI);
27700b57cec5SDimitry Andric 
2771fe6060f1SDimitry Andric       widenScalarSrc(
2772fe6060f1SDimitry Andric           MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2773349cc55cSDimitry Andric           TargetOpcode::G_ANYEXT);
27740b57cec5SDimitry Andric 
27750b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
27760b57cec5SDimitry Andric       Observer.changedInstr(MI);
27770b57cec5SDimitry Andric       return Legalized;
27780b57cec5SDimitry Andric     }
27790b57cec5SDimitry Andric 
27800b57cec5SDimitry Andric     if (TypeIdx != 2)
27810b57cec5SDimitry Andric       return UnableToLegalize;
27820b57cec5SDimitry Andric     Observer.changingInstr(MI);
2783480093f4SDimitry Andric     // TODO: Probably should be zext
27840b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
27850b57cec5SDimitry Andric     Observer.changedInstr(MI);
27860b57cec5SDimitry Andric     return Legalized;
27870b57cec5SDimitry Andric   }
2788480093f4SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
27895f757f3fSDimitry Andric     if (TypeIdx == 0) {
27905f757f3fSDimitry Andric       Observer.changingInstr(MI);
27915f757f3fSDimitry Andric       const LLT WideEltTy = WideTy.getElementType();
27925f757f3fSDimitry Andric 
27935f757f3fSDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
27945f757f3fSDimitry Andric       widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
27955f757f3fSDimitry Andric       widenScalarDst(MI, WideTy, 0);
27965f757f3fSDimitry Andric       Observer.changedInstr(MI);
27975f757f3fSDimitry Andric       return Legalized;
27985f757f3fSDimitry Andric     }
27995f757f3fSDimitry Andric 
2800480093f4SDimitry Andric     if (TypeIdx == 1) {
2801480093f4SDimitry Andric       Observer.changingInstr(MI);
2802480093f4SDimitry Andric 
2803480093f4SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
2804480093f4SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
2805fe6060f1SDimitry Andric       LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2806480093f4SDimitry Andric 
2807480093f4SDimitry Andric       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2808480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2809480093f4SDimitry Andric       widenScalarDst(MI, WideVecTy, 0);
2810480093f4SDimitry Andric       Observer.changedInstr(MI);
2811480093f4SDimitry Andric       return Legalized;
2812480093f4SDimitry Andric     }
2813480093f4SDimitry Andric 
2814480093f4SDimitry Andric     if (TypeIdx == 2) {
2815480093f4SDimitry Andric       Observer.changingInstr(MI);
2816480093f4SDimitry Andric       // TODO: Probably should be zext
2817480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2818480093f4SDimitry Andric       Observer.changedInstr(MI);
28195ffd83dbSDimitry Andric       return Legalized;
2820480093f4SDimitry Andric     }
2821480093f4SDimitry Andric 
28225ffd83dbSDimitry Andric     return UnableToLegalize;
2823480093f4SDimitry Andric   }
28240b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
28250b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
28260b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
28270b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
28288bcb0991SDimitry Andric   case TargetOpcode::G_FMAD:
28290b57cec5SDimitry Andric   case TargetOpcode::G_FNEG:
28300b57cec5SDimitry Andric   case TargetOpcode::G_FABS:
28310b57cec5SDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
28320b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM:
28330b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM:
28340b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
28350b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
28360b57cec5SDimitry Andric   case TargetOpcode::G_FMINIMUM:
28370b57cec5SDimitry Andric   case TargetOpcode::G_FMAXIMUM:
28380b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
28390b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
28400b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
28410b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
28420b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
28430b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
28440b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
28450b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
28460b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
28470b57cec5SDimitry Andric   case TargetOpcode::G_FRINT:
28480b57cec5SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
28490b57cec5SDimitry Andric   case TargetOpcode::G_FSQRT:
28500b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
28510b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
28525f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
28530b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
28540b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
28550b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
2856e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
28570b57cec5SDimitry Andric     assert(TypeIdx == 0);
28580b57cec5SDimitry Andric     Observer.changingInstr(MI);
28590b57cec5SDimitry Andric 
28600b57cec5SDimitry Andric     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
28610b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
28620b57cec5SDimitry Andric 
28630b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
28640b57cec5SDimitry Andric     Observer.changedInstr(MI);
28650b57cec5SDimitry Andric     return Legalized;
286606c3fb27SDimitry Andric   case TargetOpcode::G_FPOWI:
286706c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
286806c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP: {
286906c3fb27SDimitry Andric     if (TypeIdx == 0) {
287006c3fb27SDimitry Andric       if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2871e8d8bef9SDimitry Andric         return UnableToLegalize;
287206c3fb27SDimitry Andric 
2873e8d8bef9SDimitry Andric       Observer.changingInstr(MI);
2874e8d8bef9SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2875e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2876e8d8bef9SDimitry Andric       Observer.changedInstr(MI);
2877e8d8bef9SDimitry Andric       return Legalized;
2878e8d8bef9SDimitry Andric     }
287906c3fb27SDimitry Andric 
288006c3fb27SDimitry Andric     if (TypeIdx == 1) {
288106c3fb27SDimitry Andric       // For some reason SelectionDAG tries to promote to a libcall without
288206c3fb27SDimitry Andric       // actually changing the integer type for promotion.
288306c3fb27SDimitry Andric       Observer.changingInstr(MI);
288406c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
288506c3fb27SDimitry Andric       Observer.changedInstr(MI);
288606c3fb27SDimitry Andric       return Legalized;
288706c3fb27SDimitry Andric     }
288806c3fb27SDimitry Andric 
288906c3fb27SDimitry Andric     return UnableToLegalize;
289006c3fb27SDimitry Andric   }
289106c3fb27SDimitry Andric   case TargetOpcode::G_FFREXP: {
289206c3fb27SDimitry Andric     Observer.changingInstr(MI);
289306c3fb27SDimitry Andric 
289406c3fb27SDimitry Andric     if (TypeIdx == 0) {
289506c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
289606c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
289706c3fb27SDimitry Andric     } else {
289806c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 1);
289906c3fb27SDimitry Andric     }
290006c3fb27SDimitry Andric 
290106c3fb27SDimitry Andric     Observer.changedInstr(MI);
290206c3fb27SDimitry Andric     return Legalized;
290306c3fb27SDimitry Andric   }
29040b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
29050b57cec5SDimitry Andric     if (TypeIdx != 1)
29060b57cec5SDimitry Andric       return UnableToLegalize;
29070b57cec5SDimitry Andric 
29080b57cec5SDimitry Andric     Observer.changingInstr(MI);
29090b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
29100b57cec5SDimitry Andric     Observer.changedInstr(MI);
29110b57cec5SDimitry Andric     return Legalized;
29120b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
29130b57cec5SDimitry Andric     if (TypeIdx != 0)
29140b57cec5SDimitry Andric       return UnableToLegalize;
29150b57cec5SDimitry Andric 
29160b57cec5SDimitry Andric     Observer.changingInstr(MI);
29170b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0);
29180b57cec5SDimitry Andric     Observer.changedInstr(MI);
29190b57cec5SDimitry Andric     return Legalized;
29200b57cec5SDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
29210b57cec5SDimitry Andric     Observer.changingInstr(MI);
29220b57cec5SDimitry Andric 
29230b57cec5SDimitry Andric     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
29240b57cec5SDimitry Andric     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
29250b57cec5SDimitry Andric       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
29260b57cec5SDimitry Andric 
29270b57cec5SDimitry Andric     // Avoid changing the result vector type if the source element type was
29280b57cec5SDimitry Andric     // requested.
29290b57cec5SDimitry Andric     if (TypeIdx == 1) {
2930e8d8bef9SDimitry Andric       MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
29310b57cec5SDimitry Andric     } else {
29320b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
29330b57cec5SDimitry Andric     }
29340b57cec5SDimitry Andric 
29350b57cec5SDimitry Andric     Observer.changedInstr(MI);
29360b57cec5SDimitry Andric     return Legalized;
29370b57cec5SDimitry Andric   }
29388bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG:
29398bcb0991SDimitry Andric     if (TypeIdx != 0)
29408bcb0991SDimitry Andric       return UnableToLegalize;
29418bcb0991SDimitry Andric 
29428bcb0991SDimitry Andric     Observer.changingInstr(MI);
29438bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
29448bcb0991SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
29458bcb0991SDimitry Andric     Observer.changedInstr(MI);
29468bcb0991SDimitry Andric     return Legalized;
29475ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
29485ffd83dbSDimitry Andric     if (TypeIdx != 1)
29495ffd83dbSDimitry Andric       return UnableToLegalize;
29505ffd83dbSDimitry Andric     Observer.changingInstr(MI);
29515ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
29525ffd83dbSDimitry Andric     Observer.changedInstr(MI);
29535ffd83dbSDimitry Andric     return Legalized;
29545ffd83dbSDimitry Andric   }
29555f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FADD:
29561db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_FMUL:
29575f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMIN:
29585f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAX:
29595f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMINIMUM:
29605f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAXIMUM:
29615f757f3fSDimitry Andric     if (TypeIdx != 0)
29625f757f3fSDimitry Andric       return UnableToLegalize;
29635f757f3fSDimitry Andric     Observer.changingInstr(MI);
29645f757f3fSDimitry Andric     Register VecReg = MI.getOperand(1).getReg();
29655f757f3fSDimitry Andric     LLT VecTy = MRI.getType(VecReg);
29665f757f3fSDimitry Andric     LLT WideVecTy = VecTy.isVector()
29675f757f3fSDimitry Andric                         ? LLT::vector(VecTy.getElementCount(), WideTy)
29685f757f3fSDimitry Andric                         : WideTy;
29695f757f3fSDimitry Andric     widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
29705f757f3fSDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
29715f757f3fSDimitry Andric     Observer.changedInstr(MI);
29725f757f3fSDimitry Andric     return Legalized;
29735ffd83dbSDimitry Andric   }
29745ffd83dbSDimitry Andric }
29755ffd83dbSDimitry Andric 
29765ffd83dbSDimitry Andric static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
29775ffd83dbSDimitry Andric                              MachineIRBuilder &B, Register Src, LLT Ty) {
29785ffd83dbSDimitry Andric   auto Unmerge = B.buildUnmerge(Ty, Src);
29795ffd83dbSDimitry Andric   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
29805ffd83dbSDimitry Andric     Pieces.push_back(Unmerge.getReg(I));
29815ffd83dbSDimitry Andric }
29825ffd83dbSDimitry Andric 
29835ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
298406c3fb27SDimitry Andric LegalizerHelper::lowerFConstant(MachineInstr &MI) {
29855ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
29865ffd83dbSDimitry Andric 
298706c3fb27SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
298806c3fb27SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
298906c3fb27SDimitry Andric 
299006c3fb27SDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
299106c3fb27SDimitry Andric   LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
299206c3fb27SDimitry Andric   Align Alignment = Align(DL.getABITypeAlign(
299306c3fb27SDimitry Andric       getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
299406c3fb27SDimitry Andric 
299506c3fb27SDimitry Andric   auto Addr = MIRBuilder.buildConstantPool(
299606c3fb27SDimitry Andric       AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
299706c3fb27SDimitry Andric                      MI.getOperand(1).getFPImm(), Alignment));
299806c3fb27SDimitry Andric 
299906c3fb27SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
300006c3fb27SDimitry Andric       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
300106c3fb27SDimitry Andric       MRI.getType(Dst), Alignment);
300206c3fb27SDimitry Andric 
300306c3fb27SDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
300406c3fb27SDimitry Andric   MI.eraseFromParent();
300506c3fb27SDimitry Andric 
300606c3fb27SDimitry Andric   return Legalized;
300706c3fb27SDimitry Andric }
300806c3fb27SDimitry Andric 
300906c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
301006c3fb27SDimitry Andric LegalizerHelper::lowerBitcast(MachineInstr &MI) {
301106c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
30125ffd83dbSDimitry Andric   if (SrcTy.isVector()) {
30135ffd83dbSDimitry Andric     LLT SrcEltTy = SrcTy.getElementType();
30145ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
30155ffd83dbSDimitry Andric 
30165ffd83dbSDimitry Andric     if (DstTy.isVector()) {
30175ffd83dbSDimitry Andric       int NumDstElt = DstTy.getNumElements();
30185ffd83dbSDimitry Andric       int NumSrcElt = SrcTy.getNumElements();
30195ffd83dbSDimitry Andric 
30205ffd83dbSDimitry Andric       LLT DstEltTy = DstTy.getElementType();
30215ffd83dbSDimitry Andric       LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
30225ffd83dbSDimitry Andric       LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
30235ffd83dbSDimitry Andric 
30245ffd83dbSDimitry Andric       // If there's an element size mismatch, insert intermediate casts to match
30255ffd83dbSDimitry Andric       // the result element type.
30265ffd83dbSDimitry Andric       if (NumSrcElt < NumDstElt) { // Source element type is larger.
30275ffd83dbSDimitry Andric         // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
30285ffd83dbSDimitry Andric         //
30295ffd83dbSDimitry Andric         // =>
30305ffd83dbSDimitry Andric         //
30315ffd83dbSDimitry Andric         // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
30325ffd83dbSDimitry Andric         // %3:_(<2 x s8>) = G_BITCAST %2
30335ffd83dbSDimitry Andric         // %4:_(<2 x s8>) = G_BITCAST %3
30345ffd83dbSDimitry Andric         // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
3035fe6060f1SDimitry Andric         DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
30365ffd83dbSDimitry Andric         SrcPartTy = SrcEltTy;
30375ffd83dbSDimitry Andric       } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
30385ffd83dbSDimitry Andric         //
30395ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
30405ffd83dbSDimitry Andric         //
30415ffd83dbSDimitry Andric         // =>
30425ffd83dbSDimitry Andric         //
30435ffd83dbSDimitry Andric         // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
30445ffd83dbSDimitry Andric         // %3:_(s16) = G_BITCAST %2
30455ffd83dbSDimitry Andric         // %4:_(s16) = G_BITCAST %3
30465ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
3047fe6060f1SDimitry Andric         SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
30485ffd83dbSDimitry Andric         DstCastTy = DstEltTy;
30495ffd83dbSDimitry Andric       }
30505ffd83dbSDimitry Andric 
30515ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
30525ffd83dbSDimitry Andric       for (Register &SrcReg : SrcRegs)
30535ffd83dbSDimitry Andric         SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
30545ffd83dbSDimitry Andric     } else
30555ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
30565ffd83dbSDimitry Andric 
3057bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
30585ffd83dbSDimitry Andric     MI.eraseFromParent();
30595ffd83dbSDimitry Andric     return Legalized;
30605ffd83dbSDimitry Andric   }
30615ffd83dbSDimitry Andric 
30625ffd83dbSDimitry Andric   if (DstTy.isVector()) {
30635ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
30645ffd83dbSDimitry Andric     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3065bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
30665ffd83dbSDimitry Andric     MI.eraseFromParent();
30675ffd83dbSDimitry Andric     return Legalized;
30685ffd83dbSDimitry Andric   }
30695ffd83dbSDimitry Andric 
30705ffd83dbSDimitry Andric   return UnableToLegalize;
30715ffd83dbSDimitry Andric }
30725ffd83dbSDimitry Andric 
3073e8d8bef9SDimitry Andric /// Figure out the bit offset into a register when coercing a vector index for
3074e8d8bef9SDimitry Andric /// the wide element type. This is only for the case when promoting vector to
3075e8d8bef9SDimitry Andric /// one with larger elements.
3076e8d8bef9SDimitry Andric //
3077e8d8bef9SDimitry Andric ///
3078e8d8bef9SDimitry Andric /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3079e8d8bef9SDimitry Andric /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3080e8d8bef9SDimitry Andric static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3081e8d8bef9SDimitry Andric                                                    Register Idx,
3082e8d8bef9SDimitry Andric                                                    unsigned NewEltSize,
3083e8d8bef9SDimitry Andric                                                    unsigned OldEltSize) {
3084e8d8bef9SDimitry Andric   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3085e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(Idx);
3086e8d8bef9SDimitry Andric 
3087e8d8bef9SDimitry Andric   // Now figure out the amount we need to shift to get the target bits.
3088e8d8bef9SDimitry Andric   auto OffsetMask = B.buildConstant(
3089349cc55cSDimitry Andric       IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3090e8d8bef9SDimitry Andric   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3091e8d8bef9SDimitry Andric   return B.buildShl(IdxTy, OffsetIdx,
3092e8d8bef9SDimitry Andric                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3093e8d8bef9SDimitry Andric }
3094e8d8bef9SDimitry Andric 
3095e8d8bef9SDimitry Andric /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3096e8d8bef9SDimitry Andric /// is casting to a vector with a smaller element size, perform multiple element
3097e8d8bef9SDimitry Andric /// extracts and merge the results. If this is coercing to a vector with larger
3098e8d8bef9SDimitry Andric /// elements, index the bitcasted vector and extract the target element with bit
3099e8d8bef9SDimitry Andric /// operations. This is intended to force the indexing in the native register
3100e8d8bef9SDimitry Andric /// size for architectures that can dynamically index the register file.
31015ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
3102e8d8bef9SDimitry Andric LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3103e8d8bef9SDimitry Andric                                          LLT CastTy) {
3104e8d8bef9SDimitry Andric   if (TypeIdx != 1)
3105e8d8bef9SDimitry Andric     return UnableToLegalize;
3106e8d8bef9SDimitry Andric 
310706c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3108e8d8bef9SDimitry Andric 
3109e8d8bef9SDimitry Andric   LLT SrcEltTy = SrcVecTy.getElementType();
3110e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3111e8d8bef9SDimitry Andric   unsigned OldNumElts = SrcVecTy.getNumElements();
3112e8d8bef9SDimitry Andric 
3113e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3114e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3115e8d8bef9SDimitry Andric 
3116e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
3117e8d8bef9SDimitry Andric   const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3118e8d8bef9SDimitry Andric   if (NewNumElts > OldNumElts) {
3119e8d8bef9SDimitry Andric     // Decreasing the vector element size
3120e8d8bef9SDimitry Andric     //
3121e8d8bef9SDimitry Andric     // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3122e8d8bef9SDimitry Andric     //  =>
3123e8d8bef9SDimitry Andric     //  v4i32:castx = bitcast x:v2i64
3124e8d8bef9SDimitry Andric     //
3125e8d8bef9SDimitry Andric     // i64 = bitcast
3126e8d8bef9SDimitry Andric     //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3127e8d8bef9SDimitry Andric     //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
3128e8d8bef9SDimitry Andric     //
3129e8d8bef9SDimitry Andric     if (NewNumElts % OldNumElts != 0)
3130e8d8bef9SDimitry Andric       return UnableToLegalize;
3131e8d8bef9SDimitry Andric 
3132e8d8bef9SDimitry Andric     // Type of the intermediate result vector.
3133e8d8bef9SDimitry Andric     const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3134fe6060f1SDimitry Andric     LLT MidTy =
3135fe6060f1SDimitry Andric         LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3136e8d8bef9SDimitry Andric 
3137e8d8bef9SDimitry Andric     auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3138e8d8bef9SDimitry Andric 
3139e8d8bef9SDimitry Andric     SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3140e8d8bef9SDimitry Andric     auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3141e8d8bef9SDimitry Andric 
3142e8d8bef9SDimitry Andric     for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3143e8d8bef9SDimitry Andric       auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3144e8d8bef9SDimitry Andric       auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3145e8d8bef9SDimitry Andric       auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3146e8d8bef9SDimitry Andric       NewOps[I] = Elt.getReg(0);
3147e8d8bef9SDimitry Andric     }
3148e8d8bef9SDimitry Andric 
3149e8d8bef9SDimitry Andric     auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3150e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, NewVec);
3151e8d8bef9SDimitry Andric     MI.eraseFromParent();
3152e8d8bef9SDimitry Andric     return Legalized;
3153e8d8bef9SDimitry Andric   }
3154e8d8bef9SDimitry Andric 
3155e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3156e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
3157e8d8bef9SDimitry Andric       return UnableToLegalize;
3158e8d8bef9SDimitry Andric 
3159e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3160e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3161e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3162e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
3163e8d8bef9SDimitry Andric       return UnableToLegalize;
3164e8d8bef9SDimitry Andric 
3165e8d8bef9SDimitry Andric     // Increasing the vector element size.
3166e8d8bef9SDimitry Andric     // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3167e8d8bef9SDimitry Andric     //
3168e8d8bef9SDimitry Andric     //   =>
3169e8d8bef9SDimitry Andric     //
3170e8d8bef9SDimitry Andric     // %cast = G_BITCAST %vec
3171e8d8bef9SDimitry Andric     // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3172e8d8bef9SDimitry Andric     // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3173e8d8bef9SDimitry Andric     // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3174e8d8bef9SDimitry Andric     // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3175e8d8bef9SDimitry Andric     // %elt_bits = G_LSHR %wide_elt, %offset_bits
3176e8d8bef9SDimitry Andric     // %elt = G_TRUNC %elt_bits
3177e8d8bef9SDimitry Andric 
3178e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3179e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3180e8d8bef9SDimitry Andric 
3181e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3182e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3183e8d8bef9SDimitry Andric 
3184e8d8bef9SDimitry Andric     Register WideElt = CastVec;
3185e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3186e8d8bef9SDimitry Andric       WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3187e8d8bef9SDimitry Andric                                                      ScaledIdx).getReg(0);
3188e8d8bef9SDimitry Andric     }
3189e8d8bef9SDimitry Andric 
3190e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3191e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3192e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3193e8d8bef9SDimitry Andric 
3194e8d8bef9SDimitry Andric     // Shift the wide element to get the target element.
3195e8d8bef9SDimitry Andric     auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3196e8d8bef9SDimitry Andric     MIRBuilder.buildTrunc(Dst, ExtractedBits);
3197e8d8bef9SDimitry Andric     MI.eraseFromParent();
3198e8d8bef9SDimitry Andric     return Legalized;
3199e8d8bef9SDimitry Andric   }
3200e8d8bef9SDimitry Andric 
3201e8d8bef9SDimitry Andric   return UnableToLegalize;
3202e8d8bef9SDimitry Andric }
3203e8d8bef9SDimitry Andric 
3204e8d8bef9SDimitry Andric /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3205e8d8bef9SDimitry Andric /// TargetReg, while preserving other bits in \p TargetReg.
3206e8d8bef9SDimitry Andric ///
3207e8d8bef9SDimitry Andric /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3208e8d8bef9SDimitry Andric static Register buildBitFieldInsert(MachineIRBuilder &B,
3209e8d8bef9SDimitry Andric                                     Register TargetReg, Register InsertReg,
3210e8d8bef9SDimitry Andric                                     Register OffsetBits) {
3211e8d8bef9SDimitry Andric   LLT TargetTy = B.getMRI()->getType(TargetReg);
3212e8d8bef9SDimitry Andric   LLT InsertTy = B.getMRI()->getType(InsertReg);
3213e8d8bef9SDimitry Andric   auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3214e8d8bef9SDimitry Andric   auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3215e8d8bef9SDimitry Andric 
3216e8d8bef9SDimitry Andric   // Produce a bitmask of the value to insert
3217e8d8bef9SDimitry Andric   auto EltMask = B.buildConstant(
3218e8d8bef9SDimitry Andric     TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3219e8d8bef9SDimitry Andric                                    InsertTy.getSizeInBits()));
3220e8d8bef9SDimitry Andric   // Shift it into position
3221e8d8bef9SDimitry Andric   auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3222e8d8bef9SDimitry Andric   auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3223e8d8bef9SDimitry Andric 
3224e8d8bef9SDimitry Andric   // Clear out the bits in the wide element
3225e8d8bef9SDimitry Andric   auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3226e8d8bef9SDimitry Andric 
3227e8d8bef9SDimitry Andric   // The value to insert has all zeros already, so stick it into the masked
3228e8d8bef9SDimitry Andric   // wide element.
3229e8d8bef9SDimitry Andric   return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3230e8d8bef9SDimitry Andric }
3231e8d8bef9SDimitry Andric 
3232e8d8bef9SDimitry Andric /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3233e8d8bef9SDimitry Andric /// is increasing the element size, perform the indexing in the target element
3234e8d8bef9SDimitry Andric /// type, and use bit operations to insert at the element position. This is
3235e8d8bef9SDimitry Andric /// intended for architectures that can dynamically index the register file and
3236e8d8bef9SDimitry Andric /// want to force indexing in the native register size.
3237e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3238e8d8bef9SDimitry Andric LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3239e8d8bef9SDimitry Andric                                         LLT CastTy) {
32405ffd83dbSDimitry Andric   if (TypeIdx != 0)
32415ffd83dbSDimitry Andric     return UnableToLegalize;
32425ffd83dbSDimitry Andric 
324306c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
324406c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
324506c3fb27SDimitry Andric   LLT VecTy = DstTy;
3246e8d8bef9SDimitry Andric 
3247e8d8bef9SDimitry Andric   LLT VecEltTy = VecTy.getElementType();
3248e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3249e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
3250e8d8bef9SDimitry Andric   const unsigned OldEltSize = VecEltTy.getSizeInBits();
3251e8d8bef9SDimitry Andric 
3252e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3253e8d8bef9SDimitry Andric   unsigned OldNumElts = VecTy.getNumElements();
3254e8d8bef9SDimitry Andric 
3255e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3256e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3257e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
32585ffd83dbSDimitry Andric       return UnableToLegalize;
32595ffd83dbSDimitry Andric 
3260e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3261e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3262e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3263e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
32645ffd83dbSDimitry Andric       return UnableToLegalize;
32655ffd83dbSDimitry Andric 
3266e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3267e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3268e8d8bef9SDimitry Andric 
3269e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3270e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3271e8d8bef9SDimitry Andric 
3272e8d8bef9SDimitry Andric     Register ExtractedElt = CastVec;
3273e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3274e8d8bef9SDimitry Andric       ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3275e8d8bef9SDimitry Andric                                                           ScaledIdx).getReg(0);
32765ffd83dbSDimitry Andric     }
32775ffd83dbSDimitry Andric 
3278e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3279e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3280e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3281e8d8bef9SDimitry Andric 
3282e8d8bef9SDimitry Andric     Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3283e8d8bef9SDimitry Andric                                                Val, OffsetBits);
3284e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3285e8d8bef9SDimitry Andric       InsertedElt = MIRBuilder.buildInsertVectorElement(
3286e8d8bef9SDimitry Andric         CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3287e8d8bef9SDimitry Andric     }
3288e8d8bef9SDimitry Andric 
3289e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, InsertedElt);
3290e8d8bef9SDimitry Andric     MI.eraseFromParent();
32915ffd83dbSDimitry Andric     return Legalized;
32925ffd83dbSDimitry Andric   }
3293e8d8bef9SDimitry Andric 
32945ffd83dbSDimitry Andric   return UnableToLegalize;
32950b57cec5SDimitry Andric }
32960b57cec5SDimitry Andric 
3297fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
32980b57cec5SDimitry Andric   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3299fe6060f1SDimitry Andric   Register DstReg = LoadMI.getDstReg();
3300fe6060f1SDimitry Andric   Register PtrReg = LoadMI.getPointerReg();
33010b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
3302fe6060f1SDimitry Andric   MachineMemOperand &MMO = LoadMI.getMMO();
3303fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3304fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
33050b57cec5SDimitry Andric 
3306fe6060f1SDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3307fe6060f1SDimitry Andric   unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3308fe6060f1SDimitry Andric 
3309fe6060f1SDimitry Andric   if (MemSizeInBits != MemStoreSizeInBits) {
3310349cc55cSDimitry Andric     if (MemTy.isVector())
3311349cc55cSDimitry Andric       return UnableToLegalize;
3312349cc55cSDimitry Andric 
3313fe6060f1SDimitry Andric     // Promote to a byte-sized load if not loading an integral number of
3314fe6060f1SDimitry Andric     // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3315fe6060f1SDimitry Andric     LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3316fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3317fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3318fe6060f1SDimitry Andric 
3319fe6060f1SDimitry Andric     Register LoadReg = DstReg;
3320fe6060f1SDimitry Andric     LLT LoadTy = DstTy;
3321fe6060f1SDimitry Andric 
3322fe6060f1SDimitry Andric     // If this wasn't already an extending load, we need to widen the result
3323fe6060f1SDimitry Andric     // register to avoid creating a load with a narrower result than the source.
3324fe6060f1SDimitry Andric     if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3325fe6060f1SDimitry Andric       LoadTy = WideMemTy;
3326fe6060f1SDimitry Andric       LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3327fe6060f1SDimitry Andric     }
3328fe6060f1SDimitry Andric 
3329fe6060f1SDimitry Andric     if (isa<GSExtLoad>(LoadMI)) {
3330fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3331fe6060f1SDimitry Andric       MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
333281ad6265SDimitry Andric     } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3333fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3334fe6060f1SDimitry Andric       // The extra bits are guaranteed to be zero, since we stored them that
3335fe6060f1SDimitry Andric       // way.  A zext load from Wide thus automatically gives zext from MemVT.
3336fe6060f1SDimitry Andric       MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3337fe6060f1SDimitry Andric     } else {
3338fe6060f1SDimitry Andric       MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3339fe6060f1SDimitry Andric     }
3340fe6060f1SDimitry Andric 
3341fe6060f1SDimitry Andric     if (DstTy != LoadTy)
3342fe6060f1SDimitry Andric       MIRBuilder.buildTrunc(DstReg, LoadReg);
3343fe6060f1SDimitry Andric 
3344fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
3345fe6060f1SDimitry Andric     return Legalized;
3346fe6060f1SDimitry Andric   }
3347fe6060f1SDimitry Andric 
3348fe6060f1SDimitry Andric   // Big endian lowering not implemented.
3349fe6060f1SDimitry Andric   if (MIRBuilder.getDataLayout().isBigEndian())
3350fe6060f1SDimitry Andric     return UnableToLegalize;
3351fe6060f1SDimitry Andric 
3352349cc55cSDimitry Andric   // This load needs splitting into power of 2 sized loads.
3353349cc55cSDimitry Andric   //
33548bcb0991SDimitry Andric   // Our strategy here is to generate anyextending loads for the smaller
33558bcb0991SDimitry Andric   // types up to next power-2 result type, and then combine the two larger
33568bcb0991SDimitry Andric   // result values together, before truncating back down to the non-pow-2
33578bcb0991SDimitry Andric   // type.
33588bcb0991SDimitry Andric   // E.g. v1 = i24 load =>
33595ffd83dbSDimitry Andric   // v2 = i32 zextload (2 byte)
33608bcb0991SDimitry Andric   // v3 = i32 load (1 byte)
33618bcb0991SDimitry Andric   // v4 = i32 shl v3, 16
33628bcb0991SDimitry Andric   // v5 = i32 or v4, v2
33638bcb0991SDimitry Andric   // v1 = i24 trunc v5
33648bcb0991SDimitry Andric   // By doing this we generate the correct truncate which should get
33658bcb0991SDimitry Andric   // combined away as an artifact with a matching extend.
3366349cc55cSDimitry Andric 
3367349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3368349cc55cSDimitry Andric 
3369349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
3370349cc55cSDimitry Andric     // This load needs splitting into power of 2 sized loads.
337106c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3372349cc55cSDimitry Andric     SmallSplitSize = MemSizeInBits - LargeSplitSize;
3373349cc55cSDimitry Andric   } else {
3374349cc55cSDimitry Andric     // This is already a power of 2, but we still need to split this in half.
3375349cc55cSDimitry Andric     //
3376349cc55cSDimitry Andric     // Assume we're being asked to decompose an unaligned load.
3377349cc55cSDimitry Andric     // TODO: If this requires multiple splits, handle them all at once.
3378349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3379349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3380349cc55cSDimitry Andric       return UnableToLegalize;
3381349cc55cSDimitry Andric 
3382349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3383349cc55cSDimitry Andric   }
3384349cc55cSDimitry Andric 
3385349cc55cSDimitry Andric   if (MemTy.isVector()) {
3386349cc55cSDimitry Andric     // TODO: Handle vector extloads
3387349cc55cSDimitry Andric     if (MemTy != DstTy)
3388349cc55cSDimitry Andric       return UnableToLegalize;
3389349cc55cSDimitry Andric 
3390349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3391349cc55cSDimitry Andric     // in half.
3392349cc55cSDimitry Andric     return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3393349cc55cSDimitry Andric   }
33948bcb0991SDimitry Andric 
33958bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
33968bcb0991SDimitry Andric       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3397fe6060f1SDimitry Andric   MachineMemOperand *SmallMMO =
3398fe6060f1SDimitry Andric       MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
33998bcb0991SDimitry Andric 
34008bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
3401fe6060f1SDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
34028bcb0991SDimitry Andric   LLT AnyExtTy = LLT::scalar(AnyExtSize);
3403fe6060f1SDimitry Andric   auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3404fe6060f1SDimitry Andric                                              PtrReg, *LargeMMO);
34058bcb0991SDimitry Andric 
3406fe6060f1SDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3407fe6060f1SDimitry Andric                                             LargeSplitSize / 8);
3408480093f4SDimitry Andric   Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3409fe6060f1SDimitry Andric   auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3410fe6060f1SDimitry Andric   auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3411fe6060f1SDimitry Andric                                              SmallPtr, *SmallMMO);
34128bcb0991SDimitry Andric 
34138bcb0991SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
34148bcb0991SDimitry Andric   auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3415fe6060f1SDimitry Andric 
3416fe6060f1SDimitry Andric   if (AnyExtTy == DstTy)
3417fe6060f1SDimitry Andric     MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3418349cc55cSDimitry Andric   else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
34198bcb0991SDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3420fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, {Or});
3421349cc55cSDimitry Andric   } else {
3422349cc55cSDimitry Andric     assert(DstTy.isPointer() && "expected pointer");
3423349cc55cSDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3424349cc55cSDimitry Andric 
3425349cc55cSDimitry Andric     // FIXME: We currently consider this to be illegal for non-integral address
3426349cc55cSDimitry Andric     // spaces, but we need still need a way to reinterpret the bits.
3427349cc55cSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
3428fe6060f1SDimitry Andric   }
3429fe6060f1SDimitry Andric 
3430fe6060f1SDimitry Andric   LoadMI.eraseFromParent();
34318bcb0991SDimitry Andric   return Legalized;
34328bcb0991SDimitry Andric }
3433e8d8bef9SDimitry Andric 
3434fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
34358bcb0991SDimitry Andric   // Lower a non-power of 2 store into multiple pow-2 stores.
34368bcb0991SDimitry Andric   // E.g. split an i24 store into an i16 store + i8 store.
34378bcb0991SDimitry Andric   // We do this by first extending the stored value to the next largest power
34388bcb0991SDimitry Andric   // of 2 type, and then using truncating stores to store the components.
34398bcb0991SDimitry Andric   // By doing this, likewise with G_LOAD, generate an extend that can be
34408bcb0991SDimitry Andric   // artifact-combined away instead of leaving behind extracts.
3441fe6060f1SDimitry Andric   Register SrcReg = StoreMI.getValueReg();
3442fe6060f1SDimitry Andric   Register PtrReg = StoreMI.getPointerReg();
34438bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
3444fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
3445fe6060f1SDimitry Andric   MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3446fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3447fe6060f1SDimitry Andric 
3448fe6060f1SDimitry Andric   unsigned StoreWidth = MemTy.getSizeInBits();
3449fe6060f1SDimitry Andric   unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3450fe6060f1SDimitry Andric 
3451fe6060f1SDimitry Andric   if (StoreWidth != StoreSizeInBits) {
3452349cc55cSDimitry Andric     if (SrcTy.isVector())
3453349cc55cSDimitry Andric       return UnableToLegalize;
3454349cc55cSDimitry Andric 
3455fe6060f1SDimitry Andric     // Promote to a byte-sized store with upper bits zero if not
3456fe6060f1SDimitry Andric     // storing an integral number of bytes.  For example, promote
3457fe6060f1SDimitry Andric     // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3458fe6060f1SDimitry Andric     LLT WideTy = LLT::scalar(StoreSizeInBits);
3459fe6060f1SDimitry Andric 
3460fe6060f1SDimitry Andric     if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3461fe6060f1SDimitry Andric       // Avoid creating a store with a narrower source than result.
3462fe6060f1SDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3463fe6060f1SDimitry Andric       SrcTy = WideTy;
3464fe6060f1SDimitry Andric     }
3465fe6060f1SDimitry Andric 
3466fe6060f1SDimitry Andric     auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3467fe6060f1SDimitry Andric 
3468fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3469fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3470fe6060f1SDimitry Andric     MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3471fe6060f1SDimitry Andric     StoreMI.eraseFromParent();
3472fe6060f1SDimitry Andric     return Legalized;
3473fe6060f1SDimitry Andric   }
3474fe6060f1SDimitry Andric 
3475349cc55cSDimitry Andric   if (MemTy.isVector()) {
3476349cc55cSDimitry Andric     // TODO: Handle vector trunc stores
3477349cc55cSDimitry Andric     if (MemTy != SrcTy)
3478349cc55cSDimitry Andric       return UnableToLegalize;
3479349cc55cSDimitry Andric 
3480349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3481349cc55cSDimitry Andric     // in half.
3482349cc55cSDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3483349cc55cSDimitry Andric   }
3484349cc55cSDimitry Andric 
3485349cc55cSDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3486349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3487349cc55cSDimitry Andric 
3488349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
348906c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3490349cc55cSDimitry Andric     SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3491349cc55cSDimitry Andric   } else {
3492349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3493349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
34948bcb0991SDimitry Andric       return UnableToLegalize; // Don't know what we're being asked to do.
34958bcb0991SDimitry Andric 
3496349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3497349cc55cSDimitry Andric   }
3498349cc55cSDimitry Andric 
3499fe6060f1SDimitry Andric   // Extend to the next pow-2. If this store was itself the result of lowering,
3500fe6060f1SDimitry Andric   // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3501349cc55cSDimitry Andric   // that's wider than the stored size.
3502349cc55cSDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3503349cc55cSDimitry Andric   const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3504349cc55cSDimitry Andric 
3505349cc55cSDimitry Andric   if (SrcTy.isPointer()) {
3506349cc55cSDimitry Andric     const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3507349cc55cSDimitry Andric     SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3508349cc55cSDimitry Andric   }
3509349cc55cSDimitry Andric 
3510fe6060f1SDimitry Andric   auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
35118bcb0991SDimitry Andric 
35128bcb0991SDimitry Andric   // Obtain the smaller value by shifting away the larger value.
3513fe6060f1SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3514fe6060f1SDimitry Andric   auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
35158bcb0991SDimitry Andric 
3516480093f4SDimitry Andric   // Generate the PtrAdd and truncating stores.
35178bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
35185ffd83dbSDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(
35195ffd83dbSDimitry Andric     LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3520480093f4SDimitry Andric   auto SmallPtr =
3521349cc55cSDimitry Andric     MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
35228bcb0991SDimitry Andric 
35238bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
35248bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
35258bcb0991SDimitry Andric   MachineMemOperand *SmallMMO =
35268bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3527fe6060f1SDimitry Andric   MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3528fe6060f1SDimitry Andric   MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3529fe6060f1SDimitry Andric   StoreMI.eraseFromParent();
35308bcb0991SDimitry Andric   return Legalized;
35318bcb0991SDimitry Andric }
3532e8d8bef9SDimitry Andric 
3533e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3534e8d8bef9SDimitry Andric LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3535e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
3536e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD: {
3537e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3538e8d8bef9SDimitry Andric       return UnableToLegalize;
3539fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3540fe6060f1SDimitry Andric 
3541fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of an extending load.
3542fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3543fe6060f1SDimitry Andric       return UnableToLegalize;
3544e8d8bef9SDimitry Andric 
3545e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3546e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3547fe6060f1SDimitry Andric     MMO.setType(CastTy);
3548e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3549e8d8bef9SDimitry Andric     return Legalized;
3550e8d8bef9SDimitry Andric   }
3551e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE: {
3552e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3553e8d8bef9SDimitry Andric       return UnableToLegalize;
3554e8d8bef9SDimitry Andric 
3555fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3556fe6060f1SDimitry Andric 
3557fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of a truncating store.
3558fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3559fe6060f1SDimitry Andric       return UnableToLegalize;
3560fe6060f1SDimitry Andric 
3561e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3562e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 0);
3563fe6060f1SDimitry Andric     MMO.setType(CastTy);
3564e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3565e8d8bef9SDimitry Andric     return Legalized;
3566e8d8bef9SDimitry Andric   }
3567e8d8bef9SDimitry Andric   case TargetOpcode::G_SELECT: {
3568e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3569e8d8bef9SDimitry Andric       return UnableToLegalize;
3570e8d8bef9SDimitry Andric 
3571e8d8bef9SDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3572e8d8bef9SDimitry Andric       LLVM_DEBUG(
3573e8d8bef9SDimitry Andric           dbgs() << "bitcast action not implemented for vector select\n");
3574e8d8bef9SDimitry Andric       return UnableToLegalize;
3575e8d8bef9SDimitry Andric     }
3576e8d8bef9SDimitry Andric 
3577e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3578e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3579e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 3);
3580e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3581e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3582e8d8bef9SDimitry Andric     return Legalized;
3583e8d8bef9SDimitry Andric   }
3584e8d8bef9SDimitry Andric   case TargetOpcode::G_AND:
3585e8d8bef9SDimitry Andric   case TargetOpcode::G_OR:
3586e8d8bef9SDimitry Andric   case TargetOpcode::G_XOR: {
3587e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3588e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 1);
3589e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3590e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3591e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3592e8d8bef9SDimitry Andric     return Legalized;
3593e8d8bef9SDimitry Andric   }
3594e8d8bef9SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3595e8d8bef9SDimitry Andric     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3596e8d8bef9SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
3597e8d8bef9SDimitry Andric     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3598e8d8bef9SDimitry Andric   default:
3599e8d8bef9SDimitry Andric     return UnableToLegalize;
3600e8d8bef9SDimitry Andric   }
3601e8d8bef9SDimitry Andric }
3602e8d8bef9SDimitry Andric 
3603e8d8bef9SDimitry Andric // Legalize an instruction by changing the opcode in place.
3604e8d8bef9SDimitry Andric void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3605e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3606e8d8bef9SDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3607e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3608e8d8bef9SDimitry Andric }
3609e8d8bef9SDimitry Andric 
3610e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3611e8d8bef9SDimitry Andric LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3612e8d8bef9SDimitry Andric   using namespace TargetOpcode;
3613e8d8bef9SDimitry Andric 
3614e8d8bef9SDimitry Andric   switch(MI.getOpcode()) {
3615e8d8bef9SDimitry Andric   default:
3616e8d8bef9SDimitry Andric     return UnableToLegalize;
361706c3fb27SDimitry Andric   case TargetOpcode::G_FCONSTANT:
361806c3fb27SDimitry Andric     return lowerFConstant(MI);
3619e8d8bef9SDimitry Andric   case TargetOpcode::G_BITCAST:
3620e8d8bef9SDimitry Andric     return lowerBitcast(MI);
3621e8d8bef9SDimitry Andric   case TargetOpcode::G_SREM:
3622e8d8bef9SDimitry Andric   case TargetOpcode::G_UREM: {
3623e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3624e8d8bef9SDimitry Andric     auto Quot =
3625e8d8bef9SDimitry Andric         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3626e8d8bef9SDimitry Andric                               {MI.getOperand(1), MI.getOperand(2)});
3627e8d8bef9SDimitry Andric 
3628e8d8bef9SDimitry Andric     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3629e8d8bef9SDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3630e8d8bef9SDimitry Andric     MI.eraseFromParent();
3631e8d8bef9SDimitry Andric     return Legalized;
3632e8d8bef9SDimitry Andric   }
3633e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
3634e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
3635e8d8bef9SDimitry Andric     return lowerSADDO_SSUBO(MI);
3636e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULH:
3637e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULH:
3638e8d8bef9SDimitry Andric     return lowerSMULH_UMULH(MI);
3639e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULO:
3640e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULO: {
3641e8d8bef9SDimitry Andric     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3642e8d8bef9SDimitry Andric     // result.
364306c3fb27SDimitry Andric     auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3644e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3645e8d8bef9SDimitry Andric 
3646e8d8bef9SDimitry Andric     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3647e8d8bef9SDimitry Andric                           ? TargetOpcode::G_SMULH
3648e8d8bef9SDimitry Andric                           : TargetOpcode::G_UMULH;
3649e8d8bef9SDimitry Andric 
3650e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3651e8d8bef9SDimitry Andric     const auto &TII = MIRBuilder.getTII();
3652e8d8bef9SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_MUL));
365381ad6265SDimitry Andric     MI.removeOperand(1);
3654e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3655e8d8bef9SDimitry Andric 
3656e8d8bef9SDimitry Andric     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3657e8d8bef9SDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
3658e8d8bef9SDimitry Andric 
3659e8d8bef9SDimitry Andric     // Move insert point forward so we can use the Res register if needed.
3660e8d8bef9SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3661e8d8bef9SDimitry Andric 
3662e8d8bef9SDimitry Andric     // For *signed* multiply, overflow is detected by checking:
3663e8d8bef9SDimitry Andric     // (hi != (lo >> bitwidth-1))
3664e8d8bef9SDimitry Andric     if (Opcode == TargetOpcode::G_SMULH) {
3665e8d8bef9SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3666e8d8bef9SDimitry Andric       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3667e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3668e8d8bef9SDimitry Andric     } else {
3669e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3670e8d8bef9SDimitry Andric     }
3671e8d8bef9SDimitry Andric     return Legalized;
3672e8d8bef9SDimitry Andric   }
3673e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEG: {
367406c3fb27SDimitry Andric     auto [Res, SubByReg] = MI.getFirst2Regs();
3675e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3676e8d8bef9SDimitry Andric 
3677e8d8bef9SDimitry Andric     // TODO: Handle vector types once we are able to
3678e8d8bef9SDimitry Andric     // represent them.
3679e8d8bef9SDimitry Andric     if (Ty.isVector())
3680e8d8bef9SDimitry Andric       return UnableToLegalize;
3681e8d8bef9SDimitry Andric     auto SignMask =
3682e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3683e8d8bef9SDimitry Andric     MIRBuilder.buildXor(Res, SubByReg, SignMask);
3684e8d8bef9SDimitry Andric     MI.eraseFromParent();
3685e8d8bef9SDimitry Andric     return Legalized;
3686e8d8bef9SDimitry Andric   }
3687bdd1243dSDimitry Andric   case TargetOpcode::G_FSUB:
3688bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB: {
368906c3fb27SDimitry Andric     auto [Res, LHS, RHS] = MI.getFirst3Regs();
3690e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3691e8d8bef9SDimitry Andric 
3692e8d8bef9SDimitry Andric     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3693bdd1243dSDimitry Andric     auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3694bdd1243dSDimitry Andric 
3695bdd1243dSDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3696bdd1243dSDimitry Andric       MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3697bdd1243dSDimitry Andric     else
3698e8d8bef9SDimitry Andric       MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3699bdd1243dSDimitry Andric 
3700e8d8bef9SDimitry Andric     MI.eraseFromParent();
3701e8d8bef9SDimitry Andric     return Legalized;
3702e8d8bef9SDimitry Andric   }
3703e8d8bef9SDimitry Andric   case TargetOpcode::G_FMAD:
3704e8d8bef9SDimitry Andric     return lowerFMad(MI);
3705e8d8bef9SDimitry Andric   case TargetOpcode::G_FFLOOR:
3706e8d8bef9SDimitry Andric     return lowerFFloor(MI);
3707e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
3708e8d8bef9SDimitry Andric     return lowerIntrinsicRound(MI);
37095f757f3fSDimitry Andric   case TargetOpcode::G_FRINT: {
3710e8d8bef9SDimitry Andric     // Since round even is the assumed rounding mode for unconstrained FP
3711e8d8bef9SDimitry Andric     // operations, rint and roundeven are the same operation.
37125f757f3fSDimitry Andric     changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3713e8d8bef9SDimitry Andric     return Legalized;
3714e8d8bef9SDimitry Andric   }
3715e8d8bef9SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
371606c3fb27SDimitry Andric     auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3717e8d8bef9SDimitry Andric     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3718e8d8bef9SDimitry Andric                                   **MI.memoperands_begin());
3719e8d8bef9SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3720e8d8bef9SDimitry Andric     MI.eraseFromParent();
3721e8d8bef9SDimitry Andric     return Legalized;
3722e8d8bef9SDimitry Andric   }
3723e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD:
3724e8d8bef9SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
3725e8d8bef9SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
3726fe6060f1SDimitry Andric     return lowerLoad(cast<GAnyLoad>(MI));
3727e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE:
3728fe6060f1SDimitry Andric     return lowerStore(cast<GStore>(MI));
37290b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
37300b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
37310b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
37320b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
37330b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
3734e8d8bef9SDimitry Andric     return lowerBitCount(MI);
37350b57cec5SDimitry Andric   case G_UADDO: {
373606c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
37370b57cec5SDimitry Andric 
37380b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, LHS, RHS);
37390b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
37400b57cec5SDimitry Andric 
37410b57cec5SDimitry Andric     MI.eraseFromParent();
37420b57cec5SDimitry Andric     return Legalized;
37430b57cec5SDimitry Andric   }
37440b57cec5SDimitry Andric   case G_UADDE: {
374506c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
37465f757f3fSDimitry Andric     const LLT CondTy = MRI.getType(CarryOut);
37475f757f3fSDimitry Andric     const LLT Ty = MRI.getType(Res);
37480b57cec5SDimitry Andric 
37495f757f3fSDimitry Andric     // Initial add of the two operands.
37505ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
37515f757f3fSDimitry Andric 
37525f757f3fSDimitry Andric     // Initial check for carry.
37535f757f3fSDimitry Andric     auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
37545f757f3fSDimitry Andric 
37555f757f3fSDimitry Andric     // Add the sum and the carry.
37565ffd83dbSDimitry Andric     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
37570b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
37585f757f3fSDimitry Andric 
37595f757f3fSDimitry Andric     // Second check for carry. We can only carry if the initial sum is all 1s
37605f757f3fSDimitry Andric     // and the carry is set, resulting in a new sum of 0.
37615f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
37625f757f3fSDimitry Andric     auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
37635f757f3fSDimitry Andric     auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
37645f757f3fSDimitry Andric     MIRBuilder.buildOr(CarryOut, Carry, Carry2);
37650b57cec5SDimitry Andric 
37660b57cec5SDimitry Andric     MI.eraseFromParent();
37670b57cec5SDimitry Andric     return Legalized;
37680b57cec5SDimitry Andric   }
37690b57cec5SDimitry Andric   case G_USUBO: {
377006c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
37710b57cec5SDimitry Andric 
37720b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, LHS, RHS);
37730b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
37740b57cec5SDimitry Andric 
37750b57cec5SDimitry Andric     MI.eraseFromParent();
37760b57cec5SDimitry Andric     return Legalized;
37770b57cec5SDimitry Andric   }
37780b57cec5SDimitry Andric   case G_USUBE: {
377906c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
37805ffd83dbSDimitry Andric     const LLT CondTy = MRI.getType(BorrowOut);
37815ffd83dbSDimitry Andric     const LLT Ty = MRI.getType(Res);
37820b57cec5SDimitry Andric 
37835f757f3fSDimitry Andric     // Initial subtract of the two operands.
37845ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
37855f757f3fSDimitry Andric 
37865f757f3fSDimitry Andric     // Initial check for borrow.
37875f757f3fSDimitry Andric     auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
37885f757f3fSDimitry Andric 
37895f757f3fSDimitry Andric     // Subtract the borrow from the first subtract.
37905ffd83dbSDimitry Andric     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
37910b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
37925ffd83dbSDimitry Andric 
37935f757f3fSDimitry Andric     // Second check for borrow. We can only borrow if the initial difference is
37945f757f3fSDimitry Andric     // 0 and the borrow is set, resulting in a new difference of all 1s.
37955f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
37965f757f3fSDimitry Andric     auto TmpResEqZero =
37975f757f3fSDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
37985f757f3fSDimitry Andric     auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
37995f757f3fSDimitry Andric     MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
38000b57cec5SDimitry Andric 
38010b57cec5SDimitry Andric     MI.eraseFromParent();
38020b57cec5SDimitry Andric     return Legalized;
38030b57cec5SDimitry Andric   }
38040b57cec5SDimitry Andric   case G_UITOFP:
3805e8d8bef9SDimitry Andric     return lowerUITOFP(MI);
38060b57cec5SDimitry Andric   case G_SITOFP:
3807e8d8bef9SDimitry Andric     return lowerSITOFP(MI);
38088bcb0991SDimitry Andric   case G_FPTOUI:
3809e8d8bef9SDimitry Andric     return lowerFPTOUI(MI);
38105ffd83dbSDimitry Andric   case G_FPTOSI:
38115ffd83dbSDimitry Andric     return lowerFPTOSI(MI);
38125ffd83dbSDimitry Andric   case G_FPTRUNC:
3813e8d8bef9SDimitry Andric     return lowerFPTRUNC(MI);
3814e8d8bef9SDimitry Andric   case G_FPOWI:
3815e8d8bef9SDimitry Andric     return lowerFPOWI(MI);
38160b57cec5SDimitry Andric   case G_SMIN:
38170b57cec5SDimitry Andric   case G_SMAX:
38180b57cec5SDimitry Andric   case G_UMIN:
38190b57cec5SDimitry Andric   case G_UMAX:
3820e8d8bef9SDimitry Andric     return lowerMinMax(MI);
38210b57cec5SDimitry Andric   case G_FCOPYSIGN:
3822e8d8bef9SDimitry Andric     return lowerFCopySign(MI);
38230b57cec5SDimitry Andric   case G_FMINNUM:
38240b57cec5SDimitry Andric   case G_FMAXNUM:
38250b57cec5SDimitry Andric     return lowerFMinNumMaxNum(MI);
38265ffd83dbSDimitry Andric   case G_MERGE_VALUES:
38275ffd83dbSDimitry Andric     return lowerMergeValues(MI);
38288bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
38298bcb0991SDimitry Andric     return lowerUnmergeValues(MI);
38308bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
38318bcb0991SDimitry Andric     assert(MI.getOperand(2).isImm() && "Expected immediate");
38328bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
38338bcb0991SDimitry Andric 
383406c3fb27SDimitry Andric     auto [DstReg, SrcReg] = MI.getFirst2Regs();
38358bcb0991SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
38368bcb0991SDimitry Andric     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
38378bcb0991SDimitry Andric 
38388bcb0991SDimitry Andric     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
38395ffd83dbSDimitry Andric     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
38405ffd83dbSDimitry Andric     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
38418bcb0991SDimitry Andric     MI.eraseFromParent();
38428bcb0991SDimitry Andric     return Legalized;
38438bcb0991SDimitry Andric   }
3844e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
3845e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
3846e8d8bef9SDimitry Andric     return lowerExtractInsertVectorElt(MI);
38478bcb0991SDimitry Andric   case G_SHUFFLE_VECTOR:
38488bcb0991SDimitry Andric     return lowerShuffleVector(MI);
38498bcb0991SDimitry Andric   case G_DYN_STACKALLOC:
38508bcb0991SDimitry Andric     return lowerDynStackAlloc(MI);
38515f757f3fSDimitry Andric   case G_STACKSAVE:
38525f757f3fSDimitry Andric     return lowerStackSave(MI);
38535f757f3fSDimitry Andric   case G_STACKRESTORE:
38545f757f3fSDimitry Andric     return lowerStackRestore(MI);
38558bcb0991SDimitry Andric   case G_EXTRACT:
38568bcb0991SDimitry Andric     return lowerExtract(MI);
38578bcb0991SDimitry Andric   case G_INSERT:
38588bcb0991SDimitry Andric     return lowerInsert(MI);
3859480093f4SDimitry Andric   case G_BSWAP:
3860480093f4SDimitry Andric     return lowerBswap(MI);
3861480093f4SDimitry Andric   case G_BITREVERSE:
3862480093f4SDimitry Andric     return lowerBitreverse(MI);
3863480093f4SDimitry Andric   case G_READ_REGISTER:
38645ffd83dbSDimitry Andric   case G_WRITE_REGISTER:
38655ffd83dbSDimitry Andric     return lowerReadWriteRegister(MI);
3866e8d8bef9SDimitry Andric   case G_UADDSAT:
3867e8d8bef9SDimitry Andric   case G_USUBSAT: {
3868e8d8bef9SDimitry Andric     // Try to make a reasonable guess about which lowering strategy to use. The
3869e8d8bef9SDimitry Andric     // target can override this with custom lowering and calling the
3870e8d8bef9SDimitry Andric     // implementation functions.
3871e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3872e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3873e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3874e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
38750b57cec5SDimitry Andric   }
3876e8d8bef9SDimitry Andric   case G_SADDSAT:
3877e8d8bef9SDimitry Andric   case G_SSUBSAT: {
3878e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3879e8d8bef9SDimitry Andric 
3880e8d8bef9SDimitry Andric     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3881e8d8bef9SDimitry Andric     // since it's a shorter expansion. However, we would need to figure out the
3882e8d8bef9SDimitry Andric     // preferred boolean type for the carry out for the query.
3883e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3884e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3885e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
3886e8d8bef9SDimitry Andric   }
3887e8d8bef9SDimitry Andric   case G_SSHLSAT:
3888e8d8bef9SDimitry Andric   case G_USHLSAT:
3889e8d8bef9SDimitry Andric     return lowerShlSat(MI);
3890fe6060f1SDimitry Andric   case G_ABS:
3891fe6060f1SDimitry Andric     return lowerAbsToAddXor(MI);
3892e8d8bef9SDimitry Andric   case G_SELECT:
3893e8d8bef9SDimitry Andric     return lowerSelect(MI);
3894bdd1243dSDimitry Andric   case G_IS_FPCLASS:
3895bdd1243dSDimitry Andric     return lowerISFPCLASS(MI);
3896fe6060f1SDimitry Andric   case G_SDIVREM:
3897fe6060f1SDimitry Andric   case G_UDIVREM:
3898fe6060f1SDimitry Andric     return lowerDIVREM(MI);
3899fe6060f1SDimitry Andric   case G_FSHL:
3900fe6060f1SDimitry Andric   case G_FSHR:
3901fe6060f1SDimitry Andric     return lowerFunnelShift(MI);
3902fe6060f1SDimitry Andric   case G_ROTL:
3903fe6060f1SDimitry Andric   case G_ROTR:
3904fe6060f1SDimitry Andric     return lowerRotate(MI);
3905349cc55cSDimitry Andric   case G_MEMSET:
3906349cc55cSDimitry Andric   case G_MEMCPY:
3907349cc55cSDimitry Andric   case G_MEMMOVE:
3908349cc55cSDimitry Andric     return lowerMemCpyFamily(MI);
3909349cc55cSDimitry Andric   case G_MEMCPY_INLINE:
3910349cc55cSDimitry Andric     return lowerMemcpyInline(MI);
39115f757f3fSDimitry Andric   case G_ZEXT:
39125f757f3fSDimitry Andric   case G_SEXT:
39135f757f3fSDimitry Andric   case G_ANYEXT:
39145f757f3fSDimitry Andric     return lowerEXT(MI);
39155f757f3fSDimitry Andric   case G_TRUNC:
39165f757f3fSDimitry Andric     return lowerTRUNC(MI);
3917349cc55cSDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
3918349cc55cSDimitry Andric     return lowerVectorReduction(MI);
39195f757f3fSDimitry Andric   case G_VAARG:
39205f757f3fSDimitry Andric     return lowerVAArg(MI);
3921e8d8bef9SDimitry Andric   }
3922e8d8bef9SDimitry Andric }
3923e8d8bef9SDimitry Andric 
3924e8d8bef9SDimitry Andric Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3925e8d8bef9SDimitry Andric                                                   Align MinAlign) const {
3926e8d8bef9SDimitry Andric   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3927e8d8bef9SDimitry Andric   // datalayout for the preferred alignment. Also there should be a target hook
3928e8d8bef9SDimitry Andric   // for this to allow targets to reduce the alignment and ignore the
3929e8d8bef9SDimitry Andric   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3930e8d8bef9SDimitry Andric   // the type.
3931e8d8bef9SDimitry Andric   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3932e8d8bef9SDimitry Andric }
3933e8d8bef9SDimitry Andric 
3934e8d8bef9SDimitry Andric MachineInstrBuilder
3935e8d8bef9SDimitry Andric LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3936e8d8bef9SDimitry Andric                                       MachinePointerInfo &PtrInfo) {
3937e8d8bef9SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
3938e8d8bef9SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
3939e8d8bef9SDimitry Andric   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3940e8d8bef9SDimitry Andric 
3941e8d8bef9SDimitry Andric   unsigned AddrSpace = DL.getAllocaAddrSpace();
3942e8d8bef9SDimitry Andric   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3943e8d8bef9SDimitry Andric 
3944e8d8bef9SDimitry Andric   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3945e8d8bef9SDimitry Andric   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3946e8d8bef9SDimitry Andric }
3947e8d8bef9SDimitry Andric 
3948e8d8bef9SDimitry Andric static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3949e8d8bef9SDimitry Andric                                         LLT VecTy) {
3950e8d8bef9SDimitry Andric   int64_t IdxVal;
3951e8d8bef9SDimitry Andric   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3952e8d8bef9SDimitry Andric     return IdxReg;
3953e8d8bef9SDimitry Andric 
3954e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(IdxReg);
3955e8d8bef9SDimitry Andric   unsigned NElts = VecTy.getNumElements();
3956e8d8bef9SDimitry Andric   if (isPowerOf2_32(NElts)) {
3957e8d8bef9SDimitry Andric     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3958e8d8bef9SDimitry Andric     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3959e8d8bef9SDimitry Andric   }
3960e8d8bef9SDimitry Andric 
3961e8d8bef9SDimitry Andric   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3962e8d8bef9SDimitry Andric       .getReg(0);
3963e8d8bef9SDimitry Andric }
3964e8d8bef9SDimitry Andric 
3965e8d8bef9SDimitry Andric Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3966e8d8bef9SDimitry Andric                                                   Register Index) {
3967e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
3968e8d8bef9SDimitry Andric 
3969e8d8bef9SDimitry Andric   // Calculate the element offset and add it to the pointer.
3970e8d8bef9SDimitry Andric   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3971e8d8bef9SDimitry Andric   assert(EltSize * 8 == EltTy.getSizeInBits() &&
3972e8d8bef9SDimitry Andric          "Converting bits to bytes lost precision");
3973e8d8bef9SDimitry Andric 
3974e8d8bef9SDimitry Andric   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3975e8d8bef9SDimitry Andric 
3976e8d8bef9SDimitry Andric   LLT IdxTy = MRI.getType(Index);
3977e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3978e8d8bef9SDimitry Andric                                  MIRBuilder.buildConstant(IdxTy, EltSize));
3979e8d8bef9SDimitry Andric 
3980e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(VecPtr);
3981e8d8bef9SDimitry Andric   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
39820b57cec5SDimitry Andric }
39830b57cec5SDimitry Andric 
39840eae32dcSDimitry Andric #ifndef NDEBUG
39850eae32dcSDimitry Andric /// Check that all vector operands have same number of elements. Other operands
39860eae32dcSDimitry Andric /// should be listed in NonVecOp.
39870eae32dcSDimitry Andric static bool hasSameNumEltsOnAllVectorOperands(
39880eae32dcSDimitry Andric     GenericMachineInstr &MI, MachineRegisterInfo &MRI,
39890eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
39900eae32dcSDimitry Andric   if (MI.getNumMemOperands() != 0)
39910eae32dcSDimitry Andric     return false;
39920b57cec5SDimitry Andric 
39930eae32dcSDimitry Andric   LLT VecTy = MRI.getType(MI.getReg(0));
39940eae32dcSDimitry Andric   if (!VecTy.isVector())
39950eae32dcSDimitry Andric     return false;
39960eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
39970b57cec5SDimitry Andric 
39980eae32dcSDimitry Andric   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
39990eae32dcSDimitry Andric     MachineOperand &Op = MI.getOperand(OpIdx);
40000eae32dcSDimitry Andric     if (!Op.isReg()) {
40010eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
40020eae32dcSDimitry Andric         return false;
40030eae32dcSDimitry Andric       continue;
40040eae32dcSDimitry Andric     }
40050b57cec5SDimitry Andric 
40060eae32dcSDimitry Andric     LLT Ty = MRI.getType(Op.getReg());
40070eae32dcSDimitry Andric     if (!Ty.isVector()) {
40080eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
40090eae32dcSDimitry Andric         return false;
40100eae32dcSDimitry Andric       continue;
40110eae32dcSDimitry Andric     }
40120eae32dcSDimitry Andric 
40130eae32dcSDimitry Andric     if (Ty.getNumElements() != NumElts)
40140eae32dcSDimitry Andric       return false;
40150eae32dcSDimitry Andric   }
40160eae32dcSDimitry Andric 
40170eae32dcSDimitry Andric   return true;
40180eae32dcSDimitry Andric }
40190eae32dcSDimitry Andric #endif
40200eae32dcSDimitry Andric 
40210eae32dcSDimitry Andric /// Fill \p DstOps with DstOps that have same number of elements combined as
40220eae32dcSDimitry Andric /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
40230eae32dcSDimitry Andric /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
40240eae32dcSDimitry Andric /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
40250eae32dcSDimitry Andric static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
40260eae32dcSDimitry Andric                        unsigned NumElts) {
40270eae32dcSDimitry Andric   LLT LeftoverTy;
40280eae32dcSDimitry Andric   assert(Ty.isVector() && "Expected vector type");
40290eae32dcSDimitry Andric   LLT EltTy = Ty.getElementType();
40300eae32dcSDimitry Andric   LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
40310eae32dcSDimitry Andric   int NumParts, NumLeftover;
40320eae32dcSDimitry Andric   std::tie(NumParts, NumLeftover) =
40330eae32dcSDimitry Andric       getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
40340eae32dcSDimitry Andric 
40350eae32dcSDimitry Andric   assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
40360eae32dcSDimitry Andric   for (int i = 0; i < NumParts; ++i) {
40370eae32dcSDimitry Andric     DstOps.push_back(NarrowTy);
40380eae32dcSDimitry Andric   }
40390eae32dcSDimitry Andric 
40400eae32dcSDimitry Andric   if (LeftoverTy.isValid()) {
40410eae32dcSDimitry Andric     assert(NumLeftover == 1 && "expected exactly one leftover");
40420eae32dcSDimitry Andric     DstOps.push_back(LeftoverTy);
40430eae32dcSDimitry Andric   }
40440eae32dcSDimitry Andric }
40450eae32dcSDimitry Andric 
40460eae32dcSDimitry Andric /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
40470eae32dcSDimitry Andric /// made from \p Op depending on operand type.
40480eae32dcSDimitry Andric static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
40490eae32dcSDimitry Andric                            MachineOperand &Op) {
40500eae32dcSDimitry Andric   for (unsigned i = 0; i < N; ++i) {
40510eae32dcSDimitry Andric     if (Op.isReg())
40520eae32dcSDimitry Andric       Ops.push_back(Op.getReg());
40530eae32dcSDimitry Andric     else if (Op.isImm())
40540eae32dcSDimitry Andric       Ops.push_back(Op.getImm());
40550eae32dcSDimitry Andric     else if (Op.isPredicate())
40560eae32dcSDimitry Andric       Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
40570eae32dcSDimitry Andric     else
40580eae32dcSDimitry Andric       llvm_unreachable("Unsupported type");
40590eae32dcSDimitry Andric   }
40600b57cec5SDimitry Andric }
40610b57cec5SDimitry Andric 
40620b57cec5SDimitry Andric // Handle splitting vector operations which need to have the same number of
40630b57cec5SDimitry Andric // elements in each type index, but each type index may have a different element
40640b57cec5SDimitry Andric // type.
40650b57cec5SDimitry Andric //
40660b57cec5SDimitry Andric // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
40670b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
40680b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
40690b57cec5SDimitry Andric //
40700b57cec5SDimitry Andric // Also handles some irregular breakdown cases, e.g.
40710b57cec5SDimitry Andric // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
40720b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
40730b57cec5SDimitry Andric //             s64 = G_SHL s64, s32
40740b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
40750b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorMultiEltType(
40760eae32dcSDimitry Andric     GenericMachineInstr &MI, unsigned NumElts,
40770eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
40780eae32dcSDimitry Andric   assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
40790eae32dcSDimitry Andric          "Non-compatible opcode or not specified non-vector operands");
40800eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
40810b57cec5SDimitry Andric 
40820eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
40830eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
40840b57cec5SDimitry Andric 
40850eae32dcSDimitry Andric   // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
40860eae32dcSDimitry Andric   // Build instructions with DstOps to use instruction found by CSE directly.
40870eae32dcSDimitry Andric   // CSE copies found instruction into given vreg when building with vreg dest.
40880eae32dcSDimitry Andric   SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
40890eae32dcSDimitry Andric   // Output registers will be taken from created instructions.
40900eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
40910eae32dcSDimitry Andric   for (unsigned i = 0; i < NumDefs; ++i) {
40920eae32dcSDimitry Andric     makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
40930b57cec5SDimitry Andric   }
40940b57cec5SDimitry Andric 
40950eae32dcSDimitry Andric   // Split vector input operands into sub-vectors with NumElts elts + Leftover.
40960eae32dcSDimitry Andric   // Operands listed in NonVecOpIndices will be used as is without splitting;
40970eae32dcSDimitry Andric   // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
40980eae32dcSDimitry Andric   // scalar condition (op 1), immediate in sext_inreg (op 2).
40990eae32dcSDimitry Andric   SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
41000eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
41010eae32dcSDimitry Andric        ++UseIdx, ++UseNo) {
41020eae32dcSDimitry Andric     if (is_contained(NonVecOpIndices, UseIdx)) {
41030eae32dcSDimitry Andric       broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
41040eae32dcSDimitry Andric                      MI.getOperand(UseIdx));
41050b57cec5SDimitry Andric     } else {
41060eae32dcSDimitry Andric       SmallVector<Register, 8> SplitPieces;
41077a6dacacSDimitry Andric       extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
41087a6dacacSDimitry Andric                          MRI);
41090eae32dcSDimitry Andric       for (auto Reg : SplitPieces)
41100eae32dcSDimitry Andric         InputOpsPieces[UseNo].push_back(Reg);
41110eae32dcSDimitry Andric     }
41120b57cec5SDimitry Andric   }
41130b57cec5SDimitry Andric 
41140eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
41150eae32dcSDimitry Andric 
41160eae32dcSDimitry Andric   // Take i-th piece of each input operand split and build sub-vector/scalar
41170eae32dcSDimitry Andric   // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
41180eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
41190eae32dcSDimitry Andric     SmallVector<DstOp, 2> Defs;
41200eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
41210eae32dcSDimitry Andric       Defs.push_back(OutputOpsPieces[DstNo][i]);
41220eae32dcSDimitry Andric 
41230eae32dcSDimitry Andric     SmallVector<SrcOp, 3> Uses;
41240eae32dcSDimitry Andric     for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
41250eae32dcSDimitry Andric       Uses.push_back(InputOpsPieces[InputNo][i]);
41260eae32dcSDimitry Andric 
41270eae32dcSDimitry Andric     auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
41280eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
41290eae32dcSDimitry Andric       OutputRegs[DstNo].push_back(I.getReg(DstNo));
41300b57cec5SDimitry Andric   }
41310b57cec5SDimitry Andric 
41320eae32dcSDimitry Andric   // Merge small outputs into MI's output for each def operand.
41330eae32dcSDimitry Andric   if (NumLeftovers) {
41340eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
41350eae32dcSDimitry Andric       mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
41360eae32dcSDimitry Andric   } else {
41370eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
4138bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
41390eae32dcSDimitry Andric   }
41400b57cec5SDimitry Andric 
41410b57cec5SDimitry Andric   MI.eraseFromParent();
41420b57cec5SDimitry Andric   return Legalized;
41430b57cec5SDimitry Andric }
41440b57cec5SDimitry Andric 
41450b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
41460eae32dcSDimitry Andric LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
41470eae32dcSDimitry Andric                                         unsigned NumElts) {
41480eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
41490b57cec5SDimitry Andric 
41500eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
41510eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
41520b57cec5SDimitry Andric 
41530eae32dcSDimitry Andric   SmallVector<DstOp, 8> OutputOpsPieces;
41540eae32dcSDimitry Andric   SmallVector<Register, 8> OutputRegs;
41550eae32dcSDimitry Andric   makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
41560b57cec5SDimitry Andric 
41570eae32dcSDimitry Andric   // Instructions that perform register split will be inserted in basic block
41580eae32dcSDimitry Andric   // where register is defined (basic block is in the next operand).
41590eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
41600eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
41610eae32dcSDimitry Andric        UseIdx += 2, ++UseNo) {
41620eae32dcSDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4163bdd1243dSDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
41647a6dacacSDimitry Andric     extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
41657a6dacacSDimitry Andric                        MIRBuilder, MRI);
41660b57cec5SDimitry Andric   }
41670eae32dcSDimitry Andric 
41680eae32dcSDimitry Andric   // Build PHIs with fewer elements.
41690eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
41700eae32dcSDimitry Andric   MIRBuilder.setInsertPt(*MI.getParent(), MI);
41710eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
41720eae32dcSDimitry Andric     auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
41730eae32dcSDimitry Andric     Phi.addDef(
41740eae32dcSDimitry Andric         MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
41750eae32dcSDimitry Andric     OutputRegs.push_back(Phi.getReg(0));
41760eae32dcSDimitry Andric 
41770eae32dcSDimitry Andric     for (unsigned j = 0; j < NumInputs / 2; ++j) {
41780eae32dcSDimitry Andric       Phi.addUse(InputOpsPieces[j][i]);
41790eae32dcSDimitry Andric       Phi.add(MI.getOperand(1 + j * 2 + 1));
41800eae32dcSDimitry Andric     }
41810eae32dcSDimitry Andric   }
41820eae32dcSDimitry Andric 
41830eae32dcSDimitry Andric   // Merge small outputs into MI's def.
41840eae32dcSDimitry Andric   if (NumLeftovers) {
41850eae32dcSDimitry Andric     mergeMixedSubvectors(MI.getReg(0), OutputRegs);
41860eae32dcSDimitry Andric   } else {
4187bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
41880b57cec5SDimitry Andric   }
41890b57cec5SDimitry Andric 
41900b57cec5SDimitry Andric   MI.eraseFromParent();
41910b57cec5SDimitry Andric   return Legalized;
41920b57cec5SDimitry Andric }
41930b57cec5SDimitry Andric 
41940b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
41958bcb0991SDimitry Andric LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
41968bcb0991SDimitry Andric                                                   unsigned TypeIdx,
41978bcb0991SDimitry Andric                                                   LLT NarrowTy) {
41988bcb0991SDimitry Andric   const int NumDst = MI.getNumOperands() - 1;
41998bcb0991SDimitry Andric   const Register SrcReg = MI.getOperand(NumDst).getReg();
42000eae32dcSDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
42018bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
42028bcb0991SDimitry Andric 
42030eae32dcSDimitry Andric   if (TypeIdx != 1 || NarrowTy == DstTy)
42048bcb0991SDimitry Andric     return UnableToLegalize;
42058bcb0991SDimitry Andric 
42060eae32dcSDimitry Andric   // Requires compatible types. Otherwise SrcReg should have been defined by
42070eae32dcSDimitry Andric   // merge-like instruction that would get artifact combined. Most likely
42080eae32dcSDimitry Andric   // instruction that defines SrcReg has to perform more/fewer elements
42090eae32dcSDimitry Andric   // legalization compatible with NarrowTy.
42100eae32dcSDimitry Andric   assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
42110eae32dcSDimitry Andric   assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
42128bcb0991SDimitry Andric 
42130eae32dcSDimitry Andric   if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
42140eae32dcSDimitry Andric       (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
42150eae32dcSDimitry Andric     return UnableToLegalize;
42160eae32dcSDimitry Andric 
42170eae32dcSDimitry Andric   // This is most likely DstTy (smaller then register size) packed in SrcTy
42180eae32dcSDimitry Andric   // (larger then register size) and since unmerge was not combined it will be
42190eae32dcSDimitry Andric   // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
42200eae32dcSDimitry Andric   // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
42210eae32dcSDimitry Andric 
42220eae32dcSDimitry Andric   // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
42230eae32dcSDimitry Andric   //
42240eae32dcSDimitry Andric   // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
42250eae32dcSDimitry Andric   // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
42260eae32dcSDimitry Andric   // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
42270eae32dcSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
42288bcb0991SDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
42298bcb0991SDimitry Andric   const int PartsPerUnmerge = NumDst / NumUnmerge;
42308bcb0991SDimitry Andric 
42318bcb0991SDimitry Andric   for (int I = 0; I != NumUnmerge; ++I) {
42328bcb0991SDimitry Andric     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
42338bcb0991SDimitry Andric 
42348bcb0991SDimitry Andric     for (int J = 0; J != PartsPerUnmerge; ++J)
42358bcb0991SDimitry Andric       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
42368bcb0991SDimitry Andric     MIB.addUse(Unmerge.getReg(I));
42378bcb0991SDimitry Andric   }
42388bcb0991SDimitry Andric 
42398bcb0991SDimitry Andric   MI.eraseFromParent();
42408bcb0991SDimitry Andric   return Legalized;
42418bcb0991SDimitry Andric }
42428bcb0991SDimitry Andric 
4243fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
4244e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4245e8d8bef9SDimitry Andric                                           LLT NarrowTy) {
424606c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
42470eae32dcSDimitry Andric   // Requires compatible types. Otherwise user of DstReg did not perform unmerge
42480eae32dcSDimitry Andric   // that should have been artifact combined. Most likely instruction that uses
42490eae32dcSDimitry Andric   // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
42500eae32dcSDimitry Andric   assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
42510eae32dcSDimitry Andric   assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
42520eae32dcSDimitry Andric   if (NarrowTy == SrcTy)
42530eae32dcSDimitry Andric     return UnableToLegalize;
42548bcb0991SDimitry Andric 
42550eae32dcSDimitry Andric   // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
42560eae32dcSDimitry Andric   // is for old mir tests. Since the changes to more/fewer elements it should no
42570eae32dcSDimitry Andric   // longer be possible to generate MIR like this when starting from llvm-ir
42580eae32dcSDimitry Andric   // because LCMTy approach was replaced with merge/unmerge to vector elements.
42590eae32dcSDimitry Andric   if (TypeIdx == 1) {
42600eae32dcSDimitry Andric     assert(SrcTy.isVector() && "Expected vector types");
42610eae32dcSDimitry Andric     assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
42620eae32dcSDimitry Andric     if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
42630eae32dcSDimitry Andric         (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
42640eae32dcSDimitry Andric       return UnableToLegalize;
42650eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
42660eae32dcSDimitry Andric     //
42670eae32dcSDimitry Andric     // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
42680eae32dcSDimitry Andric     // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
42690eae32dcSDimitry Andric     // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
42700eae32dcSDimitry Andric     // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
42710eae32dcSDimitry Andric     // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
42720eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4273e8d8bef9SDimitry Andric 
42740eae32dcSDimitry Andric     SmallVector<Register, 8> Elts;
42750eae32dcSDimitry Andric     LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
42760eae32dcSDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
42770eae32dcSDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
42780eae32dcSDimitry Andric       for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
42790eae32dcSDimitry Andric         Elts.push_back(Unmerge.getReg(j));
42800eae32dcSDimitry Andric     }
4281e8d8bef9SDimitry Andric 
42820eae32dcSDimitry Andric     SmallVector<Register, 8> NarrowTyElts;
42830eae32dcSDimitry Andric     unsigned NumNarrowTyElts = NarrowTy.getNumElements();
42840eae32dcSDimitry Andric     unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
42850eae32dcSDimitry Andric     for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
42860eae32dcSDimitry Andric          ++i, Offset += NumNarrowTyElts) {
42870eae32dcSDimitry Andric       ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4288bdd1243dSDimitry Andric       NarrowTyElts.push_back(
4289bdd1243dSDimitry Andric           MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
42900eae32dcSDimitry Andric     }
4291e8d8bef9SDimitry Andric 
4292bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
42930eae32dcSDimitry Andric     MI.eraseFromParent();
42940eae32dcSDimitry Andric     return Legalized;
42950eae32dcSDimitry Andric   }
42960eae32dcSDimitry Andric 
42970eae32dcSDimitry Andric   assert(TypeIdx == 0 && "Bad type index");
42980eae32dcSDimitry Andric   if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
42990eae32dcSDimitry Andric       (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
43000eae32dcSDimitry Andric     return UnableToLegalize;
43010eae32dcSDimitry Andric 
43020eae32dcSDimitry Andric   // This is most likely SrcTy (smaller then register size) packed in DstTy
43030eae32dcSDimitry Andric   // (larger then register size) and since merge was not combined it will be
43040eae32dcSDimitry Andric   // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
43050eae32dcSDimitry Andric   // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
43060eae32dcSDimitry Andric 
43070eae32dcSDimitry Andric   // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
43080eae32dcSDimitry Andric   //
43090eae32dcSDimitry Andric   // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
43100eae32dcSDimitry Andric   // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
43110eae32dcSDimitry Andric   // %0:_(DstTy)  = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
43120eae32dcSDimitry Andric   SmallVector<Register, 8> NarrowTyElts;
43130eae32dcSDimitry Andric   unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
43140eae32dcSDimitry Andric   unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
43150eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
43160eae32dcSDimitry Andric   for (unsigned i = 0; i < NumParts; ++i) {
43170eae32dcSDimitry Andric     SmallVector<Register, 8> Sources;
43180eae32dcSDimitry Andric     for (unsigned j = 0; j < NumElts; ++j)
43190eae32dcSDimitry Andric       Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4320bdd1243dSDimitry Andric     NarrowTyElts.push_back(
4321bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
43220eae32dcSDimitry Andric   }
43230eae32dcSDimitry Andric 
4324bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4325e8d8bef9SDimitry Andric   MI.eraseFromParent();
4326e8d8bef9SDimitry Andric   return Legalized;
43278bcb0991SDimitry Andric }
43288bcb0991SDimitry Andric 
4329e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
4330e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4331e8d8bef9SDimitry Andric                                                            unsigned TypeIdx,
4332e8d8bef9SDimitry Andric                                                            LLT NarrowVecTy) {
433306c3fb27SDimitry Andric   auto [DstReg, SrcVec] = MI.getFirst2Regs();
4334e8d8bef9SDimitry Andric   Register InsertVal;
4335e8d8bef9SDimitry Andric   bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4336e8d8bef9SDimitry Andric 
4337e8d8bef9SDimitry Andric   assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4338e8d8bef9SDimitry Andric   if (IsInsert)
4339e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
4340e8d8bef9SDimitry Andric 
4341e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4342e8d8bef9SDimitry Andric 
4343e8d8bef9SDimitry Andric   // TODO: Handle total scalarization case.
4344e8d8bef9SDimitry Andric   if (!NarrowVecTy.isVector())
4345e8d8bef9SDimitry Andric     return UnableToLegalize;
4346e8d8bef9SDimitry Andric 
4347e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
4348e8d8bef9SDimitry Andric 
4349e8d8bef9SDimitry Andric   // If the index is a constant, we can really break this down as you would
4350e8d8bef9SDimitry Andric   // expect, and index into the target size pieces.
4351e8d8bef9SDimitry Andric   int64_t IdxVal;
4352349cc55cSDimitry Andric   auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4353fe6060f1SDimitry Andric   if (MaybeCst) {
4354fe6060f1SDimitry Andric     IdxVal = MaybeCst->Value.getSExtValue();
4355e8d8bef9SDimitry Andric     // Avoid out of bounds indexing the pieces.
4356e8d8bef9SDimitry Andric     if (IdxVal >= VecTy.getNumElements()) {
4357e8d8bef9SDimitry Andric       MIRBuilder.buildUndef(DstReg);
4358e8d8bef9SDimitry Andric       MI.eraseFromParent();
4359e8d8bef9SDimitry Andric       return Legalized;
43608bcb0991SDimitry Andric     }
43618bcb0991SDimitry Andric 
4362e8d8bef9SDimitry Andric     SmallVector<Register, 8> VecParts;
4363e8d8bef9SDimitry Andric     LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4364e8d8bef9SDimitry Andric 
4365e8d8bef9SDimitry Andric     // Build a sequence of NarrowTy pieces in VecParts for this operand.
4366e8d8bef9SDimitry Andric     LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4367e8d8bef9SDimitry Andric                                     TargetOpcode::G_ANYEXT);
4368e8d8bef9SDimitry Andric 
4369e8d8bef9SDimitry Andric     unsigned NewNumElts = NarrowVecTy.getNumElements();
4370e8d8bef9SDimitry Andric 
4371e8d8bef9SDimitry Andric     LLT IdxTy = MRI.getType(Idx);
4372e8d8bef9SDimitry Andric     int64_t PartIdx = IdxVal / NewNumElts;
4373e8d8bef9SDimitry Andric     auto NewIdx =
4374e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4375e8d8bef9SDimitry Andric 
4376e8d8bef9SDimitry Andric     if (IsInsert) {
4377e8d8bef9SDimitry Andric       LLT PartTy = MRI.getType(VecParts[PartIdx]);
4378e8d8bef9SDimitry Andric 
4379e8d8bef9SDimitry Andric       // Use the adjusted index to insert into one of the subvectors.
4380e8d8bef9SDimitry Andric       auto InsertPart = MIRBuilder.buildInsertVectorElement(
4381e8d8bef9SDimitry Andric           PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4382e8d8bef9SDimitry Andric       VecParts[PartIdx] = InsertPart.getReg(0);
4383e8d8bef9SDimitry Andric 
4384e8d8bef9SDimitry Andric       // Recombine the inserted subvector with the others to reform the result
4385e8d8bef9SDimitry Andric       // vector.
4386e8d8bef9SDimitry Andric       buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4387e8d8bef9SDimitry Andric     } else {
4388e8d8bef9SDimitry Andric       MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
43898bcb0991SDimitry Andric     }
43908bcb0991SDimitry Andric 
43918bcb0991SDimitry Andric     MI.eraseFromParent();
43928bcb0991SDimitry Andric     return Legalized;
43938bcb0991SDimitry Andric   }
43948bcb0991SDimitry Andric 
4395e8d8bef9SDimitry Andric   // With a variable index, we can't perform the operation in a smaller type, so
4396e8d8bef9SDimitry Andric   // we're forced to expand this.
4397e8d8bef9SDimitry Andric   //
4398e8d8bef9SDimitry Andric   // TODO: We could emit a chain of compare/select to figure out which piece to
4399e8d8bef9SDimitry Andric   // index.
4400e8d8bef9SDimitry Andric   return lowerExtractInsertVectorElt(MI);
4401e8d8bef9SDimitry Andric }
4402e8d8bef9SDimitry Andric 
44038bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
4404fe6060f1SDimitry Andric LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
44050b57cec5SDimitry Andric                                       LLT NarrowTy) {
44060b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
44070b57cec5SDimitry Andric   if (TypeIdx != 0)
44080b57cec5SDimitry Andric     return UnableToLegalize;
44090b57cec5SDimitry Andric 
44100b57cec5SDimitry Andric   // This implementation doesn't work for atomics. Give up instead of doing
44110b57cec5SDimitry Andric   // something invalid.
4412fe6060f1SDimitry Andric   if (LdStMI.isAtomic())
44130b57cec5SDimitry Andric     return UnableToLegalize;
44140b57cec5SDimitry Andric 
4415fe6060f1SDimitry Andric   bool IsLoad = isa<GLoad>(LdStMI);
4416fe6060f1SDimitry Andric   Register ValReg = LdStMI.getReg(0);
4417fe6060f1SDimitry Andric   Register AddrReg = LdStMI.getPointerReg();
44180b57cec5SDimitry Andric   LLT ValTy = MRI.getType(ValReg);
44190b57cec5SDimitry Andric 
44205ffd83dbSDimitry Andric   // FIXME: Do we need a distinct NarrowMemory legalize action?
4421fe6060f1SDimitry Andric   if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
44225ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
44235ffd83dbSDimitry Andric     return UnableToLegalize;
44245ffd83dbSDimitry Andric   }
44255ffd83dbSDimitry Andric 
44260b57cec5SDimitry Andric   int NumParts = -1;
44270b57cec5SDimitry Andric   int NumLeftover = -1;
44280b57cec5SDimitry Andric   LLT LeftoverTy;
44290b57cec5SDimitry Andric   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
44300b57cec5SDimitry Andric   if (IsLoad) {
44310b57cec5SDimitry Andric     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
44320b57cec5SDimitry Andric   } else {
44330b57cec5SDimitry Andric     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
44347a6dacacSDimitry Andric                      NarrowLeftoverRegs, MIRBuilder, MRI)) {
44350b57cec5SDimitry Andric       NumParts = NarrowRegs.size();
44360b57cec5SDimitry Andric       NumLeftover = NarrowLeftoverRegs.size();
44370b57cec5SDimitry Andric     }
44380b57cec5SDimitry Andric   }
44390b57cec5SDimitry Andric 
44400b57cec5SDimitry Andric   if (NumParts == -1)
44410b57cec5SDimitry Andric     return UnableToLegalize;
44420b57cec5SDimitry Andric 
4443e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(AddrReg);
4444e8d8bef9SDimitry Andric   const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
44450b57cec5SDimitry Andric 
44460b57cec5SDimitry Andric   unsigned TotalSize = ValTy.getSizeInBits();
44470b57cec5SDimitry Andric 
44480b57cec5SDimitry Andric   // Split the load/store into PartTy sized pieces starting at Offset. If this
44490b57cec5SDimitry Andric   // is a load, return the new registers in ValRegs. For a store, each elements
44500b57cec5SDimitry Andric   // of ValRegs should be PartTy. Returns the next offset that needs to be
44510b57cec5SDimitry Andric   // handled.
445281ad6265SDimitry Andric   bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4453fe6060f1SDimitry Andric   auto MMO = LdStMI.getMMO();
44540b57cec5SDimitry Andric   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
445581ad6265SDimitry Andric                              unsigned NumParts, unsigned Offset) -> unsigned {
44560b57cec5SDimitry Andric     MachineFunction &MF = MIRBuilder.getMF();
44570b57cec5SDimitry Andric     unsigned PartSize = PartTy.getSizeInBits();
44580b57cec5SDimitry Andric     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
445981ad6265SDimitry Andric          ++Idx) {
44600b57cec5SDimitry Andric       unsigned ByteOffset = Offset / 8;
44610b57cec5SDimitry Andric       Register NewAddrReg;
44620b57cec5SDimitry Andric 
4463480093f4SDimitry Andric       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
44640b57cec5SDimitry Andric 
44650b57cec5SDimitry Andric       MachineMemOperand *NewMMO =
4466fe6060f1SDimitry Andric           MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
44670b57cec5SDimitry Andric 
44680b57cec5SDimitry Andric       if (IsLoad) {
44690b57cec5SDimitry Andric         Register Dst = MRI.createGenericVirtualRegister(PartTy);
44700b57cec5SDimitry Andric         ValRegs.push_back(Dst);
44710b57cec5SDimitry Andric         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
44720b57cec5SDimitry Andric       } else {
44730b57cec5SDimitry Andric         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
44740b57cec5SDimitry Andric       }
447581ad6265SDimitry Andric       Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
44760b57cec5SDimitry Andric     }
44770b57cec5SDimitry Andric 
44780b57cec5SDimitry Andric     return Offset;
44790b57cec5SDimitry Andric   };
44800b57cec5SDimitry Andric 
448181ad6265SDimitry Andric   unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
448281ad6265SDimitry Andric   unsigned HandledOffset =
448381ad6265SDimitry Andric       splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
44840b57cec5SDimitry Andric 
44850b57cec5SDimitry Andric   // Handle the rest of the register if this isn't an even type breakdown.
44860b57cec5SDimitry Andric   if (LeftoverTy.isValid())
448781ad6265SDimitry Andric     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
44880b57cec5SDimitry Andric 
44890b57cec5SDimitry Andric   if (IsLoad) {
44900b57cec5SDimitry Andric     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
44910b57cec5SDimitry Andric                 LeftoverTy, NarrowLeftoverRegs);
44920b57cec5SDimitry Andric   }
44930b57cec5SDimitry Andric 
4494fe6060f1SDimitry Andric   LdStMI.eraseFromParent();
44950b57cec5SDimitry Andric   return Legalized;
44960b57cec5SDimitry Andric }
44970b57cec5SDimitry Andric 
44980b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
44990b57cec5SDimitry Andric LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
45000b57cec5SDimitry Andric                                      LLT NarrowTy) {
45010b57cec5SDimitry Andric   using namespace TargetOpcode;
45020eae32dcSDimitry Andric   GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
45030eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
45040b57cec5SDimitry Andric 
45050b57cec5SDimitry Andric   switch (MI.getOpcode()) {
45060b57cec5SDimitry Andric   case G_IMPLICIT_DEF:
45075ffd83dbSDimitry Andric   case G_TRUNC:
45080b57cec5SDimitry Andric   case G_AND:
45090b57cec5SDimitry Andric   case G_OR:
45100b57cec5SDimitry Andric   case G_XOR:
45110b57cec5SDimitry Andric   case G_ADD:
45120b57cec5SDimitry Andric   case G_SUB:
45130b57cec5SDimitry Andric   case G_MUL:
4514e8d8bef9SDimitry Andric   case G_PTR_ADD:
45150b57cec5SDimitry Andric   case G_SMULH:
45160b57cec5SDimitry Andric   case G_UMULH:
45170b57cec5SDimitry Andric   case G_FADD:
45180b57cec5SDimitry Andric   case G_FMUL:
45190b57cec5SDimitry Andric   case G_FSUB:
45200b57cec5SDimitry Andric   case G_FNEG:
45210b57cec5SDimitry Andric   case G_FABS:
45220b57cec5SDimitry Andric   case G_FCANONICALIZE:
45230b57cec5SDimitry Andric   case G_FDIV:
45240b57cec5SDimitry Andric   case G_FREM:
45250b57cec5SDimitry Andric   case G_FMA:
45268bcb0991SDimitry Andric   case G_FMAD:
45270b57cec5SDimitry Andric   case G_FPOW:
45280b57cec5SDimitry Andric   case G_FEXP:
45290b57cec5SDimitry Andric   case G_FEXP2:
45305f757f3fSDimitry Andric   case G_FEXP10:
45310b57cec5SDimitry Andric   case G_FLOG:
45320b57cec5SDimitry Andric   case G_FLOG2:
45330b57cec5SDimitry Andric   case G_FLOG10:
453406c3fb27SDimitry Andric   case G_FLDEXP:
45350b57cec5SDimitry Andric   case G_FNEARBYINT:
45360b57cec5SDimitry Andric   case G_FCEIL:
45370b57cec5SDimitry Andric   case G_FFLOOR:
45380b57cec5SDimitry Andric   case G_FRINT:
45390b57cec5SDimitry Andric   case G_INTRINSIC_ROUND:
4540e8d8bef9SDimitry Andric   case G_INTRINSIC_ROUNDEVEN:
45410b57cec5SDimitry Andric   case G_INTRINSIC_TRUNC:
45420b57cec5SDimitry Andric   case G_FCOS:
45430b57cec5SDimitry Andric   case G_FSIN:
45440b57cec5SDimitry Andric   case G_FSQRT:
45450b57cec5SDimitry Andric   case G_BSWAP:
45468bcb0991SDimitry Andric   case G_BITREVERSE:
45470b57cec5SDimitry Andric   case G_SDIV:
4548480093f4SDimitry Andric   case G_UDIV:
4549480093f4SDimitry Andric   case G_SREM:
4550480093f4SDimitry Andric   case G_UREM:
4551fe6060f1SDimitry Andric   case G_SDIVREM:
4552fe6060f1SDimitry Andric   case G_UDIVREM:
45530b57cec5SDimitry Andric   case G_SMIN:
45540b57cec5SDimitry Andric   case G_SMAX:
45550b57cec5SDimitry Andric   case G_UMIN:
45560b57cec5SDimitry Andric   case G_UMAX:
4557fe6060f1SDimitry Andric   case G_ABS:
45580b57cec5SDimitry Andric   case G_FMINNUM:
45590b57cec5SDimitry Andric   case G_FMAXNUM:
45600b57cec5SDimitry Andric   case G_FMINNUM_IEEE:
45610b57cec5SDimitry Andric   case G_FMAXNUM_IEEE:
45620b57cec5SDimitry Andric   case G_FMINIMUM:
45630b57cec5SDimitry Andric   case G_FMAXIMUM:
45645ffd83dbSDimitry Andric   case G_FSHL:
45655ffd83dbSDimitry Andric   case G_FSHR:
4566349cc55cSDimitry Andric   case G_ROTL:
4567349cc55cSDimitry Andric   case G_ROTR:
45685ffd83dbSDimitry Andric   case G_FREEZE:
45695ffd83dbSDimitry Andric   case G_SADDSAT:
45705ffd83dbSDimitry Andric   case G_SSUBSAT:
45715ffd83dbSDimitry Andric   case G_UADDSAT:
45725ffd83dbSDimitry Andric   case G_USUBSAT:
4573fe6060f1SDimitry Andric   case G_UMULO:
4574fe6060f1SDimitry Andric   case G_SMULO:
45750b57cec5SDimitry Andric   case G_SHL:
45760b57cec5SDimitry Andric   case G_LSHR:
45770b57cec5SDimitry Andric   case G_ASHR:
4578e8d8bef9SDimitry Andric   case G_SSHLSAT:
4579e8d8bef9SDimitry Andric   case G_USHLSAT:
45800b57cec5SDimitry Andric   case G_CTLZ:
45810b57cec5SDimitry Andric   case G_CTLZ_ZERO_UNDEF:
45820b57cec5SDimitry Andric   case G_CTTZ:
45830b57cec5SDimitry Andric   case G_CTTZ_ZERO_UNDEF:
45840b57cec5SDimitry Andric   case G_CTPOP:
45850b57cec5SDimitry Andric   case G_FCOPYSIGN:
45860b57cec5SDimitry Andric   case G_ZEXT:
45870b57cec5SDimitry Andric   case G_SEXT:
45880b57cec5SDimitry Andric   case G_ANYEXT:
45890b57cec5SDimitry Andric   case G_FPEXT:
45900b57cec5SDimitry Andric   case G_FPTRUNC:
45910b57cec5SDimitry Andric   case G_SITOFP:
45920b57cec5SDimitry Andric   case G_UITOFP:
45930b57cec5SDimitry Andric   case G_FPTOSI:
45940b57cec5SDimitry Andric   case G_FPTOUI:
45950b57cec5SDimitry Andric   case G_INTTOPTR:
45960b57cec5SDimitry Andric   case G_PTRTOINT:
45970b57cec5SDimitry Andric   case G_ADDRSPACE_CAST:
459881ad6265SDimitry Andric   case G_UADDO:
459981ad6265SDimitry Andric   case G_USUBO:
460081ad6265SDimitry Andric   case G_UADDE:
460181ad6265SDimitry Andric   case G_USUBE:
460281ad6265SDimitry Andric   case G_SADDO:
460381ad6265SDimitry Andric   case G_SSUBO:
460481ad6265SDimitry Andric   case G_SADDE:
460581ad6265SDimitry Andric   case G_SSUBE:
4606bdd1243dSDimitry Andric   case G_STRICT_FADD:
4607bdd1243dSDimitry Andric   case G_STRICT_FSUB:
4608bdd1243dSDimitry Andric   case G_STRICT_FMUL:
4609bdd1243dSDimitry Andric   case G_STRICT_FMA:
461006c3fb27SDimitry Andric   case G_STRICT_FLDEXP:
461106c3fb27SDimitry Andric   case G_FFREXP:
46120eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts);
46130b57cec5SDimitry Andric   case G_ICMP:
46140b57cec5SDimitry Andric   case G_FCMP:
46150eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
4616bdd1243dSDimitry Andric   case G_IS_FPCLASS:
4617bdd1243dSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
46180b57cec5SDimitry Andric   case G_SELECT:
46190eae32dcSDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
46200eae32dcSDimitry Andric       return fewerElementsVectorMultiEltType(GMI, NumElts);
46210eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
46220b57cec5SDimitry Andric   case G_PHI:
46230eae32dcSDimitry Andric     return fewerElementsVectorPhi(GMI, NumElts);
46248bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
46258bcb0991SDimitry Andric     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
46268bcb0991SDimitry Andric   case G_BUILD_VECTOR:
4627e8d8bef9SDimitry Andric     assert(TypeIdx == 0 && "not a vector type index");
4628e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4629e8d8bef9SDimitry Andric   case G_CONCAT_VECTORS:
4630e8d8bef9SDimitry Andric     if (TypeIdx != 1) // TODO: This probably does work as expected already.
4631e8d8bef9SDimitry Andric       return UnableToLegalize;
4632e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4633e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
4634e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
4635e8d8bef9SDimitry Andric     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
46360b57cec5SDimitry Andric   case G_LOAD:
46370b57cec5SDimitry Andric   case G_STORE:
4638fe6060f1SDimitry Andric     return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
46395ffd83dbSDimitry Andric   case G_SEXT_INREG:
46400eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4641fe6060f1SDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
4642fe6060f1SDimitry Andric     return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
46431db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
46441db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
46451db9f3b2SDimitry Andric     return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4646fe6060f1SDimitry Andric   case G_SHUFFLE_VECTOR:
4647fe6060f1SDimitry Andric     return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
46481db9f3b2SDimitry Andric   case G_FPOWI:
46491db9f3b2SDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
46500b57cec5SDimitry Andric   default:
46510b57cec5SDimitry Andric     return UnableToLegalize;
46520b57cec5SDimitry Andric   }
46530b57cec5SDimitry Andric }
46540b57cec5SDimitry Andric 
4655fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4656fe6060f1SDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4657fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4658fe6060f1SDimitry Andric   if (TypeIdx != 0)
4659fe6060f1SDimitry Andric     return UnableToLegalize;
4660fe6060f1SDimitry Andric 
466106c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
466206c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
4663fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4664fe6060f1SDimitry Andric   // The shuffle should be canonicalized by now.
4665fe6060f1SDimitry Andric   if (DstTy != Src1Ty)
4666fe6060f1SDimitry Andric     return UnableToLegalize;
4667fe6060f1SDimitry Andric   if (DstTy != Src2Ty)
4668fe6060f1SDimitry Andric     return UnableToLegalize;
4669fe6060f1SDimitry Andric 
4670fe6060f1SDimitry Andric   if (!isPowerOf2_32(DstTy.getNumElements()))
4671fe6060f1SDimitry Andric     return UnableToLegalize;
4672fe6060f1SDimitry Andric 
4673fe6060f1SDimitry Andric   // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4674fe6060f1SDimitry Andric   // Further legalization attempts will be needed to do split further.
4675fe6060f1SDimitry Andric   NarrowTy =
4676fe6060f1SDimitry Andric       DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4677fe6060f1SDimitry Andric   unsigned NewElts = NarrowTy.getNumElements();
4678fe6060f1SDimitry Andric 
4679fe6060f1SDimitry Andric   SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
46807a6dacacSDimitry Andric   extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
46817a6dacacSDimitry Andric   extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4682fe6060f1SDimitry Andric   Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4683fe6060f1SDimitry Andric                         SplitSrc2Regs[1]};
4684fe6060f1SDimitry Andric 
4685fe6060f1SDimitry Andric   Register Hi, Lo;
4686fe6060f1SDimitry Andric 
4687fe6060f1SDimitry Andric   // If Lo or Hi uses elements from at most two of the four input vectors, then
4688fe6060f1SDimitry Andric   // express it as a vector shuffle of those two inputs.  Otherwise extract the
4689fe6060f1SDimitry Andric   // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4690fe6060f1SDimitry Andric   SmallVector<int, 16> Ops;
4691fe6060f1SDimitry Andric   for (unsigned High = 0; High < 2; ++High) {
4692fe6060f1SDimitry Andric     Register &Output = High ? Hi : Lo;
4693fe6060f1SDimitry Andric 
4694fe6060f1SDimitry Andric     // Build a shuffle mask for the output, discovering on the fly which
4695fe6060f1SDimitry Andric     // input vectors to use as shuffle operands (recorded in InputUsed).
4696fe6060f1SDimitry Andric     // If building a suitable shuffle vector proves too hard, then bail
4697fe6060f1SDimitry Andric     // out with useBuildVector set.
4698fe6060f1SDimitry Andric     unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4699fe6060f1SDimitry Andric     unsigned FirstMaskIdx = High * NewElts;
4700fe6060f1SDimitry Andric     bool UseBuildVector = false;
4701fe6060f1SDimitry Andric     for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4702fe6060f1SDimitry Andric       // The mask element.  This indexes into the input.
4703fe6060f1SDimitry Andric       int Idx = Mask[FirstMaskIdx + MaskOffset];
4704fe6060f1SDimitry Andric 
4705fe6060f1SDimitry Andric       // The input vector this mask element indexes into.
4706fe6060f1SDimitry Andric       unsigned Input = (unsigned)Idx / NewElts;
4707fe6060f1SDimitry Andric 
4708bdd1243dSDimitry Andric       if (Input >= std::size(Inputs)) {
4709fe6060f1SDimitry Andric         // The mask element does not index into any input vector.
4710fe6060f1SDimitry Andric         Ops.push_back(-1);
4711fe6060f1SDimitry Andric         continue;
4712fe6060f1SDimitry Andric       }
4713fe6060f1SDimitry Andric 
4714fe6060f1SDimitry Andric       // Turn the index into an offset from the start of the input vector.
4715fe6060f1SDimitry Andric       Idx -= Input * NewElts;
4716fe6060f1SDimitry Andric 
4717fe6060f1SDimitry Andric       // Find or create a shuffle vector operand to hold this input.
4718fe6060f1SDimitry Andric       unsigned OpNo;
4719bdd1243dSDimitry Andric       for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4720fe6060f1SDimitry Andric         if (InputUsed[OpNo] == Input) {
4721fe6060f1SDimitry Andric           // This input vector is already an operand.
4722fe6060f1SDimitry Andric           break;
4723fe6060f1SDimitry Andric         } else if (InputUsed[OpNo] == -1U) {
4724fe6060f1SDimitry Andric           // Create a new operand for this input vector.
4725fe6060f1SDimitry Andric           InputUsed[OpNo] = Input;
4726fe6060f1SDimitry Andric           break;
4727fe6060f1SDimitry Andric         }
4728fe6060f1SDimitry Andric       }
4729fe6060f1SDimitry Andric 
4730bdd1243dSDimitry Andric       if (OpNo >= std::size(InputUsed)) {
4731fe6060f1SDimitry Andric         // More than two input vectors used!  Give up on trying to create a
4732fe6060f1SDimitry Andric         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
4733fe6060f1SDimitry Andric         UseBuildVector = true;
4734fe6060f1SDimitry Andric         break;
4735fe6060f1SDimitry Andric       }
4736fe6060f1SDimitry Andric 
4737fe6060f1SDimitry Andric       // Add the mask index for the new shuffle vector.
4738fe6060f1SDimitry Andric       Ops.push_back(Idx + OpNo * NewElts);
4739fe6060f1SDimitry Andric     }
4740fe6060f1SDimitry Andric 
4741fe6060f1SDimitry Andric     if (UseBuildVector) {
4742fe6060f1SDimitry Andric       LLT EltTy = NarrowTy.getElementType();
4743fe6060f1SDimitry Andric       SmallVector<Register, 16> SVOps;
4744fe6060f1SDimitry Andric 
4745fe6060f1SDimitry Andric       // Extract the input elements by hand.
4746fe6060f1SDimitry Andric       for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4747fe6060f1SDimitry Andric         // The mask element.  This indexes into the input.
4748fe6060f1SDimitry Andric         int Idx = Mask[FirstMaskIdx + MaskOffset];
4749fe6060f1SDimitry Andric 
4750fe6060f1SDimitry Andric         // The input vector this mask element indexes into.
4751fe6060f1SDimitry Andric         unsigned Input = (unsigned)Idx / NewElts;
4752fe6060f1SDimitry Andric 
4753bdd1243dSDimitry Andric         if (Input >= std::size(Inputs)) {
4754fe6060f1SDimitry Andric           // The mask element is "undef" or indexes off the end of the input.
4755fe6060f1SDimitry Andric           SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4756fe6060f1SDimitry Andric           continue;
4757fe6060f1SDimitry Andric         }
4758fe6060f1SDimitry Andric 
4759fe6060f1SDimitry Andric         // Turn the index into an offset from the start of the input vector.
4760fe6060f1SDimitry Andric         Idx -= Input * NewElts;
4761fe6060f1SDimitry Andric 
4762fe6060f1SDimitry Andric         // Extract the vector element by hand.
4763fe6060f1SDimitry Andric         SVOps.push_back(MIRBuilder
4764fe6060f1SDimitry Andric                             .buildExtractVectorElement(
4765fe6060f1SDimitry Andric                                 EltTy, Inputs[Input],
4766fe6060f1SDimitry Andric                                 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4767fe6060f1SDimitry Andric                             .getReg(0));
4768fe6060f1SDimitry Andric       }
4769fe6060f1SDimitry Andric 
4770fe6060f1SDimitry Andric       // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4771fe6060f1SDimitry Andric       Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4772fe6060f1SDimitry Andric     } else if (InputUsed[0] == -1U) {
4773fe6060f1SDimitry Andric       // No input vectors were used! The result is undefined.
4774fe6060f1SDimitry Andric       Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4775fe6060f1SDimitry Andric     } else {
4776fe6060f1SDimitry Andric       Register Op0 = Inputs[InputUsed[0]];
4777fe6060f1SDimitry Andric       // If only one input was used, use an undefined vector for the other.
4778fe6060f1SDimitry Andric       Register Op1 = InputUsed[1] == -1U
4779fe6060f1SDimitry Andric                          ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4780fe6060f1SDimitry Andric                          : Inputs[InputUsed[1]];
4781fe6060f1SDimitry Andric       // At least one input vector was used. Create a new shuffle vector.
4782fe6060f1SDimitry Andric       Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4783fe6060f1SDimitry Andric     }
4784fe6060f1SDimitry Andric 
4785fe6060f1SDimitry Andric     Ops.clear();
4786fe6060f1SDimitry Andric   }
4787fe6060f1SDimitry Andric 
4788fe6060f1SDimitry Andric   MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4789fe6060f1SDimitry Andric   MI.eraseFromParent();
4790fe6060f1SDimitry Andric   return Legalized;
4791fe6060f1SDimitry Andric }
4792fe6060f1SDimitry Andric 
4793349cc55cSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4794349cc55cSDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
47955f757f3fSDimitry Andric   auto &RdxMI = cast<GVecReduce>(MI);
4796349cc55cSDimitry Andric 
4797349cc55cSDimitry Andric   if (TypeIdx != 1)
4798349cc55cSDimitry Andric     return UnableToLegalize;
4799349cc55cSDimitry Andric 
4800349cc55cSDimitry Andric   // The semantics of the normal non-sequential reductions allow us to freely
4801349cc55cSDimitry Andric   // re-associate the operation.
48025f757f3fSDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4803349cc55cSDimitry Andric 
4804349cc55cSDimitry Andric   if (NarrowTy.isVector() &&
4805349cc55cSDimitry Andric       (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4806349cc55cSDimitry Andric     return UnableToLegalize;
4807349cc55cSDimitry Andric 
48085f757f3fSDimitry Andric   unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4809349cc55cSDimitry Andric   SmallVector<Register> SplitSrcs;
4810349cc55cSDimitry Andric   // If NarrowTy is a scalar then we're being asked to scalarize.
4811349cc55cSDimitry Andric   const unsigned NumParts =
4812349cc55cSDimitry Andric       NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4813349cc55cSDimitry Andric                           : SrcTy.getNumElements();
4814349cc55cSDimitry Andric 
48157a6dacacSDimitry Andric   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4816349cc55cSDimitry Andric   if (NarrowTy.isScalar()) {
4817349cc55cSDimitry Andric     if (DstTy != NarrowTy)
4818349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle implicit extensions.
4819349cc55cSDimitry Andric 
4820349cc55cSDimitry Andric     if (isPowerOf2_32(NumParts)) {
4821349cc55cSDimitry Andric       // Generate a tree of scalar operations to reduce the critical path.
4822349cc55cSDimitry Andric       SmallVector<Register> PartialResults;
4823349cc55cSDimitry Andric       unsigned NumPartsLeft = NumParts;
4824349cc55cSDimitry Andric       while (NumPartsLeft > 1) {
4825349cc55cSDimitry Andric         for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4826349cc55cSDimitry Andric           PartialResults.emplace_back(
4827349cc55cSDimitry Andric               MIRBuilder
4828349cc55cSDimitry Andric                   .buildInstr(ScalarOpc, {NarrowTy},
4829349cc55cSDimitry Andric                               {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4830349cc55cSDimitry Andric                   .getReg(0));
4831349cc55cSDimitry Andric         }
4832349cc55cSDimitry Andric         SplitSrcs = PartialResults;
4833349cc55cSDimitry Andric         PartialResults.clear();
4834349cc55cSDimitry Andric         NumPartsLeft = SplitSrcs.size();
4835349cc55cSDimitry Andric       }
4836349cc55cSDimitry Andric       assert(SplitSrcs.size() == 1);
4837349cc55cSDimitry Andric       MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4838349cc55cSDimitry Andric       MI.eraseFromParent();
4839349cc55cSDimitry Andric       return Legalized;
4840349cc55cSDimitry Andric     }
4841349cc55cSDimitry Andric     // If we can't generate a tree, then just do sequential operations.
4842349cc55cSDimitry Andric     Register Acc = SplitSrcs[0];
4843349cc55cSDimitry Andric     for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4844349cc55cSDimitry Andric       Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4845349cc55cSDimitry Andric                 .getReg(0);
4846349cc55cSDimitry Andric     MIRBuilder.buildCopy(DstReg, Acc);
4847349cc55cSDimitry Andric     MI.eraseFromParent();
4848349cc55cSDimitry Andric     return Legalized;
4849349cc55cSDimitry Andric   }
4850349cc55cSDimitry Andric   SmallVector<Register> PartialReductions;
4851349cc55cSDimitry Andric   for (unsigned Part = 0; Part < NumParts; ++Part) {
4852349cc55cSDimitry Andric     PartialReductions.push_back(
48535f757f3fSDimitry Andric         MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
48545f757f3fSDimitry Andric             .getReg(0));
4855349cc55cSDimitry Andric   }
4856349cc55cSDimitry Andric 
4857fe6060f1SDimitry Andric   // If the types involved are powers of 2, we can generate intermediate vector
4858fe6060f1SDimitry Andric   // ops, before generating a final reduction operation.
4859fe6060f1SDimitry Andric   if (isPowerOf2_32(SrcTy.getNumElements()) &&
4860fe6060f1SDimitry Andric       isPowerOf2_32(NarrowTy.getNumElements())) {
4861fe6060f1SDimitry Andric     return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4862fe6060f1SDimitry Andric   }
4863fe6060f1SDimitry Andric 
4864fe6060f1SDimitry Andric   Register Acc = PartialReductions[0];
4865fe6060f1SDimitry Andric   for (unsigned Part = 1; Part < NumParts; ++Part) {
4866fe6060f1SDimitry Andric     if (Part == NumParts - 1) {
4867fe6060f1SDimitry Andric       MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4868fe6060f1SDimitry Andric                             {Acc, PartialReductions[Part]});
4869fe6060f1SDimitry Andric     } else {
4870fe6060f1SDimitry Andric       Acc = MIRBuilder
4871fe6060f1SDimitry Andric                 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4872fe6060f1SDimitry Andric                 .getReg(0);
4873fe6060f1SDimitry Andric     }
4874fe6060f1SDimitry Andric   }
4875fe6060f1SDimitry Andric   MI.eraseFromParent();
4876fe6060f1SDimitry Andric   return Legalized;
4877fe6060f1SDimitry Andric }
4878fe6060f1SDimitry Andric 
4879fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
48801db9f3b2SDimitry Andric LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
48811db9f3b2SDimitry Andric                                                   unsigned int TypeIdx,
48821db9f3b2SDimitry Andric                                                   LLT NarrowTy) {
48831db9f3b2SDimitry Andric   auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
48841db9f3b2SDimitry Andric       MI.getFirst3RegLLTs();
48851db9f3b2SDimitry Andric   if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
48861db9f3b2SDimitry Andric       DstTy != NarrowTy)
48871db9f3b2SDimitry Andric     return UnableToLegalize;
48881db9f3b2SDimitry Andric 
48891db9f3b2SDimitry Andric   assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
48901db9f3b2SDimitry Andric           MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
48911db9f3b2SDimitry Andric          "Unexpected vecreduce opcode");
48921db9f3b2SDimitry Andric   unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
48931db9f3b2SDimitry Andric                            ? TargetOpcode::G_FADD
48941db9f3b2SDimitry Andric                            : TargetOpcode::G_FMUL;
48951db9f3b2SDimitry Andric 
48961db9f3b2SDimitry Andric   SmallVector<Register> SplitSrcs;
48971db9f3b2SDimitry Andric   unsigned NumParts = SrcTy.getNumElements();
48987a6dacacSDimitry Andric   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
48991db9f3b2SDimitry Andric   Register Acc = ScalarReg;
49001db9f3b2SDimitry Andric   for (unsigned i = 0; i < NumParts; i++)
49011db9f3b2SDimitry Andric     Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
49021db9f3b2SDimitry Andric               .getReg(0);
49031db9f3b2SDimitry Andric 
49041db9f3b2SDimitry Andric   MIRBuilder.buildCopy(DstReg, Acc);
49051db9f3b2SDimitry Andric   MI.eraseFromParent();
49061db9f3b2SDimitry Andric   return Legalized;
49071db9f3b2SDimitry Andric }
49081db9f3b2SDimitry Andric 
49091db9f3b2SDimitry Andric LegalizerHelper::LegalizeResult
4910fe6060f1SDimitry Andric LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4911fe6060f1SDimitry Andric                                         LLT SrcTy, LLT NarrowTy,
4912fe6060f1SDimitry Andric                                         unsigned ScalarOpc) {
4913fe6060f1SDimitry Andric   SmallVector<Register> SplitSrcs;
4914fe6060f1SDimitry Andric   // Split the sources into NarrowTy size pieces.
4915fe6060f1SDimitry Andric   extractParts(SrcReg, NarrowTy,
49167a6dacacSDimitry Andric                SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
49177a6dacacSDimitry Andric                MIRBuilder, MRI);
4918fe6060f1SDimitry Andric   // We're going to do a tree reduction using vector operations until we have
4919fe6060f1SDimitry Andric   // one NarrowTy size value left.
4920fe6060f1SDimitry Andric   while (SplitSrcs.size() > 1) {
4921fe6060f1SDimitry Andric     SmallVector<Register> PartialRdxs;
4922fe6060f1SDimitry Andric     for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4923fe6060f1SDimitry Andric       Register LHS = SplitSrcs[Idx];
4924fe6060f1SDimitry Andric       Register RHS = SplitSrcs[Idx + 1];
4925fe6060f1SDimitry Andric       // Create the intermediate vector op.
4926fe6060f1SDimitry Andric       Register Res =
4927fe6060f1SDimitry Andric           MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4928fe6060f1SDimitry Andric       PartialRdxs.push_back(Res);
4929fe6060f1SDimitry Andric     }
4930fe6060f1SDimitry Andric     SplitSrcs = std::move(PartialRdxs);
4931fe6060f1SDimitry Andric   }
4932fe6060f1SDimitry Andric   // Finally generate the requested NarrowTy based reduction.
4933fe6060f1SDimitry Andric   Observer.changingInstr(MI);
4934fe6060f1SDimitry Andric   MI.getOperand(1).setReg(SplitSrcs[0]);
4935fe6060f1SDimitry Andric   Observer.changedInstr(MI);
4936fe6060f1SDimitry Andric   return Legalized;
4937fe6060f1SDimitry Andric }
4938fe6060f1SDimitry Andric 
49390b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
49400b57cec5SDimitry Andric LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
49410b57cec5SDimitry Andric                                              const LLT HalfTy, const LLT AmtTy) {
49420b57cec5SDimitry Andric 
49430b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
49440b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
49455ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
49460b57cec5SDimitry Andric 
4947349cc55cSDimitry Andric   if (Amt.isZero()) {
4948bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
49490b57cec5SDimitry Andric     MI.eraseFromParent();
49500b57cec5SDimitry Andric     return Legalized;
49510b57cec5SDimitry Andric   }
49520b57cec5SDimitry Andric 
49530b57cec5SDimitry Andric   LLT NVT = HalfTy;
49540b57cec5SDimitry Andric   unsigned NVTBits = HalfTy.getSizeInBits();
49550b57cec5SDimitry Andric   unsigned VTBits = 2 * NVTBits;
49560b57cec5SDimitry Andric 
49570b57cec5SDimitry Andric   SrcOp Lo(Register(0)), Hi(Register(0));
49580b57cec5SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_SHL) {
49590b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
49600b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
49610b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
49620b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
49630b57cec5SDimitry Andric       Hi = MIRBuilder.buildShl(NVT, InL,
49640b57cec5SDimitry Andric                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
49650b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
49660b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
49670b57cec5SDimitry Andric       Hi = InL;
49680b57cec5SDimitry Andric     } else {
49690b57cec5SDimitry Andric       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
49700b57cec5SDimitry Andric       auto OrLHS =
49710b57cec5SDimitry Andric           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
49720b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildLShr(
49730b57cec5SDimitry Andric           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
49740b57cec5SDimitry Andric       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
49750b57cec5SDimitry Andric     }
49760b57cec5SDimitry Andric   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
49770b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
49780b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
49790b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
49800b57cec5SDimitry Andric       Lo = MIRBuilder.buildLShr(NVT, InH,
49810b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
49820b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
49830b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
49840b57cec5SDimitry Andric       Lo = InH;
49850b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
49860b57cec5SDimitry Andric     } else {
49870b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
49880b57cec5SDimitry Andric 
49890b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
49900b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
49910b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
49920b57cec5SDimitry Andric 
49930b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
49940b57cec5SDimitry Andric       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
49950b57cec5SDimitry Andric     }
49960b57cec5SDimitry Andric   } else {
49970b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
49980b57cec5SDimitry Andric       Hi = Lo = MIRBuilder.buildAShr(
49990b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50000b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
50010b57cec5SDimitry Andric       Lo = MIRBuilder.buildAShr(NVT, InH,
50020b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
50030b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
50040b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50050b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
50060b57cec5SDimitry Andric       Lo = InH;
50070b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
50080b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50090b57cec5SDimitry Andric     } else {
50100b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
50110b57cec5SDimitry Andric 
50120b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
50130b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
50140b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
50150b57cec5SDimitry Andric 
50160b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
50170b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
50180b57cec5SDimitry Andric     }
50190b57cec5SDimitry Andric   }
50200b57cec5SDimitry Andric 
5021bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
50220b57cec5SDimitry Andric   MI.eraseFromParent();
50230b57cec5SDimitry Andric 
50240b57cec5SDimitry Andric   return Legalized;
50250b57cec5SDimitry Andric }
50260b57cec5SDimitry Andric 
50270b57cec5SDimitry Andric // TODO: Optimize if constant shift amount.
50280b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
50290b57cec5SDimitry Andric LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
50300b57cec5SDimitry Andric                                    LLT RequestedTy) {
50310b57cec5SDimitry Andric   if (TypeIdx == 1) {
50320b57cec5SDimitry Andric     Observer.changingInstr(MI);
50330b57cec5SDimitry Andric     narrowScalarSrc(MI, RequestedTy, 2);
50340b57cec5SDimitry Andric     Observer.changedInstr(MI);
50350b57cec5SDimitry Andric     return Legalized;
50360b57cec5SDimitry Andric   }
50370b57cec5SDimitry Andric 
50380b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
50390b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
50400b57cec5SDimitry Andric   if (DstTy.isVector())
50410b57cec5SDimitry Andric     return UnableToLegalize;
50420b57cec5SDimitry Andric 
50430b57cec5SDimitry Andric   Register Amt = MI.getOperand(2).getReg();
50440b57cec5SDimitry Andric   LLT ShiftAmtTy = MRI.getType(Amt);
50450b57cec5SDimitry Andric   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
50460b57cec5SDimitry Andric   if (DstEltSize % 2 != 0)
50470b57cec5SDimitry Andric     return UnableToLegalize;
50480b57cec5SDimitry Andric 
50490b57cec5SDimitry Andric   // Ignore the input type. We can only go to exactly half the size of the
50500b57cec5SDimitry Andric   // input. If that isn't small enough, the resulting pieces will be further
50510b57cec5SDimitry Andric   // legalized.
50520b57cec5SDimitry Andric   const unsigned NewBitSize = DstEltSize / 2;
50530b57cec5SDimitry Andric   const LLT HalfTy = LLT::scalar(NewBitSize);
50540b57cec5SDimitry Andric   const LLT CondTy = LLT::scalar(1);
50550b57cec5SDimitry Andric 
5056349cc55cSDimitry Andric   if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5057349cc55cSDimitry Andric     return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5058349cc55cSDimitry Andric                                        ShiftAmtTy);
50590b57cec5SDimitry Andric   }
50600b57cec5SDimitry Andric 
50610b57cec5SDimitry Andric   // TODO: Expand with known bits.
50620b57cec5SDimitry Andric 
50630b57cec5SDimitry Andric   // Handle the fully general expansion by an unknown amount.
50640b57cec5SDimitry Andric   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
50650b57cec5SDimitry Andric 
50660b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
50670b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
50685ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
50690b57cec5SDimitry Andric 
50700b57cec5SDimitry Andric   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
50710b57cec5SDimitry Andric   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
50720b57cec5SDimitry Andric 
50730b57cec5SDimitry Andric   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
50740b57cec5SDimitry Andric   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
50750b57cec5SDimitry Andric   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
50760b57cec5SDimitry Andric 
50770b57cec5SDimitry Andric   Register ResultRegs[2];
50780b57cec5SDimitry Andric   switch (MI.getOpcode()) {
50790b57cec5SDimitry Andric   case TargetOpcode::G_SHL: {
50800b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
50818bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
50820b57cec5SDimitry Andric 
50838bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
50848bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
50858bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
50860b57cec5SDimitry Andric 
50870b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
50880b57cec5SDimitry Andric     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
50890b57cec5SDimitry Andric     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
50900b57cec5SDimitry Andric 
50910b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
50920b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(
50930b57cec5SDimitry Andric         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
50940b57cec5SDimitry Andric 
50950b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
50960b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
50970b57cec5SDimitry Andric     break;
50980b57cec5SDimitry Andric   }
50998bcb0991SDimitry Andric   case TargetOpcode::G_LSHR:
51000b57cec5SDimitry Andric   case TargetOpcode::G_ASHR: {
51010b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
51028bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
51030b57cec5SDimitry Andric 
51048bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
51058bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
51068bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
51070b57cec5SDimitry Andric 
51080b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
51098bcb0991SDimitry Andric     MachineInstrBuilder HiL;
51108bcb0991SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
51118bcb0991SDimitry Andric       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
51128bcb0991SDimitry Andric     } else {
51138bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
51148bcb0991SDimitry Andric       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
51158bcb0991SDimitry Andric     }
51168bcb0991SDimitry Andric     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
51178bcb0991SDimitry Andric                                      {InH, AmtExcess});     // Lo from Hi part.
51180b57cec5SDimitry Andric 
51190b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(
51200b57cec5SDimitry Andric         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
51210b57cec5SDimitry Andric 
51220b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
51230b57cec5SDimitry Andric 
51240b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
51250b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
51260b57cec5SDimitry Andric     break;
51270b57cec5SDimitry Andric   }
51280b57cec5SDimitry Andric   default:
51290b57cec5SDimitry Andric     llvm_unreachable("not a shift");
51300b57cec5SDimitry Andric   }
51310b57cec5SDimitry Andric 
5132bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
51330b57cec5SDimitry Andric   MI.eraseFromParent();
51340b57cec5SDimitry Andric   return Legalized;
51350b57cec5SDimitry Andric }
51360b57cec5SDimitry Andric 
51370b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
51380b57cec5SDimitry Andric LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
51390b57cec5SDimitry Andric                                        LLT MoreTy) {
51400b57cec5SDimitry Andric   assert(TypeIdx == 0 && "Expecting only Idx 0");
51410b57cec5SDimitry Andric 
51420b57cec5SDimitry Andric   Observer.changingInstr(MI);
51430b57cec5SDimitry Andric   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
51440b57cec5SDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
51450b57cec5SDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
51460b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, I);
51470b57cec5SDimitry Andric   }
51480b57cec5SDimitry Andric 
51490b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
51500b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
51510b57cec5SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
51520b57cec5SDimitry Andric   Observer.changedInstr(MI);
51530b57cec5SDimitry Andric   return Legalized;
51540b57cec5SDimitry Andric }
51550b57cec5SDimitry Andric 
51560b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
51570b57cec5SDimitry Andric LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
51580b57cec5SDimitry Andric                                     LLT MoreTy) {
51590b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
51600b57cec5SDimitry Andric   switch (Opc) {
51618bcb0991SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF:
51628bcb0991SDimitry Andric   case TargetOpcode::G_LOAD: {
51638bcb0991SDimitry Andric     if (TypeIdx != 0)
51648bcb0991SDimitry Andric       return UnableToLegalize;
51650b57cec5SDimitry Andric     Observer.changingInstr(MI);
51660b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
51670b57cec5SDimitry Andric     Observer.changedInstr(MI);
51680b57cec5SDimitry Andric     return Legalized;
51690b57cec5SDimitry Andric   }
51708bcb0991SDimitry Andric   case TargetOpcode::G_STORE:
51718bcb0991SDimitry Andric     if (TypeIdx != 0)
51728bcb0991SDimitry Andric       return UnableToLegalize;
51738bcb0991SDimitry Andric     Observer.changingInstr(MI);
51748bcb0991SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 0);
51758bcb0991SDimitry Andric     Observer.changedInstr(MI);
51768bcb0991SDimitry Andric     return Legalized;
51770b57cec5SDimitry Andric   case TargetOpcode::G_AND:
51780b57cec5SDimitry Andric   case TargetOpcode::G_OR:
51790b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
51800eae32dcSDimitry Andric   case TargetOpcode::G_ADD:
51810eae32dcSDimitry Andric   case TargetOpcode::G_SUB:
51820eae32dcSDimitry Andric   case TargetOpcode::G_MUL:
51830eae32dcSDimitry Andric   case TargetOpcode::G_FADD:
51845f757f3fSDimitry Andric   case TargetOpcode::G_FSUB:
51850eae32dcSDimitry Andric   case TargetOpcode::G_FMUL:
51865f757f3fSDimitry Andric   case TargetOpcode::G_FDIV:
51870eae32dcSDimitry Andric   case TargetOpcode::G_UADDSAT:
51880eae32dcSDimitry Andric   case TargetOpcode::G_USUBSAT:
51890eae32dcSDimitry Andric   case TargetOpcode::G_SADDSAT:
51900eae32dcSDimitry Andric   case TargetOpcode::G_SSUBSAT:
51910b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
51920b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
51930b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
5194480093f4SDimitry Andric   case TargetOpcode::G_UMAX:
5195480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM:
5196480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM:
5197480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
5198480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
5199480093f4SDimitry Andric   case TargetOpcode::G_FMINIMUM:
5200bdd1243dSDimitry Andric   case TargetOpcode::G_FMAXIMUM:
5201bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FADD:
5202bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB:
52037a6dacacSDimitry Andric   case TargetOpcode::G_STRICT_FMUL:
52047a6dacacSDimitry Andric   case TargetOpcode::G_SHL:
52057a6dacacSDimitry Andric   case TargetOpcode::G_ASHR:
52067a6dacacSDimitry Andric   case TargetOpcode::G_LSHR: {
52070b57cec5SDimitry Andric     Observer.changingInstr(MI);
52080b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
52090b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
52100b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52110b57cec5SDimitry Andric     Observer.changedInstr(MI);
52120b57cec5SDimitry Andric     return Legalized;
52130b57cec5SDimitry Andric   }
52140eae32dcSDimitry Andric   case TargetOpcode::G_FMA:
5215bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FMA:
52160eae32dcSDimitry Andric   case TargetOpcode::G_FSHR:
52170eae32dcSDimitry Andric   case TargetOpcode::G_FSHL: {
52180eae32dcSDimitry Andric     Observer.changingInstr(MI);
52190eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
52200eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
52210eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
52220eae32dcSDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52230eae32dcSDimitry Andric     Observer.changedInstr(MI);
52240eae32dcSDimitry Andric     return Legalized;
52250eae32dcSDimitry Andric   }
522606c3fb27SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
52270b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
52280b57cec5SDimitry Andric     if (TypeIdx != 1)
52290b57cec5SDimitry Andric       return UnableToLegalize;
52300b57cec5SDimitry Andric     Observer.changingInstr(MI);
52310b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
52320b57cec5SDimitry Andric     Observer.changedInstr(MI);
52330b57cec5SDimitry Andric     return Legalized;
52340b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
523506c3fb27SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
52365ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
52370eae32dcSDimitry Andric   case TargetOpcode::G_FNEG:
52380eae32dcSDimitry Andric   case TargetOpcode::G_FABS:
52395f757f3fSDimitry Andric   case TargetOpcode::G_FSQRT:
52405f757f3fSDimitry Andric   case TargetOpcode::G_FCEIL:
52415f757f3fSDimitry Andric   case TargetOpcode::G_FFLOOR:
52425f757f3fSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
52435f757f3fSDimitry Andric   case TargetOpcode::G_FRINT:
52445f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
52455f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
52465f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
52470eae32dcSDimitry Andric   case TargetOpcode::G_BSWAP:
52480eae32dcSDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
52490eae32dcSDimitry Andric   case TargetOpcode::G_SEXT_INREG:
52500b57cec5SDimitry Andric     if (TypeIdx != 0)
52510b57cec5SDimitry Andric       return UnableToLegalize;
52520b57cec5SDimitry Andric     Observer.changingInstr(MI);
52530b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
52540b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52550b57cec5SDimitry Andric     Observer.changedInstr(MI);
52560b57cec5SDimitry Andric     return Legalized;
525781ad6265SDimitry Andric   case TargetOpcode::G_SELECT: {
525806c3fb27SDimitry Andric     auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
525981ad6265SDimitry Andric     if (TypeIdx == 1) {
526081ad6265SDimitry Andric       if (!CondTy.isScalar() ||
526181ad6265SDimitry Andric           DstTy.getElementCount() != MoreTy.getElementCount())
52620b57cec5SDimitry Andric         return UnableToLegalize;
526381ad6265SDimitry Andric 
526481ad6265SDimitry Andric       // This is turning a scalar select of vectors into a vector
526581ad6265SDimitry Andric       // select. Broadcast the select condition.
526681ad6265SDimitry Andric       auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
526781ad6265SDimitry Andric       Observer.changingInstr(MI);
526881ad6265SDimitry Andric       MI.getOperand(1).setReg(ShufSplat.getReg(0));
526981ad6265SDimitry Andric       Observer.changedInstr(MI);
527081ad6265SDimitry Andric       return Legalized;
527181ad6265SDimitry Andric     }
527281ad6265SDimitry Andric 
527381ad6265SDimitry Andric     if (CondTy.isVector())
52740b57cec5SDimitry Andric       return UnableToLegalize;
52750b57cec5SDimitry Andric 
52760b57cec5SDimitry Andric     Observer.changingInstr(MI);
52770b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
52780b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
52790b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52800b57cec5SDimitry Andric     Observer.changedInstr(MI);
52810b57cec5SDimitry Andric     return Legalized;
528281ad6265SDimitry Andric   }
52830eae32dcSDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
52848bcb0991SDimitry Andric     return UnableToLegalize;
52850b57cec5SDimitry Andric   case TargetOpcode::G_PHI:
52860b57cec5SDimitry Andric     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5287fe6060f1SDimitry Andric   case TargetOpcode::G_SHUFFLE_VECTOR:
5288fe6060f1SDimitry Andric     return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
52890eae32dcSDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
52900eae32dcSDimitry Andric     SmallVector<SrcOp, 8> Elts;
52910eae32dcSDimitry Andric     for (auto Op : MI.uses()) {
52920eae32dcSDimitry Andric       Elts.push_back(Op.getReg());
52930eae32dcSDimitry Andric     }
52940eae32dcSDimitry Andric 
52950eae32dcSDimitry Andric     for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
52960eae32dcSDimitry Andric       Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
52970eae32dcSDimitry Andric     }
52980eae32dcSDimitry Andric 
52990eae32dcSDimitry Andric     MIRBuilder.buildDeleteTrailingVectorElements(
53000eae32dcSDimitry Andric         MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
53010eae32dcSDimitry Andric     MI.eraseFromParent();
53020eae32dcSDimitry Andric     return Legalized;
53030eae32dcSDimitry Andric   }
53045f757f3fSDimitry Andric   case TargetOpcode::G_TRUNC:
530506c3fb27SDimitry Andric   case TargetOpcode::G_FPTRUNC:
53065f757f3fSDimitry Andric   case TargetOpcode::G_FPEXT:
53075f757f3fSDimitry Andric   case TargetOpcode::G_FPTOSI:
53085f757f3fSDimitry Andric   case TargetOpcode::G_FPTOUI:
53095f757f3fSDimitry Andric   case TargetOpcode::G_SITOFP:
53105f757f3fSDimitry Andric   case TargetOpcode::G_UITOFP: {
531106c3fb27SDimitry Andric     if (TypeIdx != 0)
531206c3fb27SDimitry Andric       return UnableToLegalize;
531306c3fb27SDimitry Andric     Observer.changingInstr(MI);
531406c3fb27SDimitry Andric     LLT SrcTy = LLT::fixed_vector(
531506c3fb27SDimitry Andric         MoreTy.getNumElements(),
531606c3fb27SDimitry Andric         MRI.getType(MI.getOperand(1).getReg()).getElementType());
531706c3fb27SDimitry Andric     moreElementsVectorSrc(MI, SrcTy, 1);
531806c3fb27SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
531906c3fb27SDimitry Andric     Observer.changedInstr(MI);
532006c3fb27SDimitry Andric     return Legalized;
532106c3fb27SDimitry Andric   }
53227a6dacacSDimitry Andric   case TargetOpcode::G_ICMP: {
53237a6dacacSDimitry Andric     // TODO: the symmetric MoreTy works for targets like, e.g. NEON.
53247a6dacacSDimitry Andric     // For targets, like e.g. MVE, the result is a predicated vector (i1).
53257a6dacacSDimitry Andric     // This will need some refactoring.
53267a6dacacSDimitry Andric     Observer.changingInstr(MI);
53277a6dacacSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
53287a6dacacSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
53297a6dacacSDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
53307a6dacacSDimitry Andric     Observer.changedInstr(MI);
53317a6dacacSDimitry Andric     return Legalized;
53327a6dacacSDimitry Andric   }
53330b57cec5SDimitry Andric   default:
53340b57cec5SDimitry Andric     return UnableToLegalize;
53350b57cec5SDimitry Andric   }
53360b57cec5SDimitry Andric }
53370b57cec5SDimitry Andric 
533806c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
533906c3fb27SDimitry Andric LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
534006c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5341bdd1243dSDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5342bdd1243dSDimitry Andric   unsigned MaskNumElts = Mask.size();
5343bdd1243dSDimitry Andric   unsigned SrcNumElts = SrcTy.getNumElements();
5344bdd1243dSDimitry Andric   LLT DestEltTy = DstTy.getElementType();
5345bdd1243dSDimitry Andric 
534606c3fb27SDimitry Andric   if (MaskNumElts == SrcNumElts)
534706c3fb27SDimitry Andric     return Legalized;
534806c3fb27SDimitry Andric 
534906c3fb27SDimitry Andric   if (MaskNumElts < SrcNumElts) {
535006c3fb27SDimitry Andric     // Extend mask to match new destination vector size with
535106c3fb27SDimitry Andric     // undef values.
535206c3fb27SDimitry Andric     SmallVector<int, 16> NewMask(Mask);
535306c3fb27SDimitry Andric     for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
535406c3fb27SDimitry Andric       NewMask.push_back(-1);
535506c3fb27SDimitry Andric 
535606c3fb27SDimitry Andric     moreElementsVectorDst(MI, SrcTy, 0);
535706c3fb27SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
535806c3fb27SDimitry Andric     MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
535906c3fb27SDimitry Andric                                   MI.getOperand(1).getReg(),
536006c3fb27SDimitry Andric                                   MI.getOperand(2).getReg(), NewMask);
536106c3fb27SDimitry Andric     MI.eraseFromParent();
536206c3fb27SDimitry Andric 
536306c3fb27SDimitry Andric     return Legalized;
5364bdd1243dSDimitry Andric   }
5365bdd1243dSDimitry Andric 
5366bdd1243dSDimitry Andric   unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5367bdd1243dSDimitry Andric   unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5368bdd1243dSDimitry Andric   LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5369bdd1243dSDimitry Andric 
5370bdd1243dSDimitry Andric   // Create new source vectors by concatenating the initial
5371bdd1243dSDimitry Andric   // source vectors with undefined vectors of the same size.
5372bdd1243dSDimitry Andric   auto Undef = MIRBuilder.buildUndef(SrcTy);
5373bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5374bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5375bdd1243dSDimitry Andric   MOps1[0] = MI.getOperand(1).getReg();
5376bdd1243dSDimitry Andric   MOps2[0] = MI.getOperand(2).getReg();
5377bdd1243dSDimitry Andric 
5378bdd1243dSDimitry Andric   auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5379bdd1243dSDimitry Andric   auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5380bdd1243dSDimitry Andric 
5381bdd1243dSDimitry Andric   // Readjust mask for new input vector length.
5382bdd1243dSDimitry Andric   SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5383bdd1243dSDimitry Andric   for (unsigned I = 0; I != MaskNumElts; ++I) {
5384bdd1243dSDimitry Andric     int Idx = Mask[I];
5385bdd1243dSDimitry Andric     if (Idx >= static_cast<int>(SrcNumElts))
5386bdd1243dSDimitry Andric       Idx += PaddedMaskNumElts - SrcNumElts;
5387bdd1243dSDimitry Andric     MappedOps[I] = Idx;
5388bdd1243dSDimitry Andric   }
5389bdd1243dSDimitry Andric 
5390bdd1243dSDimitry Andric   // If we got more elements than required, extract subvector.
5391bdd1243dSDimitry Andric   if (MaskNumElts != PaddedMaskNumElts) {
5392bdd1243dSDimitry Andric     auto Shuffle =
5393bdd1243dSDimitry Andric         MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5394bdd1243dSDimitry Andric 
5395bdd1243dSDimitry Andric     SmallVector<Register, 16> Elts(MaskNumElts);
5396bdd1243dSDimitry Andric     for (unsigned I = 0; I < MaskNumElts; ++I) {
5397bdd1243dSDimitry Andric       Elts[I] =
5398bdd1243dSDimitry Andric           MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5399bdd1243dSDimitry Andric               .getReg(0);
5400bdd1243dSDimitry Andric     }
5401bdd1243dSDimitry Andric     MIRBuilder.buildBuildVector(DstReg, Elts);
5402bdd1243dSDimitry Andric   } else {
5403bdd1243dSDimitry Andric     MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5404bdd1243dSDimitry Andric   }
5405bdd1243dSDimitry Andric 
5406bdd1243dSDimitry Andric   MI.eraseFromParent();
5407bdd1243dSDimitry Andric   return LegalizerHelper::LegalizeResult::Legalized;
5408bdd1243dSDimitry Andric }
5409bdd1243dSDimitry Andric 
5410fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
5411fe6060f1SDimitry Andric LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5412fe6060f1SDimitry Andric                                            unsigned int TypeIdx, LLT MoreTy) {
541306c3fb27SDimitry Andric   auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5414fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5415fe6060f1SDimitry Andric   unsigned NumElts = DstTy.getNumElements();
5416fe6060f1SDimitry Andric   unsigned WidenNumElts = MoreTy.getNumElements();
5417fe6060f1SDimitry Andric 
5418bdd1243dSDimitry Andric   if (DstTy.isVector() && Src1Ty.isVector() &&
541906c3fb27SDimitry Andric       DstTy.getNumElements() != Src1Ty.getNumElements()) {
542006c3fb27SDimitry Andric     return equalizeVectorShuffleLengths(MI);
5421bdd1243dSDimitry Andric   }
5422bdd1243dSDimitry Andric 
5423bdd1243dSDimitry Andric   if (TypeIdx != 0)
5424bdd1243dSDimitry Andric     return UnableToLegalize;
5425bdd1243dSDimitry Andric 
5426fe6060f1SDimitry Andric   // Expect a canonicalized shuffle.
5427fe6060f1SDimitry Andric   if (DstTy != Src1Ty || DstTy != Src2Ty)
5428fe6060f1SDimitry Andric     return UnableToLegalize;
5429fe6060f1SDimitry Andric 
5430fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 1);
5431fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 2);
5432fe6060f1SDimitry Andric 
5433fe6060f1SDimitry Andric   // Adjust mask based on new input vector length.
5434fe6060f1SDimitry Andric   SmallVector<int, 16> NewMask;
5435fe6060f1SDimitry Andric   for (unsigned I = 0; I != NumElts; ++I) {
5436fe6060f1SDimitry Andric     int Idx = Mask[I];
5437fe6060f1SDimitry Andric     if (Idx < static_cast<int>(NumElts))
5438fe6060f1SDimitry Andric       NewMask.push_back(Idx);
5439fe6060f1SDimitry Andric     else
5440fe6060f1SDimitry Andric       NewMask.push_back(Idx - NumElts + WidenNumElts);
5441fe6060f1SDimitry Andric   }
5442fe6060f1SDimitry Andric   for (unsigned I = NumElts; I != WidenNumElts; ++I)
5443fe6060f1SDimitry Andric     NewMask.push_back(-1);
5444fe6060f1SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
5445fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
5446fe6060f1SDimitry Andric   MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5447fe6060f1SDimitry Andric                                 MI.getOperand(1).getReg(),
5448fe6060f1SDimitry Andric                                 MI.getOperand(2).getReg(), NewMask);
5449fe6060f1SDimitry Andric   MI.eraseFromParent();
5450fe6060f1SDimitry Andric   return Legalized;
5451fe6060f1SDimitry Andric }
5452fe6060f1SDimitry Andric 
54530b57cec5SDimitry Andric void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
54540b57cec5SDimitry Andric                                         ArrayRef<Register> Src1Regs,
54550b57cec5SDimitry Andric                                         ArrayRef<Register> Src2Regs,
54560b57cec5SDimitry Andric                                         LLT NarrowTy) {
54570b57cec5SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
54580b57cec5SDimitry Andric   unsigned SrcParts = Src1Regs.size();
54590b57cec5SDimitry Andric   unsigned DstParts = DstRegs.size();
54600b57cec5SDimitry Andric 
54610b57cec5SDimitry Andric   unsigned DstIdx = 0; // Low bits of the result.
54620b57cec5SDimitry Andric   Register FactorSum =
54630b57cec5SDimitry Andric       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
54640b57cec5SDimitry Andric   DstRegs[DstIdx] = FactorSum;
54650b57cec5SDimitry Andric 
54660b57cec5SDimitry Andric   unsigned CarrySumPrevDstIdx;
54670b57cec5SDimitry Andric   SmallVector<Register, 4> Factors;
54680b57cec5SDimitry Andric 
54690b57cec5SDimitry Andric   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
54700b57cec5SDimitry Andric     // Collect low parts of muls for DstIdx.
54710b57cec5SDimitry Andric     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
54720b57cec5SDimitry Andric          i <= std::min(DstIdx, SrcParts - 1); ++i) {
54730b57cec5SDimitry Andric       MachineInstrBuilder Mul =
54740b57cec5SDimitry Andric           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
54750b57cec5SDimitry Andric       Factors.push_back(Mul.getReg(0));
54760b57cec5SDimitry Andric     }
54770b57cec5SDimitry Andric     // Collect high parts of muls from previous DstIdx.
54780b57cec5SDimitry Andric     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
54790b57cec5SDimitry Andric          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
54800b57cec5SDimitry Andric       MachineInstrBuilder Umulh =
54810b57cec5SDimitry Andric           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
54820b57cec5SDimitry Andric       Factors.push_back(Umulh.getReg(0));
54830b57cec5SDimitry Andric     }
5484480093f4SDimitry Andric     // Add CarrySum from additions calculated for previous DstIdx.
54850b57cec5SDimitry Andric     if (DstIdx != 1) {
54860b57cec5SDimitry Andric       Factors.push_back(CarrySumPrevDstIdx);
54870b57cec5SDimitry Andric     }
54880b57cec5SDimitry Andric 
54890b57cec5SDimitry Andric     Register CarrySum;
54900b57cec5SDimitry Andric     // Add all factors and accumulate all carries into CarrySum.
54910b57cec5SDimitry Andric     if (DstIdx != DstParts - 1) {
54920b57cec5SDimitry Andric       MachineInstrBuilder Uaddo =
54930b57cec5SDimitry Andric           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
54940b57cec5SDimitry Andric       FactorSum = Uaddo.getReg(0);
54950b57cec5SDimitry Andric       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
54960b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i) {
54970b57cec5SDimitry Andric         MachineInstrBuilder Uaddo =
54980b57cec5SDimitry Andric             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
54990b57cec5SDimitry Andric         FactorSum = Uaddo.getReg(0);
55000b57cec5SDimitry Andric         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
55010b57cec5SDimitry Andric         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
55020b57cec5SDimitry Andric       }
55030b57cec5SDimitry Andric     } else {
55040b57cec5SDimitry Andric       // Since value for the next index is not calculated, neither is CarrySum.
55050b57cec5SDimitry Andric       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
55060b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i)
55070b57cec5SDimitry Andric         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
55080b57cec5SDimitry Andric     }
55090b57cec5SDimitry Andric 
55100b57cec5SDimitry Andric     CarrySumPrevDstIdx = CarrySum;
55110b57cec5SDimitry Andric     DstRegs[DstIdx] = FactorSum;
55120b57cec5SDimitry Andric     Factors.clear();
55130b57cec5SDimitry Andric   }
55140b57cec5SDimitry Andric }
55150b57cec5SDimitry Andric 
55160b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
5517fe6060f1SDimitry Andric LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5518fe6060f1SDimitry Andric                                     LLT NarrowTy) {
5519fe6060f1SDimitry Andric   if (TypeIdx != 0)
5520fe6060f1SDimitry Andric     return UnableToLegalize;
5521fe6060f1SDimitry Andric 
5522fe6060f1SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5523fe6060f1SDimitry Andric   LLT DstType = MRI.getType(DstReg);
5524fe6060f1SDimitry Andric   // FIXME: add support for vector types
5525fe6060f1SDimitry Andric   if (DstType.isVector())
5526fe6060f1SDimitry Andric     return UnableToLegalize;
5527fe6060f1SDimitry Andric 
5528fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
5529fe6060f1SDimitry Andric   unsigned OpO, OpE, OpF;
5530fe6060f1SDimitry Andric   switch (Opcode) {
5531fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
5532fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
5533fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
5534fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
5535fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
5536fe6060f1SDimitry Andric     OpO = TargetOpcode::G_UADDO;
5537fe6060f1SDimitry Andric     OpE = TargetOpcode::G_UADDE;
5538fe6060f1SDimitry Andric     OpF = TargetOpcode::G_UADDE;
5539fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5540fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SADDE;
5541fe6060f1SDimitry Andric     break;
5542fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
5543fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
5544fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
5545fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
5546fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
5547fe6060f1SDimitry Andric     OpO = TargetOpcode::G_USUBO;
5548fe6060f1SDimitry Andric     OpE = TargetOpcode::G_USUBE;
5549fe6060f1SDimitry Andric     OpF = TargetOpcode::G_USUBE;
5550fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5551fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SSUBE;
5552fe6060f1SDimitry Andric     break;
5553fe6060f1SDimitry Andric   default:
5554fe6060f1SDimitry Andric     llvm_unreachable("Unexpected add/sub opcode!");
5555fe6060f1SDimitry Andric   }
5556fe6060f1SDimitry Andric 
5557fe6060f1SDimitry Andric   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5558fe6060f1SDimitry Andric   unsigned NumDefs = MI.getNumExplicitDefs();
5559fe6060f1SDimitry Andric   Register Src1 = MI.getOperand(NumDefs).getReg();
5560fe6060f1SDimitry Andric   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5561fe6060f1SDimitry Andric   Register CarryDst, CarryIn;
5562fe6060f1SDimitry Andric   if (NumDefs == 2)
5563fe6060f1SDimitry Andric     CarryDst = MI.getOperand(1).getReg();
5564fe6060f1SDimitry Andric   if (MI.getNumOperands() == NumDefs + 3)
5565fe6060f1SDimitry Andric     CarryIn = MI.getOperand(NumDefs + 2).getReg();
5566fe6060f1SDimitry Andric 
5567fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5568fe6060f1SDimitry Andric   LLT LeftoverTy, DummyTy;
5569fe6060f1SDimitry Andric   SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
55707a6dacacSDimitry Andric   extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
55717a6dacacSDimitry Andric                MIRBuilder, MRI);
55727a6dacacSDimitry Andric   extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
55737a6dacacSDimitry Andric                MRI);
5574fe6060f1SDimitry Andric 
5575fe6060f1SDimitry Andric   int NarrowParts = Src1Regs.size();
5576fe6060f1SDimitry Andric   for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5577fe6060f1SDimitry Andric     Src1Regs.push_back(Src1Left[I]);
5578fe6060f1SDimitry Andric     Src2Regs.push_back(Src2Left[I]);
5579fe6060f1SDimitry Andric   }
5580fe6060f1SDimitry Andric   DstRegs.reserve(Src1Regs.size());
5581fe6060f1SDimitry Andric 
5582fe6060f1SDimitry Andric   for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5583fe6060f1SDimitry Andric     Register DstReg =
5584fe6060f1SDimitry Andric         MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5585fe6060f1SDimitry Andric     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5586fe6060f1SDimitry Andric     // Forward the final carry-out to the destination register
5587fe6060f1SDimitry Andric     if (i == e - 1 && CarryDst)
5588fe6060f1SDimitry Andric       CarryOut = CarryDst;
5589fe6060f1SDimitry Andric 
5590fe6060f1SDimitry Andric     if (!CarryIn) {
5591fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5592fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i]});
5593fe6060f1SDimitry Andric     } else if (i == e - 1) {
5594fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5595fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5596fe6060f1SDimitry Andric     } else {
5597fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5598fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5599fe6060f1SDimitry Andric     }
5600fe6060f1SDimitry Andric 
5601fe6060f1SDimitry Andric     DstRegs.push_back(DstReg);
5602fe6060f1SDimitry Andric     CarryIn = CarryOut;
5603fe6060f1SDimitry Andric   }
5604fe6060f1SDimitry Andric   insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5605bdd1243dSDimitry Andric               ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5606bdd1243dSDimitry Andric               ArrayRef(DstRegs).drop_front(NarrowParts));
5607fe6060f1SDimitry Andric 
5608fe6060f1SDimitry Andric   MI.eraseFromParent();
5609fe6060f1SDimitry Andric   return Legalized;
5610fe6060f1SDimitry Andric }
5611fe6060f1SDimitry Andric 
5612fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
56130b57cec5SDimitry Andric LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
561406c3fb27SDimitry Andric   auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
56150b57cec5SDimitry Andric 
56160b57cec5SDimitry Andric   LLT Ty = MRI.getType(DstReg);
56170b57cec5SDimitry Andric   if (Ty.isVector())
56180b57cec5SDimitry Andric     return UnableToLegalize;
56190b57cec5SDimitry Andric 
5620349cc55cSDimitry Andric   unsigned Size = Ty.getSizeInBits();
56210b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
5622349cc55cSDimitry Andric   if (Size % NarrowSize != 0)
56230b57cec5SDimitry Andric     return UnableToLegalize;
56240b57cec5SDimitry Andric 
5625349cc55cSDimitry Andric   unsigned NumParts = Size / NarrowSize;
56260b57cec5SDimitry Andric   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5627349cc55cSDimitry Andric   unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
56280b57cec5SDimitry Andric 
56295ffd83dbSDimitry Andric   SmallVector<Register, 2> Src1Parts, Src2Parts;
56305ffd83dbSDimitry Andric   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
56317a6dacacSDimitry Andric   extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
56327a6dacacSDimitry Andric   extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
56330b57cec5SDimitry Andric   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
56340b57cec5SDimitry Andric 
56350b57cec5SDimitry Andric   // Take only high half of registers if this is high mul.
5636349cc55cSDimitry Andric   ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5637bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
56380b57cec5SDimitry Andric   MI.eraseFromParent();
56390b57cec5SDimitry Andric   return Legalized;
56400b57cec5SDimitry Andric }
56410b57cec5SDimitry Andric 
56420b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
564323408297SDimitry Andric LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
564423408297SDimitry Andric                                    LLT NarrowTy) {
564523408297SDimitry Andric   if (TypeIdx != 0)
564623408297SDimitry Andric     return UnableToLegalize;
564723408297SDimitry Andric 
564823408297SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
564923408297SDimitry Andric 
565023408297SDimitry Andric   Register Src = MI.getOperand(1).getReg();
565123408297SDimitry Andric   LLT SrcTy = MRI.getType(Src);
565223408297SDimitry Andric 
565323408297SDimitry Andric   // If all finite floats fit into the narrowed integer type, we can just swap
565423408297SDimitry Andric   // out the result type. This is practically only useful for conversions from
565523408297SDimitry Andric   // half to at least 16-bits, so just handle the one case.
565623408297SDimitry Andric   if (SrcTy.getScalarType() != LLT::scalar(16) ||
5657fe6060f1SDimitry Andric       NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
565823408297SDimitry Andric     return UnableToLegalize;
565923408297SDimitry Andric 
566023408297SDimitry Andric   Observer.changingInstr(MI);
566123408297SDimitry Andric   narrowScalarDst(MI, NarrowTy, 0,
566223408297SDimitry Andric                   IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
566323408297SDimitry Andric   Observer.changedInstr(MI);
566423408297SDimitry Andric   return Legalized;
566523408297SDimitry Andric }
566623408297SDimitry Andric 
566723408297SDimitry Andric LegalizerHelper::LegalizeResult
56680b57cec5SDimitry Andric LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
56690b57cec5SDimitry Andric                                      LLT NarrowTy) {
56700b57cec5SDimitry Andric   if (TypeIdx != 1)
56710b57cec5SDimitry Andric     return UnableToLegalize;
56720b57cec5SDimitry Andric 
56730b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
56740b57cec5SDimitry Andric 
56750b57cec5SDimitry Andric   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
56760b57cec5SDimitry Andric   // FIXME: add support for when SizeOp1 isn't an exact multiple of
56770b57cec5SDimitry Andric   // NarrowSize.
56780b57cec5SDimitry Andric   if (SizeOp1 % NarrowSize != 0)
56790b57cec5SDimitry Andric     return UnableToLegalize;
56800b57cec5SDimitry Andric   int NumParts = SizeOp1 / NarrowSize;
56810b57cec5SDimitry Andric 
56820b57cec5SDimitry Andric   SmallVector<Register, 2> SrcRegs, DstRegs;
56830b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
56847a6dacacSDimitry Andric   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
56857a6dacacSDimitry Andric                MIRBuilder, MRI);
56860b57cec5SDimitry Andric 
56870b57cec5SDimitry Andric   Register OpReg = MI.getOperand(0).getReg();
56880b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(2).getImm();
56890b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
56900b57cec5SDimitry Andric   for (int i = 0; i < NumParts; ++i) {
56910b57cec5SDimitry Andric     unsigned SrcStart = i * NarrowSize;
56920b57cec5SDimitry Andric 
56930b57cec5SDimitry Andric     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
56940b57cec5SDimitry Andric       // No part of the extract uses this subregister, ignore it.
56950b57cec5SDimitry Andric       continue;
56960b57cec5SDimitry Andric     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
56970b57cec5SDimitry Andric       // The entire subregister is extracted, forward the value.
56980b57cec5SDimitry Andric       DstRegs.push_back(SrcRegs[i]);
56990b57cec5SDimitry Andric       continue;
57000b57cec5SDimitry Andric     }
57010b57cec5SDimitry Andric 
57020b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
57030b57cec5SDimitry Andric     // extended infinitely in both directions.
57040b57cec5SDimitry Andric     int64_t ExtractOffset;
57050b57cec5SDimitry Andric     uint64_t SegSize;
57060b57cec5SDimitry Andric     if (OpStart < SrcStart) {
57070b57cec5SDimitry Andric       ExtractOffset = 0;
57080b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
57090b57cec5SDimitry Andric     } else {
57100b57cec5SDimitry Andric       ExtractOffset = OpStart - SrcStart;
57110b57cec5SDimitry Andric       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
57120b57cec5SDimitry Andric     }
57130b57cec5SDimitry Andric 
57140b57cec5SDimitry Andric     Register SegReg = SrcRegs[i];
57150b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != NarrowSize) {
57160b57cec5SDimitry Andric       // A genuine extract is needed.
57170b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
57180b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
57190b57cec5SDimitry Andric     }
57200b57cec5SDimitry Andric 
57210b57cec5SDimitry Andric     DstRegs.push_back(SegReg);
57220b57cec5SDimitry Andric   }
57230b57cec5SDimitry Andric 
57240b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
57250b57cec5SDimitry Andric   if (MRI.getType(DstReg).isVector())
57260b57cec5SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, DstRegs);
57275ffd83dbSDimitry Andric   else if (DstRegs.size() > 1)
5728bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
57295ffd83dbSDimitry Andric   else
57305ffd83dbSDimitry Andric     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
57310b57cec5SDimitry Andric   MI.eraseFromParent();
57320b57cec5SDimitry Andric   return Legalized;
57330b57cec5SDimitry Andric }
57340b57cec5SDimitry Andric 
57350b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
57360b57cec5SDimitry Andric LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
57370b57cec5SDimitry Andric                                     LLT NarrowTy) {
57380b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
57390b57cec5SDimitry Andric   if (TypeIdx != 0)
57400b57cec5SDimitry Andric     return UnableToLegalize;
57410b57cec5SDimitry Andric 
5742fe6060f1SDimitry Andric   SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
57430b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
5744fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5745fe6060f1SDimitry Andric   LLT LeftoverTy;
5746fe6060f1SDimitry Andric   extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
57477a6dacacSDimitry Andric                LeftoverRegs, MIRBuilder, MRI);
57480b57cec5SDimitry Andric 
5749fe6060f1SDimitry Andric   for (Register Reg : LeftoverRegs)
5750fe6060f1SDimitry Andric     SrcRegs.push_back(Reg);
5751fe6060f1SDimitry Andric 
5752fe6060f1SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
57530b57cec5SDimitry Andric   Register OpReg = MI.getOperand(2).getReg();
57540b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(3).getImm();
57550b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5756fe6060f1SDimitry Andric   for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5757fe6060f1SDimitry Andric     unsigned DstStart = I * NarrowSize;
57580b57cec5SDimitry Andric 
5759fe6060f1SDimitry Andric     if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
57600b57cec5SDimitry Andric       // The entire subregister is defined by this insert, forward the new
57610b57cec5SDimitry Andric       // value.
57620b57cec5SDimitry Andric       DstRegs.push_back(OpReg);
57630b57cec5SDimitry Andric       continue;
57640b57cec5SDimitry Andric     }
57650b57cec5SDimitry Andric 
5766fe6060f1SDimitry Andric     Register SrcReg = SrcRegs[I];
5767fe6060f1SDimitry Andric     if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5768fe6060f1SDimitry Andric       // The leftover reg is smaller than NarrowTy, so we need to extend it.
5769fe6060f1SDimitry Andric       SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5770fe6060f1SDimitry Andric       MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5771fe6060f1SDimitry Andric     }
5772fe6060f1SDimitry Andric 
5773fe6060f1SDimitry Andric     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5774fe6060f1SDimitry Andric       // No part of the insert affects this subregister, forward the original.
5775fe6060f1SDimitry Andric       DstRegs.push_back(SrcReg);
5776fe6060f1SDimitry Andric       continue;
5777fe6060f1SDimitry Andric     }
5778fe6060f1SDimitry Andric 
57790b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
57800b57cec5SDimitry Andric     // extended infinitely in both directions.
57810b57cec5SDimitry Andric     int64_t ExtractOffset, InsertOffset;
57820b57cec5SDimitry Andric     uint64_t SegSize;
57830b57cec5SDimitry Andric     if (OpStart < DstStart) {
57840b57cec5SDimitry Andric       InsertOffset = 0;
57850b57cec5SDimitry Andric       ExtractOffset = DstStart - OpStart;
57860b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
57870b57cec5SDimitry Andric     } else {
57880b57cec5SDimitry Andric       InsertOffset = OpStart - DstStart;
57890b57cec5SDimitry Andric       ExtractOffset = 0;
57900b57cec5SDimitry Andric       SegSize =
57910b57cec5SDimitry Andric         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
57920b57cec5SDimitry Andric     }
57930b57cec5SDimitry Andric 
57940b57cec5SDimitry Andric     Register SegReg = OpReg;
57950b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != OpSize) {
57960b57cec5SDimitry Andric       // A genuine extract is needed.
57970b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
57980b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
57990b57cec5SDimitry Andric     }
58000b57cec5SDimitry Andric 
58010b57cec5SDimitry Andric     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
5802fe6060f1SDimitry Andric     MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
58030b57cec5SDimitry Andric     DstRegs.push_back(DstReg);
58040b57cec5SDimitry Andric   }
58050b57cec5SDimitry Andric 
5806fe6060f1SDimitry Andric   uint64_t WideSize = DstRegs.size() * NarrowSize;
58070b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5808fe6060f1SDimitry Andric   if (WideSize > RegTy.getSizeInBits()) {
5809fe6060f1SDimitry Andric     Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
5810bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
5811fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, MergeReg);
5812fe6060f1SDimitry Andric   } else
5813bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5814fe6060f1SDimitry Andric 
58150b57cec5SDimitry Andric   MI.eraseFromParent();
58160b57cec5SDimitry Andric   return Legalized;
58170b57cec5SDimitry Andric }
58180b57cec5SDimitry Andric 
58190b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
58200b57cec5SDimitry Andric LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
58210b57cec5SDimitry Andric                                    LLT NarrowTy) {
58220b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
58230b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
58240b57cec5SDimitry Andric 
58250b57cec5SDimitry Andric   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
58260b57cec5SDimitry Andric 
58270b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
58280b57cec5SDimitry Andric   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
58290b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
58300b57cec5SDimitry Andric   LLT LeftoverTy;
58310b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
58327a6dacacSDimitry Andric                     Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
58330b57cec5SDimitry Andric     return UnableToLegalize;
58340b57cec5SDimitry Andric 
58350b57cec5SDimitry Andric   LLT Unused;
58360b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
58377a6dacacSDimitry Andric                     Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
58380b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
58390b57cec5SDimitry Andric 
58400b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
58410b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
58420b57cec5SDimitry Andric                                         {Src0Regs[I], Src1Regs[I]});
58435ffd83dbSDimitry Andric     DstRegs.push_back(Inst.getReg(0));
58440b57cec5SDimitry Andric   }
58450b57cec5SDimitry Andric 
58460b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
58470b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(
58480b57cec5SDimitry Andric       MI.getOpcode(),
58490b57cec5SDimitry Andric       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
58505ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Inst.getReg(0));
58510b57cec5SDimitry Andric   }
58520b57cec5SDimitry Andric 
58530b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
58540b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
58550b57cec5SDimitry Andric 
58560b57cec5SDimitry Andric   MI.eraseFromParent();
58570b57cec5SDimitry Andric   return Legalized;
58580b57cec5SDimitry Andric }
58590b57cec5SDimitry Andric 
58600b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
58615ffd83dbSDimitry Andric LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
58625ffd83dbSDimitry Andric                                  LLT NarrowTy) {
58635ffd83dbSDimitry Andric   if (TypeIdx != 0)
58645ffd83dbSDimitry Andric     return UnableToLegalize;
58655ffd83dbSDimitry Andric 
586606c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
58675ffd83dbSDimitry Andric 
58685ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
58695ffd83dbSDimitry Andric   if (DstTy.isVector())
58705ffd83dbSDimitry Andric     return UnableToLegalize;
58715ffd83dbSDimitry Andric 
58725ffd83dbSDimitry Andric   SmallVector<Register, 8> Parts;
58735ffd83dbSDimitry Andric   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
58745ffd83dbSDimitry Andric   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
58755ffd83dbSDimitry Andric   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
58765ffd83dbSDimitry Andric 
58775ffd83dbSDimitry Andric   MI.eraseFromParent();
58785ffd83dbSDimitry Andric   return Legalized;
58795ffd83dbSDimitry Andric }
58805ffd83dbSDimitry Andric 
58815ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
58820b57cec5SDimitry Andric LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
58830b57cec5SDimitry Andric                                     LLT NarrowTy) {
58840b57cec5SDimitry Andric   if (TypeIdx != 0)
58850b57cec5SDimitry Andric     return UnableToLegalize;
58860b57cec5SDimitry Andric 
58870b57cec5SDimitry Andric   Register CondReg = MI.getOperand(1).getReg();
58880b57cec5SDimitry Andric   LLT CondTy = MRI.getType(CondReg);
58890b57cec5SDimitry Andric   if (CondTy.isVector()) // TODO: Handle vselect
58900b57cec5SDimitry Andric     return UnableToLegalize;
58910b57cec5SDimitry Andric 
58920b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
58930b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
58940b57cec5SDimitry Andric 
58950b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
58960b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
58970b57cec5SDimitry Andric   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
58980b57cec5SDimitry Andric   LLT LeftoverTy;
58990b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
59007a6dacacSDimitry Andric                     Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
59010b57cec5SDimitry Andric     return UnableToLegalize;
59020b57cec5SDimitry Andric 
59030b57cec5SDimitry Andric   LLT Unused;
59040b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
59057a6dacacSDimitry Andric                     Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
59060b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
59070b57cec5SDimitry Andric 
59080b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
59090b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(NarrowTy,
59100b57cec5SDimitry Andric                                          CondReg, Src1Regs[I], Src2Regs[I]);
59115ffd83dbSDimitry Andric     DstRegs.push_back(Select.getReg(0));
59120b57cec5SDimitry Andric   }
59130b57cec5SDimitry Andric 
59140b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
59150b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(
59160b57cec5SDimitry Andric       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
59175ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Select.getReg(0));
59180b57cec5SDimitry Andric   }
59190b57cec5SDimitry Andric 
59200b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
59210b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
59220b57cec5SDimitry Andric 
59230b57cec5SDimitry Andric   MI.eraseFromParent();
59240b57cec5SDimitry Andric   return Legalized;
59250b57cec5SDimitry Andric }
59260b57cec5SDimitry Andric 
59270b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
59285ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
59295ffd83dbSDimitry Andric                                   LLT NarrowTy) {
59305ffd83dbSDimitry Andric   if (TypeIdx != 1)
59315ffd83dbSDimitry Andric     return UnableToLegalize;
59325ffd83dbSDimitry Andric 
593306c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
59345ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
59355ffd83dbSDimitry Andric 
59365ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
59375ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
59385ffd83dbSDimitry Andric 
59395ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
59405ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
59415ffd83dbSDimitry Andric     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
59425ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
59435ffd83dbSDimitry Andric     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
59445ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(1), C_0);
59455ffd83dbSDimitry Andric     auto LoCTLZ = IsUndef ?
59465ffd83dbSDimitry Andric       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
59475ffd83dbSDimitry Andric       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
59485ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
59495ffd83dbSDimitry Andric     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
59505ffd83dbSDimitry Andric     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
59515ffd83dbSDimitry Andric     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
59525ffd83dbSDimitry Andric 
59535ffd83dbSDimitry Andric     MI.eraseFromParent();
59545ffd83dbSDimitry Andric     return Legalized;
59555ffd83dbSDimitry Andric   }
59565ffd83dbSDimitry Andric 
59575ffd83dbSDimitry Andric   return UnableToLegalize;
59585ffd83dbSDimitry Andric }
59595ffd83dbSDimitry Andric 
59605ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
59615ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
59625ffd83dbSDimitry Andric                                   LLT NarrowTy) {
59635ffd83dbSDimitry Andric   if (TypeIdx != 1)
59645ffd83dbSDimitry Andric     return UnableToLegalize;
59655ffd83dbSDimitry Andric 
596606c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
59675ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
59685ffd83dbSDimitry Andric 
59695ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
59705ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
59715ffd83dbSDimitry Andric 
59725ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
59735ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
59745ffd83dbSDimitry Andric     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
59755ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
59765ffd83dbSDimitry Andric     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
59775ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(0), C_0);
59785ffd83dbSDimitry Andric     auto HiCTTZ = IsUndef ?
59795ffd83dbSDimitry Andric       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
59805ffd83dbSDimitry Andric       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
59815ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
59825ffd83dbSDimitry Andric     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
59835ffd83dbSDimitry Andric     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
59845ffd83dbSDimitry Andric     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
59855ffd83dbSDimitry Andric 
59865ffd83dbSDimitry Andric     MI.eraseFromParent();
59875ffd83dbSDimitry Andric     return Legalized;
59885ffd83dbSDimitry Andric   }
59895ffd83dbSDimitry Andric 
59905ffd83dbSDimitry Andric   return UnableToLegalize;
59915ffd83dbSDimitry Andric }
59925ffd83dbSDimitry Andric 
59935ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
59945ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
59955ffd83dbSDimitry Andric                                    LLT NarrowTy) {
59965ffd83dbSDimitry Andric   if (TypeIdx != 1)
59975ffd83dbSDimitry Andric     return UnableToLegalize;
59985ffd83dbSDimitry Andric 
599906c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
60005ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
60015ffd83dbSDimitry Andric 
60025ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
60035ffd83dbSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
60045ffd83dbSDimitry Andric 
60055ffd83dbSDimitry Andric     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
60065ffd83dbSDimitry Andric     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
60075ffd83dbSDimitry Andric     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
60085ffd83dbSDimitry Andric 
60095ffd83dbSDimitry Andric     MI.eraseFromParent();
60105ffd83dbSDimitry Andric     return Legalized;
60115ffd83dbSDimitry Andric   }
60125ffd83dbSDimitry Andric 
60135ffd83dbSDimitry Andric   return UnableToLegalize;
60145ffd83dbSDimitry Andric }
60155ffd83dbSDimitry Andric 
60165ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
601706c3fb27SDimitry Andric LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
601806c3fb27SDimitry Andric                                     LLT NarrowTy) {
601906c3fb27SDimitry Andric   if (TypeIdx != 1)
602006c3fb27SDimitry Andric     return UnableToLegalize;
602106c3fb27SDimitry Andric 
602206c3fb27SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
602306c3fb27SDimitry Andric   Register ExpReg = MI.getOperand(2).getReg();
602406c3fb27SDimitry Andric   LLT ExpTy = MRI.getType(ExpReg);
602506c3fb27SDimitry Andric 
602606c3fb27SDimitry Andric   unsigned ClampSize = NarrowTy.getScalarSizeInBits();
602706c3fb27SDimitry Andric 
602806c3fb27SDimitry Andric   // Clamp the exponent to the range of the target type.
602906c3fb27SDimitry Andric   auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
603006c3fb27SDimitry Andric   auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
603106c3fb27SDimitry Andric   auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
603206c3fb27SDimitry Andric   auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
603306c3fb27SDimitry Andric 
603406c3fb27SDimitry Andric   auto Trunc = B.buildTrunc(NarrowTy, Clamp);
603506c3fb27SDimitry Andric   Observer.changingInstr(MI);
603606c3fb27SDimitry Andric   MI.getOperand(2).setReg(Trunc.getReg(0));
603706c3fb27SDimitry Andric   Observer.changedInstr(MI);
603806c3fb27SDimitry Andric   return Legalized;
603906c3fb27SDimitry Andric }
604006c3fb27SDimitry Andric 
604106c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
6042e8d8bef9SDimitry Andric LegalizerHelper::lowerBitCount(MachineInstr &MI) {
60430b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
6044e8d8bef9SDimitry Andric   const auto &TII = MIRBuilder.getTII();
60450b57cec5SDimitry Andric   auto isSupported = [this](const LegalityQuery &Q) {
60460b57cec5SDimitry Andric     auto QAction = LI.getAction(Q).Action;
60470b57cec5SDimitry Andric     return QAction == Legal || QAction == Libcall || QAction == Custom;
60480b57cec5SDimitry Andric   };
60490b57cec5SDimitry Andric   switch (Opc) {
60500b57cec5SDimitry Andric   default:
60510b57cec5SDimitry Andric     return UnableToLegalize;
60520b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
60530b57cec5SDimitry Andric     // This trivially expands to CTLZ.
60540b57cec5SDimitry Andric     Observer.changingInstr(MI);
60550b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
60560b57cec5SDimitry Andric     Observer.changedInstr(MI);
60570b57cec5SDimitry Andric     return Legalized;
60580b57cec5SDimitry Andric   }
60590b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ: {
606006c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
60615ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
60625ffd83dbSDimitry Andric 
60635ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
60640b57cec5SDimitry Andric       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
60655ffd83dbSDimitry Andric       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
60665ffd83dbSDimitry Andric       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
60675ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
60685ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
60695ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
60705ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
60710b57cec5SDimitry Andric       MI.eraseFromParent();
60720b57cec5SDimitry Andric       return Legalized;
60730b57cec5SDimitry Andric     }
60740b57cec5SDimitry Andric     // for now, we do this:
60750b57cec5SDimitry Andric     // NewLen = NextPowerOf2(Len);
60760b57cec5SDimitry Andric     // x = x | (x >> 1);
60770b57cec5SDimitry Andric     // x = x | (x >> 2);
60780b57cec5SDimitry Andric     // ...
60790b57cec5SDimitry Andric     // x = x | (x >>16);
60800b57cec5SDimitry Andric     // x = x | (x >>32); // for 64-bit input
60810b57cec5SDimitry Andric     // Upto NewLen/2
60820b57cec5SDimitry Andric     // return Len - popcount(x);
60830b57cec5SDimitry Andric     //
60840b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
60850b57cec5SDimitry Andric     Register Op = SrcReg;
60860b57cec5SDimitry Andric     unsigned NewLen = PowerOf2Ceil(Len);
60870b57cec5SDimitry Andric     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
60885ffd83dbSDimitry Andric       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
60895ffd83dbSDimitry Andric       auto MIBOp = MIRBuilder.buildOr(
60905ffd83dbSDimitry Andric           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
60915ffd83dbSDimitry Andric       Op = MIBOp.getReg(0);
60920b57cec5SDimitry Andric     }
60935ffd83dbSDimitry Andric     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
60945ffd83dbSDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
60955ffd83dbSDimitry Andric                         MIBPop);
60960b57cec5SDimitry Andric     MI.eraseFromParent();
60970b57cec5SDimitry Andric     return Legalized;
60980b57cec5SDimitry Andric   }
60990b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
61000b57cec5SDimitry Andric     // This trivially expands to CTTZ.
61010b57cec5SDimitry Andric     Observer.changingInstr(MI);
61020b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
61030b57cec5SDimitry Andric     Observer.changedInstr(MI);
61040b57cec5SDimitry Andric     return Legalized;
61050b57cec5SDimitry Andric   }
61060b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ: {
610706c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
61085ffd83dbSDimitry Andric 
61095ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
61105ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
61110b57cec5SDimitry Andric       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
61120b57cec5SDimitry Andric       // zero.
61135ffd83dbSDimitry Andric       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
61145ffd83dbSDimitry Andric       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
61155ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
61165ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
61175ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
61185ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
61190b57cec5SDimitry Andric       MI.eraseFromParent();
61200b57cec5SDimitry Andric       return Legalized;
61210b57cec5SDimitry Andric     }
61220b57cec5SDimitry Andric     // for now, we use: { return popcount(~x & (x - 1)); }
61230b57cec5SDimitry Andric     // unless the target has ctlz but not ctpop, in which case we use:
61240b57cec5SDimitry Andric     // { return 32 - nlz(~x & (x-1)); }
61250b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
6126e8d8bef9SDimitry Andric     auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6127e8d8bef9SDimitry Andric     auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
61285ffd83dbSDimitry Andric     auto MIBTmp = MIRBuilder.buildAnd(
6129e8d8bef9SDimitry Andric         SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6130e8d8bef9SDimitry Andric     if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6131e8d8bef9SDimitry Andric         isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6132e8d8bef9SDimitry Andric       auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
61335ffd83dbSDimitry Andric       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6134e8d8bef9SDimitry Andric                           MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
61350b57cec5SDimitry Andric       MI.eraseFromParent();
61360b57cec5SDimitry Andric       return Legalized;
61370b57cec5SDimitry Andric     }
61385f757f3fSDimitry Andric     Observer.changingInstr(MI);
61390b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
61405ffd83dbSDimitry Andric     MI.getOperand(1).setReg(MIBTmp.getReg(0));
61415f757f3fSDimitry Andric     Observer.changedInstr(MI);
61425ffd83dbSDimitry Andric     return Legalized;
61435ffd83dbSDimitry Andric   }
61445ffd83dbSDimitry Andric   case TargetOpcode::G_CTPOP: {
6145e8d8bef9SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
6146e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(SrcReg);
61475ffd83dbSDimitry Andric     unsigned Size = Ty.getSizeInBits();
61485ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
61495ffd83dbSDimitry Andric 
61505ffd83dbSDimitry Andric     // Count set bits in blocks of 2 bits. Default approach would be
61515ffd83dbSDimitry Andric     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
61525ffd83dbSDimitry Andric     // We use following formula instead:
61535ffd83dbSDimitry Andric     // B2Count = val - { (val >> 1) & 0x55555555 }
61545ffd83dbSDimitry Andric     // since it gives same result in blocks of 2 with one instruction less.
61555ffd83dbSDimitry Andric     auto C_1 = B.buildConstant(Ty, 1);
6156e8d8bef9SDimitry Andric     auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
61575ffd83dbSDimitry Andric     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
61585ffd83dbSDimitry Andric     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
61595ffd83dbSDimitry Andric     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6160e8d8bef9SDimitry Andric     auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
61615ffd83dbSDimitry Andric 
61625ffd83dbSDimitry Andric     // In order to get count in blocks of 4 add values from adjacent block of 2.
61635ffd83dbSDimitry Andric     // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
61645ffd83dbSDimitry Andric     auto C_2 = B.buildConstant(Ty, 2);
61655ffd83dbSDimitry Andric     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
61665ffd83dbSDimitry Andric     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
61675ffd83dbSDimitry Andric     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
61685ffd83dbSDimitry Andric     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
61695ffd83dbSDimitry Andric     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
61705ffd83dbSDimitry Andric     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
61715ffd83dbSDimitry Andric 
61725ffd83dbSDimitry Andric     // For count in blocks of 8 bits we don't have to mask high 4 bits before
61735ffd83dbSDimitry Andric     // addition since count value sits in range {0,...,8} and 4 bits are enough
61745ffd83dbSDimitry Andric     // to hold such binary values. After addition high 4 bits still hold count
61755ffd83dbSDimitry Andric     // of set bits in high 4 bit block, set them to zero and get 8 bit result.
61765ffd83dbSDimitry Andric     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
61775ffd83dbSDimitry Andric     auto C_4 = B.buildConstant(Ty, 4);
61785ffd83dbSDimitry Andric     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
61795ffd83dbSDimitry Andric     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
61805ffd83dbSDimitry Andric     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
61815ffd83dbSDimitry Andric     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
61825ffd83dbSDimitry Andric     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
61835ffd83dbSDimitry Andric 
61845ffd83dbSDimitry Andric     assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
61855ffd83dbSDimitry Andric     // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
61865ffd83dbSDimitry Andric     // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
61875ffd83dbSDimitry Andric     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
61885ffd83dbSDimitry Andric     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
61895ffd83dbSDimitry Andric 
61905ffd83dbSDimitry Andric     // Shift count result from 8 high bits to low bits.
61915ffd83dbSDimitry Andric     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
61925ffd83dbSDimitry Andric     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
61935ffd83dbSDimitry Andric 
61945ffd83dbSDimitry Andric     MI.eraseFromParent();
61950b57cec5SDimitry Andric     return Legalized;
61960b57cec5SDimitry Andric   }
61970b57cec5SDimitry Andric   }
61980b57cec5SDimitry Andric }
61990b57cec5SDimitry Andric 
6200fe6060f1SDimitry Andric // Check that (every element of) Reg is undef or not an exact multiple of BW.
6201fe6060f1SDimitry Andric static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6202fe6060f1SDimitry Andric                                         Register Reg, unsigned BW) {
6203fe6060f1SDimitry Andric   return matchUnaryPredicate(
6204fe6060f1SDimitry Andric       MRI, Reg,
6205fe6060f1SDimitry Andric       [=](const Constant *C) {
6206fe6060f1SDimitry Andric         // Null constant here means an undef.
6207fe6060f1SDimitry Andric         const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6208fe6060f1SDimitry Andric         return !CI || CI->getValue().urem(BW) != 0;
6209fe6060f1SDimitry Andric       },
6210fe6060f1SDimitry Andric       /*AllowUndefs*/ true);
6211fe6060f1SDimitry Andric }
6212fe6060f1SDimitry Andric 
6213fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6214fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
621506c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6216fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6217fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6218fe6060f1SDimitry Andric 
6219fe6060f1SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
6220fe6060f1SDimitry Andric 
6221fe6060f1SDimitry Andric   if (!isPowerOf2_32(BW))
6222fe6060f1SDimitry Andric     return UnableToLegalize;
6223fe6060f1SDimitry Andric 
6224fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6225fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6226fe6060f1SDimitry Andric 
6227fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6228fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr X, Y, -Z
6229fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl X, Y, -Z
6230fe6060f1SDimitry Andric     auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6231fe6060f1SDimitry Andric     Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6232fe6060f1SDimitry Andric   } else {
6233fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6234fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6235fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6236fe6060f1SDimitry Andric     if (IsFSHL) {
6237fe6060f1SDimitry Andric       Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6238fe6060f1SDimitry Andric       X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6239fe6060f1SDimitry Andric     } else {
6240fe6060f1SDimitry Andric       X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6241fe6060f1SDimitry Andric       Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6242fe6060f1SDimitry Andric     }
6243fe6060f1SDimitry Andric 
6244fe6060f1SDimitry Andric     Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6245fe6060f1SDimitry Andric   }
6246fe6060f1SDimitry Andric 
6247fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6248fe6060f1SDimitry Andric   MI.eraseFromParent();
6249fe6060f1SDimitry Andric   return Legalized;
6250fe6060f1SDimitry Andric }
6251fe6060f1SDimitry Andric 
6252fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6253fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
625406c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6255fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6256fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6257fe6060f1SDimitry Andric 
6258fe6060f1SDimitry Andric   const unsigned BW = Ty.getScalarSizeInBits();
6259fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6260fe6060f1SDimitry Andric 
6261fe6060f1SDimitry Andric   Register ShX, ShY;
6262fe6060f1SDimitry Andric   Register ShAmt, InvShAmt;
6263fe6060f1SDimitry Andric 
6264fe6060f1SDimitry Andric   // FIXME: Emit optimized urem by constant instead of letting it expand later.
6265fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6266fe6060f1SDimitry Andric     // fshl: X << C | Y >> (BW - C)
6267fe6060f1SDimitry Andric     // fshr: X << (BW - C) | Y >> C
6268fe6060f1SDimitry Andric     // where C = Z % BW is not zero
6269fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6270fe6060f1SDimitry Andric     ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6271fe6060f1SDimitry Andric     InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6272fe6060f1SDimitry Andric     ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6273fe6060f1SDimitry Andric     ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6274fe6060f1SDimitry Andric   } else {
6275fe6060f1SDimitry Andric     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6276fe6060f1SDimitry Andric     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6277fe6060f1SDimitry Andric     auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6278fe6060f1SDimitry Andric     if (isPowerOf2_32(BW)) {
6279fe6060f1SDimitry Andric       // Z % BW -> Z & (BW - 1)
6280fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6281fe6060f1SDimitry Andric       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6282fe6060f1SDimitry Andric       auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6283fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6284fe6060f1SDimitry Andric     } else {
6285fe6060f1SDimitry Andric       auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6286fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6287fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6288fe6060f1SDimitry Andric     }
6289fe6060f1SDimitry Andric 
6290fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6291fe6060f1SDimitry Andric     if (IsFSHL) {
6292fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6293fe6060f1SDimitry Andric       auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6294fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6295fe6060f1SDimitry Andric     } else {
6296fe6060f1SDimitry Andric       auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6297fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6298fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6299fe6060f1SDimitry Andric     }
6300fe6060f1SDimitry Andric   }
6301fe6060f1SDimitry Andric 
6302fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShX, ShY);
6303fe6060f1SDimitry Andric   MI.eraseFromParent();
6304fe6060f1SDimitry Andric   return Legalized;
6305fe6060f1SDimitry Andric }
6306fe6060f1SDimitry Andric 
6307fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6308fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6309fe6060f1SDimitry Andric   // These operations approximately do the following (while avoiding undefined
6310fe6060f1SDimitry Andric   // shifts by BW):
6311fe6060f1SDimitry Andric   // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6312fe6060f1SDimitry Andric   // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6313fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
6314fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6315fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6316fe6060f1SDimitry Andric 
6317fe6060f1SDimitry Andric   bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6318fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6319fe6060f1SDimitry Andric 
6320fe6060f1SDimitry Andric   // TODO: Use smarter heuristic that accounts for vector legalization.
6321fe6060f1SDimitry Andric   if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6322fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6323fe6060f1SDimitry Andric 
6324fe6060f1SDimitry Andric   // This only works for powers of 2, fallback to shifts if it fails.
6325fe6060f1SDimitry Andric   LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6326fe6060f1SDimitry Andric   if (Result == UnableToLegalize)
6327fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6328fe6060f1SDimitry Andric   return Result;
6329fe6060f1SDimitry Andric }
6330fe6060f1SDimitry Andric 
63315f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
63325f757f3fSDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
63335f757f3fSDimitry Andric   LLT DstTy = MRI.getType(Dst);
63345f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(Src);
63355f757f3fSDimitry Andric 
63365f757f3fSDimitry Andric   uint32_t DstTySize = DstTy.getSizeInBits();
63375f757f3fSDimitry Andric   uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
63385f757f3fSDimitry Andric   uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
63395f757f3fSDimitry Andric 
63405f757f3fSDimitry Andric   if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
63415f757f3fSDimitry Andric       !isPowerOf2_32(SrcTyScalarSize))
63425f757f3fSDimitry Andric     return UnableToLegalize;
63435f757f3fSDimitry Andric 
63445f757f3fSDimitry Andric   // The step between extend is too large, split it by creating an intermediate
63455f757f3fSDimitry Andric   // extend instruction
63465f757f3fSDimitry Andric   if (SrcTyScalarSize * 2 < DstTyScalarSize) {
63475f757f3fSDimitry Andric     LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
63485f757f3fSDimitry Andric     // If the destination type is illegal, split it into multiple statements
63495f757f3fSDimitry Andric     // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
63505f757f3fSDimitry Andric     auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
63515f757f3fSDimitry Andric     // Unmerge the vector
63525f757f3fSDimitry Andric     LLT EltTy = MidTy.changeElementCount(
63535f757f3fSDimitry Andric         MidTy.getElementCount().divideCoefficientBy(2));
63545f757f3fSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
63555f757f3fSDimitry Andric 
63565f757f3fSDimitry Andric     // ZExt the vectors
63575f757f3fSDimitry Andric     LLT ZExtResTy = DstTy.changeElementCount(
63585f757f3fSDimitry Andric         DstTy.getElementCount().divideCoefficientBy(2));
63595f757f3fSDimitry Andric     auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
63605f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(0)});
63615f757f3fSDimitry Andric     auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
63625f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(1)});
63635f757f3fSDimitry Andric 
63645f757f3fSDimitry Andric     // Merge the ending vectors
63655f757f3fSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
63665f757f3fSDimitry Andric 
63675f757f3fSDimitry Andric     MI.eraseFromParent();
63685f757f3fSDimitry Andric     return Legalized;
63695f757f3fSDimitry Andric   }
63705f757f3fSDimitry Andric   return UnableToLegalize;
63715f757f3fSDimitry Andric }
63725f757f3fSDimitry Andric 
63735f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
63745f757f3fSDimitry Andric   // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
63755f757f3fSDimitry Andric   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
63765f757f3fSDimitry Andric   // Similar to how operand splitting is done in SelectiondDAG, we can handle
63775f757f3fSDimitry Andric   // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
63785f757f3fSDimitry Andric   //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
63795f757f3fSDimitry Andric   //   %lo16(<4 x s16>) = G_TRUNC %inlo
63805f757f3fSDimitry Andric   //   %hi16(<4 x s16>) = G_TRUNC %inhi
63815f757f3fSDimitry Andric   //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
63825f757f3fSDimitry Andric   //   %res(<8 x s8>) = G_TRUNC %in16
63835f757f3fSDimitry Andric 
63845f757f3fSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
63855f757f3fSDimitry Andric 
63865f757f3fSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
63875f757f3fSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
63885f757f3fSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
63895f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
63905f757f3fSDimitry Andric 
63915f757f3fSDimitry Andric   if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
63925f757f3fSDimitry Andric       isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
63935f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getNumElements()) &&
63945f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
63955f757f3fSDimitry Andric     // Split input type.
63965f757f3fSDimitry Andric     LLT SplitSrcTy = SrcTy.changeElementCount(
63975f757f3fSDimitry Andric         SrcTy.getElementCount().divideCoefficientBy(2));
63985f757f3fSDimitry Andric 
63995f757f3fSDimitry Andric     // First, split the source into two smaller vectors.
64005f757f3fSDimitry Andric     SmallVector<Register, 2> SplitSrcs;
64017a6dacacSDimitry Andric     extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
64025f757f3fSDimitry Andric 
64035f757f3fSDimitry Andric     // Truncate the splits into intermediate narrower elements.
64045f757f3fSDimitry Andric     LLT InterTy;
64055f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
64065f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
64075f757f3fSDimitry Andric     else
64085f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
64095f757f3fSDimitry Andric     for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
64105f757f3fSDimitry Andric       SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
64115f757f3fSDimitry Andric     }
64125f757f3fSDimitry Andric 
64135f757f3fSDimitry Andric     // Combine the new truncates into one vector
64145f757f3fSDimitry Andric     auto Merge = MIRBuilder.buildMergeLikeInstr(
64155f757f3fSDimitry Andric         DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
64165f757f3fSDimitry Andric 
64175f757f3fSDimitry Andric     // Truncate the new vector to the final result type
64185f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
64195f757f3fSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
64205f757f3fSDimitry Andric     else
64215f757f3fSDimitry Andric       MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
64225f757f3fSDimitry Andric 
64235f757f3fSDimitry Andric     MI.eraseFromParent();
64245f757f3fSDimitry Andric 
64255f757f3fSDimitry Andric     return Legalized;
64265f757f3fSDimitry Andric   }
64275f757f3fSDimitry Andric   return UnableToLegalize;
64285f757f3fSDimitry Andric }
64295f757f3fSDimitry Andric 
6430fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6431fe6060f1SDimitry Andric LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
643206c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6433fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6434fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6435fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6436fe6060f1SDimitry Andric   auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6437fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6438fe6060f1SDimitry Andric   MI.eraseFromParent();
6439fe6060f1SDimitry Andric   return Legalized;
6440fe6060f1SDimitry Andric }
6441fe6060f1SDimitry Andric 
6442fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
644306c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6444fe6060f1SDimitry Andric 
6445fe6060f1SDimitry Andric   unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6446fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6447fe6060f1SDimitry Andric 
6448fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
6449fe6060f1SDimitry Andric 
6450fe6060f1SDimitry Andric   // If a rotate in the other direction is supported, use it.
6451fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6452fe6060f1SDimitry Andric   if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6453fe6060f1SDimitry Andric       isPowerOf2_32(EltSizeInBits))
6454fe6060f1SDimitry Andric     return lowerRotateWithReverseRotate(MI);
6455fe6060f1SDimitry Andric 
6456349cc55cSDimitry Andric   // If a funnel shift is supported, use it.
6457349cc55cSDimitry Andric   unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6458349cc55cSDimitry Andric   unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6459349cc55cSDimitry Andric   bool IsFShLegal = false;
6460349cc55cSDimitry Andric   if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6461349cc55cSDimitry Andric       LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6462349cc55cSDimitry Andric     auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6463349cc55cSDimitry Andric                                 Register R3) {
6464349cc55cSDimitry Andric       MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6465349cc55cSDimitry Andric       MI.eraseFromParent();
6466349cc55cSDimitry Andric       return Legalized;
6467349cc55cSDimitry Andric     };
6468349cc55cSDimitry Andric     // If a funnel shift in the other direction is supported, use it.
6469349cc55cSDimitry Andric     if (IsFShLegal) {
6470349cc55cSDimitry Andric       return buildFunnelShift(FShOpc, Dst, Src, Amt);
6471349cc55cSDimitry Andric     } else if (isPowerOf2_32(EltSizeInBits)) {
6472349cc55cSDimitry Andric       Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6473349cc55cSDimitry Andric       return buildFunnelShift(RevFsh, Dst, Src, Amt);
6474349cc55cSDimitry Andric     }
6475349cc55cSDimitry Andric   }
6476349cc55cSDimitry Andric 
6477fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6478fe6060f1SDimitry Andric   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6479fe6060f1SDimitry Andric   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6480fe6060f1SDimitry Andric   auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6481fe6060f1SDimitry Andric   Register ShVal;
6482fe6060f1SDimitry Andric   Register RevShiftVal;
6483fe6060f1SDimitry Andric   if (isPowerOf2_32(EltSizeInBits)) {
6484fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6485fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6486fe6060f1SDimitry Andric     auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6487fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6488fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6489fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6490fe6060f1SDimitry Andric     RevShiftVal =
6491fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6492fe6060f1SDimitry Andric   } else {
6493fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6494fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6495fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6496fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6497fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6498fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6499fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(AmtTy, 1);
6500fe6060f1SDimitry Andric     auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6501fe6060f1SDimitry Andric     RevShiftVal =
6502fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6503fe6060f1SDimitry Andric   }
6504fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6505fe6060f1SDimitry Andric   MI.eraseFromParent();
6506fe6060f1SDimitry Andric   return Legalized;
6507fe6060f1SDimitry Andric }
6508fe6060f1SDimitry Andric 
65090b57cec5SDimitry Andric // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
65100b57cec5SDimitry Andric // representation.
65110b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
65120b57cec5SDimitry Andric LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
651306c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
65140b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
65150b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
65160b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
65170b57cec5SDimitry Andric 
65180b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
65190b57cec5SDimitry Andric 
65200b57cec5SDimitry Andric   // unsigned cul2f(ulong u) {
65210b57cec5SDimitry Andric   //   uint lz = clz(u);
65220b57cec5SDimitry Andric   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
65230b57cec5SDimitry Andric   //   u = (u << lz) & 0x7fffffffffffffffUL;
65240b57cec5SDimitry Andric   //   ulong t = u & 0xffffffffffUL;
65250b57cec5SDimitry Andric   //   uint v = (e << 23) | (uint)(u >> 40);
65260b57cec5SDimitry Andric   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
65270b57cec5SDimitry Andric   //   return as_float(v + r);
65280b57cec5SDimitry Andric   // }
65290b57cec5SDimitry Andric 
65300b57cec5SDimitry Andric   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
65310b57cec5SDimitry Andric   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
65320b57cec5SDimitry Andric 
65330b57cec5SDimitry Andric   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
65340b57cec5SDimitry Andric 
65350b57cec5SDimitry Andric   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
65360b57cec5SDimitry Andric   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
65370b57cec5SDimitry Andric 
65380b57cec5SDimitry Andric   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
65390b57cec5SDimitry Andric   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
65400b57cec5SDimitry Andric 
65410b57cec5SDimitry Andric   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
65420b57cec5SDimitry Andric   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
65430b57cec5SDimitry Andric 
65440b57cec5SDimitry Andric   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
65450b57cec5SDimitry Andric 
65460b57cec5SDimitry Andric   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
65470b57cec5SDimitry Andric   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
65480b57cec5SDimitry Andric 
65490b57cec5SDimitry Andric   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
65500b57cec5SDimitry Andric   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
65510b57cec5SDimitry Andric   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
65520b57cec5SDimitry Andric 
65530b57cec5SDimitry Andric   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
65540b57cec5SDimitry Andric   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
65550b57cec5SDimitry Andric   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
65560b57cec5SDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
65570b57cec5SDimitry Andric 
65580b57cec5SDimitry Andric   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
65590b57cec5SDimitry Andric   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
65600b57cec5SDimitry Andric   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
65610b57cec5SDimitry Andric   MIRBuilder.buildAdd(Dst, V, R);
65620b57cec5SDimitry Andric 
65635ffd83dbSDimitry Andric   MI.eraseFromParent();
65640b57cec5SDimitry Andric   return Legalized;
65650b57cec5SDimitry Andric }
65660b57cec5SDimitry Andric 
6567e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
656806c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
65690b57cec5SDimitry Andric 
6570480093f4SDimitry Andric   if (SrcTy == LLT::scalar(1)) {
6571480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6572480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6573480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6574480093f4SDimitry Andric     MI.eraseFromParent();
6575480093f4SDimitry Andric     return Legalized;
6576480093f4SDimitry Andric   }
6577480093f4SDimitry Andric 
65780b57cec5SDimitry Andric   if (SrcTy != LLT::scalar(64))
65790b57cec5SDimitry Andric     return UnableToLegalize;
65800b57cec5SDimitry Andric 
65810b57cec5SDimitry Andric   if (DstTy == LLT::scalar(32)) {
65820b57cec5SDimitry Andric     // TODO: SelectionDAG has several alternative expansions to port which may
65830b57cec5SDimitry Andric     // be more reasonble depending on the available instructions. If a target
65840b57cec5SDimitry Andric     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
65850b57cec5SDimitry Andric     // intermediate type, this is probably worse.
65860b57cec5SDimitry Andric     return lowerU64ToF32BitOps(MI);
65870b57cec5SDimitry Andric   }
65880b57cec5SDimitry Andric 
65890b57cec5SDimitry Andric   return UnableToLegalize;
65900b57cec5SDimitry Andric }
65910b57cec5SDimitry Andric 
6592e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
659306c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
65940b57cec5SDimitry Andric 
65950b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
65960b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
65970b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
65980b57cec5SDimitry Andric 
6599480093f4SDimitry Andric   if (SrcTy == S1) {
6600480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6601480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6602480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6603480093f4SDimitry Andric     MI.eraseFromParent();
6604480093f4SDimitry Andric     return Legalized;
6605480093f4SDimitry Andric   }
6606480093f4SDimitry Andric 
66070b57cec5SDimitry Andric   if (SrcTy != S64)
66080b57cec5SDimitry Andric     return UnableToLegalize;
66090b57cec5SDimitry Andric 
66100b57cec5SDimitry Andric   if (DstTy == S32) {
66110b57cec5SDimitry Andric     // signed cl2f(long l) {
66120b57cec5SDimitry Andric     //   long s = l >> 63;
66130b57cec5SDimitry Andric     //   float r = cul2f((l + s) ^ s);
66140b57cec5SDimitry Andric     //   return s ? -r : r;
66150b57cec5SDimitry Andric     // }
66160b57cec5SDimitry Andric     Register L = Src;
66170b57cec5SDimitry Andric     auto SignBit = MIRBuilder.buildConstant(S64, 63);
66180b57cec5SDimitry Andric     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
66190b57cec5SDimitry Andric 
66200b57cec5SDimitry Andric     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
66210b57cec5SDimitry Andric     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
66220b57cec5SDimitry Andric     auto R = MIRBuilder.buildUITOFP(S32, Xor);
66230b57cec5SDimitry Andric 
66240b57cec5SDimitry Andric     auto RNeg = MIRBuilder.buildFNeg(S32, R);
66250b57cec5SDimitry Andric     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
66260b57cec5SDimitry Andric                                             MIRBuilder.buildConstant(S64, 0));
66270b57cec5SDimitry Andric     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
66285ffd83dbSDimitry Andric     MI.eraseFromParent();
66290b57cec5SDimitry Andric     return Legalized;
66300b57cec5SDimitry Andric   }
66310b57cec5SDimitry Andric 
66320b57cec5SDimitry Andric   return UnableToLegalize;
66330b57cec5SDimitry Andric }
66340b57cec5SDimitry Andric 
6635e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
663606c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
66378bcb0991SDimitry Andric   const LLT S64 = LLT::scalar(64);
66388bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
66398bcb0991SDimitry Andric 
66408bcb0991SDimitry Andric   if (SrcTy != S64 && SrcTy != S32)
66418bcb0991SDimitry Andric     return UnableToLegalize;
66428bcb0991SDimitry Andric   if (DstTy != S32 && DstTy != S64)
66438bcb0991SDimitry Andric     return UnableToLegalize;
66448bcb0991SDimitry Andric 
66458bcb0991SDimitry Andric   // FPTOSI gives same result as FPTOUI for positive signed integers.
66468bcb0991SDimitry Andric   // FPTOUI needs to deal with fp values that convert to unsigned integers
66478bcb0991SDimitry Andric   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
66488bcb0991SDimitry Andric 
66498bcb0991SDimitry Andric   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
66508bcb0991SDimitry Andric   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
66518bcb0991SDimitry Andric                                                 : APFloat::IEEEdouble(),
6652349cc55cSDimitry Andric                     APInt::getZero(SrcTy.getSizeInBits()));
66538bcb0991SDimitry Andric   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
66548bcb0991SDimitry Andric 
66558bcb0991SDimitry Andric   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
66568bcb0991SDimitry Andric 
66578bcb0991SDimitry Andric   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
66588bcb0991SDimitry Andric   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
66598bcb0991SDimitry Andric   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
66608bcb0991SDimitry Andric   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
66618bcb0991SDimitry Andric   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
66628bcb0991SDimitry Andric   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
66638bcb0991SDimitry Andric   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
66648bcb0991SDimitry Andric 
6665480093f4SDimitry Andric   const LLT S1 = LLT::scalar(1);
6666480093f4SDimitry Andric 
66678bcb0991SDimitry Andric   MachineInstrBuilder FCMP =
6668480093f4SDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
66698bcb0991SDimitry Andric   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
66708bcb0991SDimitry Andric 
66718bcb0991SDimitry Andric   MI.eraseFromParent();
66728bcb0991SDimitry Andric   return Legalized;
66738bcb0991SDimitry Andric }
66748bcb0991SDimitry Andric 
66755ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
667606c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
66775ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
66785ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
66795ffd83dbSDimitry Andric 
66805ffd83dbSDimitry Andric   // FIXME: Only f32 to i64 conversions are supported.
66815ffd83dbSDimitry Andric   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
66825ffd83dbSDimitry Andric     return UnableToLegalize;
66835ffd83dbSDimitry Andric 
66845ffd83dbSDimitry Andric   // Expand f32 -> i64 conversion
66855ffd83dbSDimitry Andric   // This algorithm comes from compiler-rt's implementation of fixsfdi:
6686fe6060f1SDimitry Andric   // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
66875ffd83dbSDimitry Andric 
66885ffd83dbSDimitry Andric   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
66895ffd83dbSDimitry Andric 
66905ffd83dbSDimitry Andric   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
66915ffd83dbSDimitry Andric   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
66925ffd83dbSDimitry Andric 
66935ffd83dbSDimitry Andric   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
66945ffd83dbSDimitry Andric   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
66955ffd83dbSDimitry Andric 
66965ffd83dbSDimitry Andric   auto SignMask = MIRBuilder.buildConstant(SrcTy,
66975ffd83dbSDimitry Andric                                            APInt::getSignMask(SrcEltBits));
66985ffd83dbSDimitry Andric   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
66995ffd83dbSDimitry Andric   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
67005ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
67015ffd83dbSDimitry Andric   Sign = MIRBuilder.buildSExt(DstTy, Sign);
67025ffd83dbSDimitry Andric 
67035ffd83dbSDimitry Andric   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
67045ffd83dbSDimitry Andric   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
67055ffd83dbSDimitry Andric   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
67065ffd83dbSDimitry Andric 
67075ffd83dbSDimitry Andric   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
67085ffd83dbSDimitry Andric   R = MIRBuilder.buildZExt(DstTy, R);
67095ffd83dbSDimitry Andric 
67105ffd83dbSDimitry Andric   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
67115ffd83dbSDimitry Andric   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
67125ffd83dbSDimitry Andric   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
67135ffd83dbSDimitry Andric   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
67145ffd83dbSDimitry Andric 
67155ffd83dbSDimitry Andric   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
67165ffd83dbSDimitry Andric   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
67175ffd83dbSDimitry Andric 
67185ffd83dbSDimitry Andric   const LLT S1 = LLT::scalar(1);
67195ffd83dbSDimitry Andric   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
67205ffd83dbSDimitry Andric                                     S1, Exponent, ExponentLoBit);
67215ffd83dbSDimitry Andric 
67225ffd83dbSDimitry Andric   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
67235ffd83dbSDimitry Andric 
67245ffd83dbSDimitry Andric   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
67255ffd83dbSDimitry Andric   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
67265ffd83dbSDimitry Andric 
67275ffd83dbSDimitry Andric   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
67285ffd83dbSDimitry Andric 
67295ffd83dbSDimitry Andric   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
67305ffd83dbSDimitry Andric                                           S1, Exponent, ZeroSrcTy);
67315ffd83dbSDimitry Andric 
67325ffd83dbSDimitry Andric   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
67335ffd83dbSDimitry Andric   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
67345ffd83dbSDimitry Andric 
67355ffd83dbSDimitry Andric   MI.eraseFromParent();
67365ffd83dbSDimitry Andric   return Legalized;
67375ffd83dbSDimitry Andric }
67385ffd83dbSDimitry Andric 
67395ffd83dbSDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode.
67405ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
67415ffd83dbSDimitry Andric LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
674206c3fb27SDimitry Andric   const LLT S1 = LLT::scalar(1);
674306c3fb27SDimitry Andric   const LLT S32 = LLT::scalar(32);
674406c3fb27SDimitry Andric 
674506c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
674606c3fb27SDimitry Andric   assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
674706c3fb27SDimitry Andric          MRI.getType(Src).getScalarType() == LLT::scalar(64));
67485ffd83dbSDimitry Andric 
67495ffd83dbSDimitry Andric   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
67505ffd83dbSDimitry Andric     return UnableToLegalize;
67515ffd83dbSDimitry Andric 
675206c3fb27SDimitry Andric   if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
675306c3fb27SDimitry Andric     unsigned Flags = MI.getFlags();
675406c3fb27SDimitry Andric     auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
675506c3fb27SDimitry Andric     MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
675606c3fb27SDimitry Andric     MI.eraseFromParent();
675706c3fb27SDimitry Andric     return Legalized;
675806c3fb27SDimitry Andric   }
675906c3fb27SDimitry Andric 
67605ffd83dbSDimitry Andric   const unsigned ExpMask = 0x7ff;
67615ffd83dbSDimitry Andric   const unsigned ExpBiasf64 = 1023;
67625ffd83dbSDimitry Andric   const unsigned ExpBiasf16 = 15;
67635ffd83dbSDimitry Andric 
67645ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
67655ffd83dbSDimitry Andric   Register U = Unmerge.getReg(0);
67665ffd83dbSDimitry Andric   Register UH = Unmerge.getReg(1);
67675ffd83dbSDimitry Andric 
67685ffd83dbSDimitry Andric   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
67695ffd83dbSDimitry Andric   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
67705ffd83dbSDimitry Andric 
67715ffd83dbSDimitry Andric   // Subtract the fp64 exponent bias (1023) to get the real exponent and
67725ffd83dbSDimitry Andric   // add the f16 bias (15) to get the biased exponent for the f16 format.
67735ffd83dbSDimitry Andric   E = MIRBuilder.buildAdd(
67745ffd83dbSDimitry Andric     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
67755ffd83dbSDimitry Andric 
67765ffd83dbSDimitry Andric   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
67775ffd83dbSDimitry Andric   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
67785ffd83dbSDimitry Andric 
67795ffd83dbSDimitry Andric   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
67805ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1ff));
67815ffd83dbSDimitry Andric   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
67825ffd83dbSDimitry Andric 
67835ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildConstant(S32, 0);
67845ffd83dbSDimitry Andric   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
67855ffd83dbSDimitry Andric   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
67865ffd83dbSDimitry Andric   M = MIRBuilder.buildOr(S32, M, Lo40Set);
67875ffd83dbSDimitry Andric 
67885ffd83dbSDimitry Andric   // (M != 0 ? 0x0200 : 0) | 0x7c00;
67895ffd83dbSDimitry Andric   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
67905ffd83dbSDimitry Andric   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
67915ffd83dbSDimitry Andric   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
67925ffd83dbSDimitry Andric 
67935ffd83dbSDimitry Andric   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
67945ffd83dbSDimitry Andric   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
67955ffd83dbSDimitry Andric 
67965ffd83dbSDimitry Andric   // N = M | (E << 12);
67975ffd83dbSDimitry Andric   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
67985ffd83dbSDimitry Andric   auto N = MIRBuilder.buildOr(S32, M, EShl12);
67995ffd83dbSDimitry Andric 
68005ffd83dbSDimitry Andric   // B = clamp(1-E, 0, 13);
68015ffd83dbSDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
68025ffd83dbSDimitry Andric   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
68035ffd83dbSDimitry Andric   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
68045ffd83dbSDimitry Andric   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
68055ffd83dbSDimitry Andric 
68065ffd83dbSDimitry Andric   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
68075ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1000));
68085ffd83dbSDimitry Andric 
68095ffd83dbSDimitry Andric   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
68105ffd83dbSDimitry Andric   auto D0 = MIRBuilder.buildShl(S32, D, B);
68115ffd83dbSDimitry Andric 
68125ffd83dbSDimitry Andric   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
68135ffd83dbSDimitry Andric                                              D0, SigSetHigh);
68145ffd83dbSDimitry Andric   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
68155ffd83dbSDimitry Andric   D = MIRBuilder.buildOr(S32, D, D1);
68165ffd83dbSDimitry Andric 
68175ffd83dbSDimitry Andric   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
68185ffd83dbSDimitry Andric   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
68195ffd83dbSDimitry Andric 
68205ffd83dbSDimitry Andric   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
68215ffd83dbSDimitry Andric   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
68225ffd83dbSDimitry Andric 
68235ffd83dbSDimitry Andric   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
68245ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 3));
68255ffd83dbSDimitry Andric   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
68265ffd83dbSDimitry Andric 
68275ffd83dbSDimitry Andric   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
68285ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 5));
68295ffd83dbSDimitry Andric   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
68305ffd83dbSDimitry Andric 
68315ffd83dbSDimitry Andric   V1 = MIRBuilder.buildOr(S32, V0, V1);
68325ffd83dbSDimitry Andric   V = MIRBuilder.buildAdd(S32, V, V1);
68335ffd83dbSDimitry Andric 
68345ffd83dbSDimitry Andric   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
68355ffd83dbSDimitry Andric                                        E, MIRBuilder.buildConstant(S32, 30));
68365ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt30,
68375ffd83dbSDimitry Andric                              MIRBuilder.buildConstant(S32, 0x7c00), V);
68385ffd83dbSDimitry Andric 
68395ffd83dbSDimitry Andric   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
68405ffd83dbSDimitry Andric                                          E, MIRBuilder.buildConstant(S32, 1039));
68415ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
68425ffd83dbSDimitry Andric 
68435ffd83dbSDimitry Andric   // Extract the sign bit.
68445ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
68455ffd83dbSDimitry Andric   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
68465ffd83dbSDimitry Andric 
68475ffd83dbSDimitry Andric   // Insert the sign bit
68485ffd83dbSDimitry Andric   V = MIRBuilder.buildOr(S32, Sign, V);
68495ffd83dbSDimitry Andric 
68505ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst, V);
68515ffd83dbSDimitry Andric   MI.eraseFromParent();
68525ffd83dbSDimitry Andric   return Legalized;
68535ffd83dbSDimitry Andric }
68545ffd83dbSDimitry Andric 
68555ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
6856e8d8bef9SDimitry Andric LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
685706c3fb27SDimitry Andric   auto [DstTy, SrcTy] = MI.getFirst2LLTs();
68585ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
68595ffd83dbSDimitry Andric   const LLT S16 = LLT::scalar(16);
68605ffd83dbSDimitry Andric 
68615ffd83dbSDimitry Andric   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
68625ffd83dbSDimitry Andric     return lowerFPTRUNC_F64_TO_F16(MI);
68635ffd83dbSDimitry Andric 
68645ffd83dbSDimitry Andric   return UnableToLegalize;
68655ffd83dbSDimitry Andric }
68665ffd83dbSDimitry Andric 
6867e8d8bef9SDimitry Andric // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
6868e8d8bef9SDimitry Andric // multiplication tree.
6869e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
687006c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
6871e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Dst);
6872e8d8bef9SDimitry Andric 
6873e8d8bef9SDimitry Andric   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
6874e8d8bef9SDimitry Andric   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
6875e8d8bef9SDimitry Andric   MI.eraseFromParent();
6876e8d8bef9SDimitry Andric   return Legalized;
6877e8d8bef9SDimitry Andric }
6878e8d8bef9SDimitry Andric 
68790b57cec5SDimitry Andric static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
68800b57cec5SDimitry Andric   switch (Opc) {
68810b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
68820b57cec5SDimitry Andric     return CmpInst::ICMP_SLT;
68830b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
68840b57cec5SDimitry Andric     return CmpInst::ICMP_SGT;
68850b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
68860b57cec5SDimitry Andric     return CmpInst::ICMP_ULT;
68870b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
68880b57cec5SDimitry Andric     return CmpInst::ICMP_UGT;
68890b57cec5SDimitry Andric   default:
68900b57cec5SDimitry Andric     llvm_unreachable("not in integer min/max");
68910b57cec5SDimitry Andric   }
68920b57cec5SDimitry Andric }
68930b57cec5SDimitry Andric 
6894e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
689506c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
68960b57cec5SDimitry Andric 
68970b57cec5SDimitry Andric   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
68980b57cec5SDimitry Andric   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
68990b57cec5SDimitry Andric 
69000b57cec5SDimitry Andric   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
69010b57cec5SDimitry Andric   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
69020b57cec5SDimitry Andric 
69030b57cec5SDimitry Andric   MI.eraseFromParent();
69040b57cec5SDimitry Andric   return Legalized;
69050b57cec5SDimitry Andric }
69060b57cec5SDimitry Andric 
69070b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
6908e8d8bef9SDimitry Andric LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
690906c3fb27SDimitry Andric   auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
69100b57cec5SDimitry Andric   const int Src0Size = Src0Ty.getScalarSizeInBits();
69110b57cec5SDimitry Andric   const int Src1Size = Src1Ty.getScalarSizeInBits();
69120b57cec5SDimitry Andric 
69130b57cec5SDimitry Andric   auto SignBitMask = MIRBuilder.buildConstant(
69140b57cec5SDimitry Andric     Src0Ty, APInt::getSignMask(Src0Size));
69150b57cec5SDimitry Andric 
69160b57cec5SDimitry Andric   auto NotSignBitMask = MIRBuilder.buildConstant(
69170b57cec5SDimitry Andric     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
69180b57cec5SDimitry Andric 
6919fe6060f1SDimitry Andric   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
6920fe6060f1SDimitry Andric   Register And1;
69210b57cec5SDimitry Andric   if (Src0Ty == Src1Ty) {
6922fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
69230b57cec5SDimitry Andric   } else if (Src0Size > Src1Size) {
69240b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
69250b57cec5SDimitry Andric     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
69260b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
6927fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
69280b57cec5SDimitry Andric   } else {
69290b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
69300b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
69310b57cec5SDimitry Andric     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
6932fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
69330b57cec5SDimitry Andric   }
69340b57cec5SDimitry Andric 
69350b57cec5SDimitry Andric   // Be careful about setting nsz/nnan/ninf on every instruction, since the
69360b57cec5SDimitry Andric   // constants are a nan and -0.0, but the final result should preserve
69370b57cec5SDimitry Andric   // everything.
6938fe6060f1SDimitry Andric   unsigned Flags = MI.getFlags();
6939fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, And0, And1, Flags);
69400b57cec5SDimitry Andric 
69410b57cec5SDimitry Andric   MI.eraseFromParent();
69420b57cec5SDimitry Andric   return Legalized;
69430b57cec5SDimitry Andric }
69440b57cec5SDimitry Andric 
69450b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
69460b57cec5SDimitry Andric LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
69470b57cec5SDimitry Andric   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
69480b57cec5SDimitry Andric     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
69490b57cec5SDimitry Andric 
695006c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
69510b57cec5SDimitry Andric   LLT Ty = MRI.getType(Dst);
69520b57cec5SDimitry Andric 
69530b57cec5SDimitry Andric   if (!MI.getFlag(MachineInstr::FmNoNans)) {
69540b57cec5SDimitry Andric     // Insert canonicalizes if it's possible we need to quiet to get correct
69550b57cec5SDimitry Andric     // sNaN behavior.
69560b57cec5SDimitry Andric 
69570b57cec5SDimitry Andric     // Note this must be done here, and not as an optimization combine in the
69580b57cec5SDimitry Andric     // absence of a dedicate quiet-snan instruction as we're using an
69590b57cec5SDimitry Andric     // omni-purpose G_FCANONICALIZE.
69600b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src0, MRI))
69610b57cec5SDimitry Andric       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
69620b57cec5SDimitry Andric 
69630b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src1, MRI))
69640b57cec5SDimitry Andric       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
69650b57cec5SDimitry Andric   }
69660b57cec5SDimitry Andric 
69670b57cec5SDimitry Andric   // If there are no nans, it's safe to simply replace this with the non-IEEE
69680b57cec5SDimitry Andric   // version.
69690b57cec5SDimitry Andric   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
69700b57cec5SDimitry Andric   MI.eraseFromParent();
69710b57cec5SDimitry Andric   return Legalized;
69720b57cec5SDimitry Andric }
69738bcb0991SDimitry Andric 
69748bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
69758bcb0991SDimitry Andric   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
69768bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
69778bcb0991SDimitry Andric   LLT Ty = MRI.getType(DstReg);
69788bcb0991SDimitry Andric   unsigned Flags = MI.getFlags();
69798bcb0991SDimitry Andric 
69808bcb0991SDimitry Andric   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
69818bcb0991SDimitry Andric                                   Flags);
69828bcb0991SDimitry Andric   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
69838bcb0991SDimitry Andric   MI.eraseFromParent();
69848bcb0991SDimitry Andric   return Legalized;
69858bcb0991SDimitry Andric }
69868bcb0991SDimitry Andric 
69878bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
6988480093f4SDimitry Andric LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
698906c3fb27SDimitry Andric   auto [DstReg, X] = MI.getFirst2Regs();
69905ffd83dbSDimitry Andric   const unsigned Flags = MI.getFlags();
69915ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(DstReg);
69925ffd83dbSDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
69935ffd83dbSDimitry Andric 
69945ffd83dbSDimitry Andric   // round(x) =>
69955ffd83dbSDimitry Andric   //  t = trunc(x);
69965ffd83dbSDimitry Andric   //  d = fabs(x - t);
69975f757f3fSDimitry Andric   //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
69985f757f3fSDimitry Andric   //  return t + o;
69995ffd83dbSDimitry Andric 
70005ffd83dbSDimitry Andric   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
70015ffd83dbSDimitry Andric 
70025ffd83dbSDimitry Andric   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
70035ffd83dbSDimitry Andric   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
70045f757f3fSDimitry Andric 
70055ffd83dbSDimitry Andric   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
70065f757f3fSDimitry Andric   auto Cmp =
70075f757f3fSDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
70085ffd83dbSDimitry Andric 
70095f757f3fSDimitry Andric   // Could emit G_UITOFP instead
70105f757f3fSDimitry Andric   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
70115f757f3fSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
70125f757f3fSDimitry Andric   auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
70135f757f3fSDimitry Andric   auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
70145ffd83dbSDimitry Andric 
70155f757f3fSDimitry Andric   MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
70165ffd83dbSDimitry Andric 
70175ffd83dbSDimitry Andric   MI.eraseFromParent();
70185ffd83dbSDimitry Andric   return Legalized;
70195ffd83dbSDimitry Andric }
70205ffd83dbSDimitry Andric 
702106c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
702206c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
7023480093f4SDimitry Andric   unsigned Flags = MI.getFlags();
7024480093f4SDimitry Andric   LLT Ty = MRI.getType(DstReg);
7025480093f4SDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
7026480093f4SDimitry Andric 
7027480093f4SDimitry Andric   // result = trunc(src);
7028480093f4SDimitry Andric   // if (src < 0.0 && src != result)
7029480093f4SDimitry Andric   //   result += -1.0.
7030480093f4SDimitry Andric 
7031480093f4SDimitry Andric   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
70325ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7033480093f4SDimitry Andric 
7034480093f4SDimitry Andric   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7035480093f4SDimitry Andric                                   SrcReg, Zero, Flags);
7036480093f4SDimitry Andric   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7037480093f4SDimitry Andric                                       SrcReg, Trunc, Flags);
7038480093f4SDimitry Andric   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7039480093f4SDimitry Andric   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7040480093f4SDimitry Andric 
70415ffd83dbSDimitry Andric   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
70425ffd83dbSDimitry Andric   MI.eraseFromParent();
70435ffd83dbSDimitry Andric   return Legalized;
70445ffd83dbSDimitry Andric }
70455ffd83dbSDimitry Andric 
70465ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
70475ffd83dbSDimitry Andric LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
70485ffd83dbSDimitry Andric   const unsigned NumOps = MI.getNumOperands();
704906c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
705006c3fb27SDimitry Andric   unsigned PartSize = Src0Ty.getSizeInBits();
70515ffd83dbSDimitry Andric 
70525ffd83dbSDimitry Andric   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
70535ffd83dbSDimitry Andric   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
70545ffd83dbSDimitry Andric 
70555ffd83dbSDimitry Andric   for (unsigned I = 2; I != NumOps; ++I) {
70565ffd83dbSDimitry Andric     const unsigned Offset = (I - 1) * PartSize;
70575ffd83dbSDimitry Andric 
70585ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(I).getReg();
70595ffd83dbSDimitry Andric     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
70605ffd83dbSDimitry Andric 
70615ffd83dbSDimitry Andric     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
70625ffd83dbSDimitry Andric       MRI.createGenericVirtualRegister(WideTy);
70635ffd83dbSDimitry Andric 
70645ffd83dbSDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
70655ffd83dbSDimitry Andric     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
70665ffd83dbSDimitry Andric     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
70675ffd83dbSDimitry Andric     ResultReg = NextResult;
70685ffd83dbSDimitry Andric   }
70695ffd83dbSDimitry Andric 
70705ffd83dbSDimitry Andric   if (DstTy.isPointer()) {
70715ffd83dbSDimitry Andric     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
70725ffd83dbSDimitry Andric           DstTy.getAddressSpace())) {
70735ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
70745ffd83dbSDimitry Andric       return UnableToLegalize;
70755ffd83dbSDimitry Andric     }
70765ffd83dbSDimitry Andric 
70775ffd83dbSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
70785ffd83dbSDimitry Andric   }
70795ffd83dbSDimitry Andric 
7080480093f4SDimitry Andric   MI.eraseFromParent();
7081480093f4SDimitry Andric   return Legalized;
7082480093f4SDimitry Andric }
7083480093f4SDimitry Andric 
7084480093f4SDimitry Andric LegalizerHelper::LegalizeResult
70858bcb0991SDimitry Andric LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
70868bcb0991SDimitry Andric   const unsigned NumDst = MI.getNumOperands() - 1;
70875ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
70888bcb0991SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
70898bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
70905ffd83dbSDimitry Andric   if (DstTy.isPointer())
70915ffd83dbSDimitry Andric     return UnableToLegalize; // TODO
70928bcb0991SDimitry Andric 
70935ffd83dbSDimitry Andric   SrcReg = coerceToScalar(SrcReg);
70945ffd83dbSDimitry Andric   if (!SrcReg)
70955ffd83dbSDimitry Andric     return UnableToLegalize;
70968bcb0991SDimitry Andric 
70978bcb0991SDimitry Andric   // Expand scalarizing unmerge as bitcast to integer and shift.
70985ffd83dbSDimitry Andric   LLT IntTy = MRI.getType(SrcReg);
70998bcb0991SDimitry Andric 
71005ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
71018bcb0991SDimitry Andric 
71028bcb0991SDimitry Andric   const unsigned DstSize = DstTy.getSizeInBits();
71038bcb0991SDimitry Andric   unsigned Offset = DstSize;
71048bcb0991SDimitry Andric   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
71058bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
71065ffd83dbSDimitry Andric     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
71078bcb0991SDimitry Andric     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
71088bcb0991SDimitry Andric   }
71098bcb0991SDimitry Andric 
71108bcb0991SDimitry Andric   MI.eraseFromParent();
71118bcb0991SDimitry Andric   return Legalized;
71128bcb0991SDimitry Andric }
71138bcb0991SDimitry Andric 
7114e8d8bef9SDimitry Andric /// Lower a vector extract or insert by writing the vector to a stack temporary
7115e8d8bef9SDimitry Andric /// and reloading the element or vector.
7116e8d8bef9SDimitry Andric ///
7117e8d8bef9SDimitry Andric /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7118e8d8bef9SDimitry Andric ///  =>
7119e8d8bef9SDimitry Andric ///  %stack_temp = G_FRAME_INDEX
7120e8d8bef9SDimitry Andric ///  G_STORE %vec, %stack_temp
7121e8d8bef9SDimitry Andric ///  %idx = clamp(%idx, %vec.getNumElements())
7122e8d8bef9SDimitry Andric ///  %element_ptr = G_PTR_ADD %stack_temp, %idx
7123e8d8bef9SDimitry Andric ///  %dst = G_LOAD %element_ptr
7124e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7125e8d8bef9SDimitry Andric LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7126e8d8bef9SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
7127e8d8bef9SDimitry Andric   Register SrcVec = MI.getOperand(1).getReg();
7128e8d8bef9SDimitry Andric   Register InsertVal;
7129e8d8bef9SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7130e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
7131e8d8bef9SDimitry Andric 
7132e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7133e8d8bef9SDimitry Andric 
7134e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
7135e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
71360eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
71370eae32dcSDimitry Andric 
71380eae32dcSDimitry Andric   int64_t IdxVal;
71390eae32dcSDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
71400eae32dcSDimitry Andric     SmallVector<Register, 8> SrcRegs;
71417a6dacacSDimitry Andric     extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
71420eae32dcSDimitry Andric 
71430eae32dcSDimitry Andric     if (InsertVal) {
71440eae32dcSDimitry Andric       SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7145bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
71460eae32dcSDimitry Andric     } else {
71470eae32dcSDimitry Andric       MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
71480eae32dcSDimitry Andric     }
71490eae32dcSDimitry Andric 
71500eae32dcSDimitry Andric     MI.eraseFromParent();
71510eae32dcSDimitry Andric     return Legalized;
71520eae32dcSDimitry Andric   }
71530eae32dcSDimitry Andric 
7154e8d8bef9SDimitry Andric   if (!EltTy.isByteSized()) { // Not implemented.
7155e8d8bef9SDimitry Andric     LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7156e8d8bef9SDimitry Andric     return UnableToLegalize;
7157e8d8bef9SDimitry Andric   }
7158e8d8bef9SDimitry Andric 
7159e8d8bef9SDimitry Andric   unsigned EltBytes = EltTy.getSizeInBytes();
7160e8d8bef9SDimitry Andric   Align VecAlign = getStackTemporaryAlignment(VecTy);
7161e8d8bef9SDimitry Andric   Align EltAlign;
7162e8d8bef9SDimitry Andric 
7163e8d8bef9SDimitry Andric   MachinePointerInfo PtrInfo;
71645f757f3fSDimitry Andric   auto StackTemp = createStackTemporary(
71655f757f3fSDimitry Andric       TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7166e8d8bef9SDimitry Andric   MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7167e8d8bef9SDimitry Andric 
7168e8d8bef9SDimitry Andric   // Get the pointer to the element, and be sure not to hit undefined behavior
7169e8d8bef9SDimitry Andric   // if the index is out of bounds.
7170e8d8bef9SDimitry Andric   Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7171e8d8bef9SDimitry Andric 
7172e8d8bef9SDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7173e8d8bef9SDimitry Andric     int64_t Offset = IdxVal * EltBytes;
7174e8d8bef9SDimitry Andric     PtrInfo = PtrInfo.getWithOffset(Offset);
7175e8d8bef9SDimitry Andric     EltAlign = commonAlignment(VecAlign, Offset);
7176e8d8bef9SDimitry Andric   } else {
7177e8d8bef9SDimitry Andric     // We lose information with a variable offset.
7178e8d8bef9SDimitry Andric     EltAlign = getStackTemporaryAlignment(EltTy);
7179e8d8bef9SDimitry Andric     PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7180e8d8bef9SDimitry Andric   }
7181e8d8bef9SDimitry Andric 
7182e8d8bef9SDimitry Andric   if (InsertVal) {
7183e8d8bef9SDimitry Andric     // Write the inserted element
7184e8d8bef9SDimitry Andric     MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7185e8d8bef9SDimitry Andric 
7186e8d8bef9SDimitry Andric     // Reload the whole vector.
7187e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7188e8d8bef9SDimitry Andric   } else {
7189e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7190e8d8bef9SDimitry Andric   }
7191e8d8bef9SDimitry Andric 
7192e8d8bef9SDimitry Andric   MI.eraseFromParent();
7193e8d8bef9SDimitry Andric   return Legalized;
7194e8d8bef9SDimitry Andric }
7195e8d8bef9SDimitry Andric 
71968bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
71978bcb0991SDimitry Andric LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
719806c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
719906c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
72008bcb0991SDimitry Andric   LLT IdxTy = LLT::scalar(32);
72018bcb0991SDimitry Andric 
7202480093f4SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
72038bcb0991SDimitry Andric   Register Undef;
72048bcb0991SDimitry Andric   SmallVector<Register, 32> BuildVec;
72055f757f3fSDimitry Andric   LLT EltTy = DstTy.getScalarType();
72068bcb0991SDimitry Andric 
72078bcb0991SDimitry Andric   for (int Idx : Mask) {
72088bcb0991SDimitry Andric     if (Idx < 0) {
72098bcb0991SDimitry Andric       if (!Undef.isValid())
72108bcb0991SDimitry Andric         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
72118bcb0991SDimitry Andric       BuildVec.push_back(Undef);
72128bcb0991SDimitry Andric       continue;
72138bcb0991SDimitry Andric     }
72148bcb0991SDimitry Andric 
72158bcb0991SDimitry Andric     if (Src0Ty.isScalar()) {
72168bcb0991SDimitry Andric       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
72178bcb0991SDimitry Andric     } else {
72188bcb0991SDimitry Andric       int NumElts = Src0Ty.getNumElements();
72198bcb0991SDimitry Andric       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
72208bcb0991SDimitry Andric       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
72218bcb0991SDimitry Andric       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
72228bcb0991SDimitry Andric       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
72238bcb0991SDimitry Andric       BuildVec.push_back(Extract.getReg(0));
72248bcb0991SDimitry Andric     }
72258bcb0991SDimitry Andric   }
72268bcb0991SDimitry Andric 
72275f757f3fSDimitry Andric   if (DstTy.isScalar())
72285f757f3fSDimitry Andric     MIRBuilder.buildCopy(DstReg, BuildVec[0]);
72295f757f3fSDimitry Andric   else
72308bcb0991SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, BuildVec);
72318bcb0991SDimitry Andric   MI.eraseFromParent();
72328bcb0991SDimitry Andric   return Legalized;
72338bcb0991SDimitry Andric }
72348bcb0991SDimitry Andric 
72355f757f3fSDimitry Andric Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
72365f757f3fSDimitry Andric                                                     Register AllocSize,
72375f757f3fSDimitry Andric                                                     Align Alignment,
72385f757f3fSDimitry Andric                                                     LLT PtrTy) {
72398bcb0991SDimitry Andric   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
72408bcb0991SDimitry Andric 
72418bcb0991SDimitry Andric   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
72428bcb0991SDimitry Andric   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
72438bcb0991SDimitry Andric 
72448bcb0991SDimitry Andric   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
72458bcb0991SDimitry Andric   // have to generate an extra instruction to negate the alloc and then use
7246480093f4SDimitry Andric   // G_PTR_ADD to add the negative offset.
72478bcb0991SDimitry Andric   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
72485ffd83dbSDimitry Andric   if (Alignment > Align(1)) {
72495ffd83dbSDimitry Andric     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
72508bcb0991SDimitry Andric     AlignMask.negate();
72518bcb0991SDimitry Andric     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
72528bcb0991SDimitry Andric     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
72538bcb0991SDimitry Andric   }
72548bcb0991SDimitry Andric 
72555f757f3fSDimitry Andric   return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
72565f757f3fSDimitry Andric }
72575f757f3fSDimitry Andric 
72585f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
72595f757f3fSDimitry Andric LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
72605f757f3fSDimitry Andric   const auto &MF = *MI.getMF();
72615f757f3fSDimitry Andric   const auto &TFI = *MF.getSubtarget().getFrameLowering();
72625f757f3fSDimitry Andric   if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
72635f757f3fSDimitry Andric     return UnableToLegalize;
72645f757f3fSDimitry Andric 
72655f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
72665f757f3fSDimitry Andric   Register AllocSize = MI.getOperand(1).getReg();
72675f757f3fSDimitry Andric   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
72685f757f3fSDimitry Andric 
72695f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
72705f757f3fSDimitry Andric   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
72715f757f3fSDimitry Andric   Register SPTmp =
72725f757f3fSDimitry Andric       getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
72735f757f3fSDimitry Andric 
72748bcb0991SDimitry Andric   MIRBuilder.buildCopy(SPReg, SPTmp);
72758bcb0991SDimitry Andric   MIRBuilder.buildCopy(Dst, SPTmp);
72768bcb0991SDimitry Andric 
72778bcb0991SDimitry Andric   MI.eraseFromParent();
72788bcb0991SDimitry Andric   return Legalized;
72798bcb0991SDimitry Andric }
72808bcb0991SDimitry Andric 
72818bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
72825f757f3fSDimitry Andric LegalizerHelper::lowerStackSave(MachineInstr &MI) {
72835f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
72845f757f3fSDimitry Andric   if (!StackPtr)
72855f757f3fSDimitry Andric     return UnableToLegalize;
72865f757f3fSDimitry Andric 
72875f757f3fSDimitry Andric   MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
72885f757f3fSDimitry Andric   MI.eraseFromParent();
72895f757f3fSDimitry Andric   return Legalized;
72905f757f3fSDimitry Andric }
72915f757f3fSDimitry Andric 
72925f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
72935f757f3fSDimitry Andric LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
72945f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
72955f757f3fSDimitry Andric   if (!StackPtr)
72965f757f3fSDimitry Andric     return UnableToLegalize;
72975f757f3fSDimitry Andric 
72985f757f3fSDimitry Andric   MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
72995f757f3fSDimitry Andric   MI.eraseFromParent();
73005f757f3fSDimitry Andric   return Legalized;
73015f757f3fSDimitry Andric }
73025f757f3fSDimitry Andric 
73035f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
73048bcb0991SDimitry Andric LegalizerHelper::lowerExtract(MachineInstr &MI) {
730506c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
73068bcb0991SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
73078bcb0991SDimitry Andric 
73080eae32dcSDimitry Andric   // Extract sub-vector or one element
73090eae32dcSDimitry Andric   if (SrcTy.isVector()) {
73100eae32dcSDimitry Andric     unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
73110eae32dcSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
73120eae32dcSDimitry Andric 
73130eae32dcSDimitry Andric     if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
73140eae32dcSDimitry Andric         (Offset + DstSize <= SrcTy.getSizeInBits())) {
73150eae32dcSDimitry Andric       // Unmerge and allow access to each Src element for the artifact combiner.
731606c3fb27SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
73170eae32dcSDimitry Andric 
73180eae32dcSDimitry Andric       // Take element(s) we need to extract and copy it (merge them).
73190eae32dcSDimitry Andric       SmallVector<Register, 8> SubVectorElts;
73200eae32dcSDimitry Andric       for (unsigned Idx = Offset / SrcEltSize;
73210eae32dcSDimitry Andric            Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
73220eae32dcSDimitry Andric         SubVectorElts.push_back(Unmerge.getReg(Idx));
73230eae32dcSDimitry Andric       }
73240eae32dcSDimitry Andric       if (SubVectorElts.size() == 1)
732506c3fb27SDimitry Andric         MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
73260eae32dcSDimitry Andric       else
732706c3fb27SDimitry Andric         MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
73280eae32dcSDimitry Andric 
73290eae32dcSDimitry Andric       MI.eraseFromParent();
73300eae32dcSDimitry Andric       return Legalized;
73310eae32dcSDimitry Andric     }
73320eae32dcSDimitry Andric   }
73330eae32dcSDimitry Andric 
73348bcb0991SDimitry Andric   if (DstTy.isScalar() &&
73358bcb0991SDimitry Andric       (SrcTy.isScalar() ||
73368bcb0991SDimitry Andric        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
73378bcb0991SDimitry Andric     LLT SrcIntTy = SrcTy;
73388bcb0991SDimitry Andric     if (!SrcTy.isScalar()) {
73398bcb0991SDimitry Andric       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
734006c3fb27SDimitry Andric       SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
73418bcb0991SDimitry Andric     }
73428bcb0991SDimitry Andric 
73438bcb0991SDimitry Andric     if (Offset == 0)
734406c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, SrcReg);
73458bcb0991SDimitry Andric     else {
73468bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
734706c3fb27SDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
734806c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, Shr);
73498bcb0991SDimitry Andric     }
73508bcb0991SDimitry Andric 
73518bcb0991SDimitry Andric     MI.eraseFromParent();
73528bcb0991SDimitry Andric     return Legalized;
73538bcb0991SDimitry Andric   }
73548bcb0991SDimitry Andric 
73558bcb0991SDimitry Andric   return UnableToLegalize;
73568bcb0991SDimitry Andric }
73578bcb0991SDimitry Andric 
73588bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
735906c3fb27SDimitry Andric   auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
73608bcb0991SDimitry Andric   uint64_t Offset = MI.getOperand(3).getImm();
73618bcb0991SDimitry Andric 
73628bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Src);
73638bcb0991SDimitry Andric   LLT InsertTy = MRI.getType(InsertSrc);
73648bcb0991SDimitry Andric 
73650eae32dcSDimitry Andric   // Insert sub-vector or one element
73660eae32dcSDimitry Andric   if (DstTy.isVector() && !InsertTy.isPointer()) {
73670eae32dcSDimitry Andric     LLT EltTy = DstTy.getElementType();
73680eae32dcSDimitry Andric     unsigned EltSize = EltTy.getSizeInBits();
73690eae32dcSDimitry Andric     unsigned InsertSize = InsertTy.getSizeInBits();
73700eae32dcSDimitry Andric 
73710eae32dcSDimitry Andric     if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
73720eae32dcSDimitry Andric         (Offset + InsertSize <= DstTy.getSizeInBits())) {
73730eae32dcSDimitry Andric       auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
73740eae32dcSDimitry Andric       SmallVector<Register, 8> DstElts;
73750eae32dcSDimitry Andric       unsigned Idx = 0;
73760eae32dcSDimitry Andric       // Elements from Src before insert start Offset
73770eae32dcSDimitry Andric       for (; Idx < Offset / EltSize; ++Idx) {
73780eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
73790eae32dcSDimitry Andric       }
73800eae32dcSDimitry Andric 
73810eae32dcSDimitry Andric       // Replace elements in Src with elements from InsertSrc
73820eae32dcSDimitry Andric       if (InsertTy.getSizeInBits() > EltSize) {
73830eae32dcSDimitry Andric         auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
73840eae32dcSDimitry Andric         for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
73850eae32dcSDimitry Andric              ++Idx, ++i) {
73860eae32dcSDimitry Andric           DstElts.push_back(UnmergeInsertSrc.getReg(i));
73870eae32dcSDimitry Andric         }
73880eae32dcSDimitry Andric       } else {
73890eae32dcSDimitry Andric         DstElts.push_back(InsertSrc);
73900eae32dcSDimitry Andric         ++Idx;
73910eae32dcSDimitry Andric       }
73920eae32dcSDimitry Andric 
73930eae32dcSDimitry Andric       // Remaining elements from Src after insert
73940eae32dcSDimitry Andric       for (; Idx < DstTy.getNumElements(); ++Idx) {
73950eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
73960eae32dcSDimitry Andric       }
73970eae32dcSDimitry Andric 
7398bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
73990eae32dcSDimitry Andric       MI.eraseFromParent();
74000eae32dcSDimitry Andric       return Legalized;
74010eae32dcSDimitry Andric     }
74020eae32dcSDimitry Andric   }
74030eae32dcSDimitry Andric 
74045ffd83dbSDimitry Andric   if (InsertTy.isVector() ||
74055ffd83dbSDimitry Andric       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
74065ffd83dbSDimitry Andric     return UnableToLegalize;
74075ffd83dbSDimitry Andric 
74085ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
74095ffd83dbSDimitry Andric   if ((DstTy.isPointer() &&
74105ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
74115ffd83dbSDimitry Andric       (InsertTy.isPointer() &&
74125ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
74135ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
74145ffd83dbSDimitry Andric     return UnableToLegalize;
74155ffd83dbSDimitry Andric   }
74165ffd83dbSDimitry Andric 
74178bcb0991SDimitry Andric   LLT IntDstTy = DstTy;
74185ffd83dbSDimitry Andric 
74198bcb0991SDimitry Andric   if (!DstTy.isScalar()) {
74208bcb0991SDimitry Andric     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
74215ffd83dbSDimitry Andric     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
74225ffd83dbSDimitry Andric   }
74235ffd83dbSDimitry Andric 
74245ffd83dbSDimitry Andric   if (!InsertTy.isScalar()) {
74255ffd83dbSDimitry Andric     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
74265ffd83dbSDimitry Andric     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
74278bcb0991SDimitry Andric   }
74288bcb0991SDimitry Andric 
74298bcb0991SDimitry Andric   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
74308bcb0991SDimitry Andric   if (Offset != 0) {
74318bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
74328bcb0991SDimitry Andric     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
74338bcb0991SDimitry Andric   }
74348bcb0991SDimitry Andric 
74355ffd83dbSDimitry Andric   APInt MaskVal = APInt::getBitsSetWithWrap(
74365ffd83dbSDimitry Andric       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
74378bcb0991SDimitry Andric 
74388bcb0991SDimitry Andric   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
74398bcb0991SDimitry Andric   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
74408bcb0991SDimitry Andric   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
74418bcb0991SDimitry Andric 
74425ffd83dbSDimitry Andric   MIRBuilder.buildCast(Dst, Or);
74438bcb0991SDimitry Andric   MI.eraseFromParent();
74448bcb0991SDimitry Andric   return Legalized;
74458bcb0991SDimitry Andric }
74468bcb0991SDimitry Andric 
74478bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
74488bcb0991SDimitry Andric LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
744906c3fb27SDimitry Andric   auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
745006c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
74518bcb0991SDimitry Andric   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
74528bcb0991SDimitry Andric 
745306c3fb27SDimitry Andric   LLT Ty = Dst0Ty;
745406c3fb27SDimitry Andric   LLT BoolTy = Dst1Ty;
74558bcb0991SDimitry Andric 
74568bcb0991SDimitry Andric   if (IsAdd)
74578bcb0991SDimitry Andric     MIRBuilder.buildAdd(Dst0, LHS, RHS);
74588bcb0991SDimitry Andric   else
74598bcb0991SDimitry Andric     MIRBuilder.buildSub(Dst0, LHS, RHS);
74608bcb0991SDimitry Andric 
74618bcb0991SDimitry Andric   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
74628bcb0991SDimitry Andric 
74638bcb0991SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0);
74648bcb0991SDimitry Andric 
74658bcb0991SDimitry Andric   // For an addition, the result should be less than one of the operands (LHS)
74668bcb0991SDimitry Andric   // if and only if the other operand (RHS) is negative, otherwise there will
74678bcb0991SDimitry Andric   // be overflow.
74688bcb0991SDimitry Andric   // For a subtraction, the result should be less than one of the operands
74698bcb0991SDimitry Andric   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
74708bcb0991SDimitry Andric   // otherwise there will be overflow.
74718bcb0991SDimitry Andric   auto ResultLowerThanLHS =
74728bcb0991SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
74738bcb0991SDimitry Andric   auto ConditionRHS = MIRBuilder.buildICmp(
74748bcb0991SDimitry Andric       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
74758bcb0991SDimitry Andric 
74768bcb0991SDimitry Andric   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
74778bcb0991SDimitry Andric   MI.eraseFromParent();
74788bcb0991SDimitry Andric   return Legalized;
74798bcb0991SDimitry Andric }
7480480093f4SDimitry Andric 
7481480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7482e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
748306c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7484e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7485e8d8bef9SDimitry Andric   bool IsSigned;
7486e8d8bef9SDimitry Andric   bool IsAdd;
7487e8d8bef9SDimitry Andric   unsigned BaseOp;
7488e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7489e8d8bef9SDimitry Andric   default:
7490e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7491e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7492e8d8bef9SDimitry Andric     IsSigned = false;
7493e8d8bef9SDimitry Andric     IsAdd = true;
7494e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7495e8d8bef9SDimitry Andric     break;
7496e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7497e8d8bef9SDimitry Andric     IsSigned = true;
7498e8d8bef9SDimitry Andric     IsAdd = true;
7499e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7500e8d8bef9SDimitry Andric     break;
7501e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7502e8d8bef9SDimitry Andric     IsSigned = false;
7503e8d8bef9SDimitry Andric     IsAdd = false;
7504e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7505e8d8bef9SDimitry Andric     break;
7506e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7507e8d8bef9SDimitry Andric     IsSigned = true;
7508e8d8bef9SDimitry Andric     IsAdd = false;
7509e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7510e8d8bef9SDimitry Andric     break;
7511e8d8bef9SDimitry Andric   }
7512e8d8bef9SDimitry Andric 
7513e8d8bef9SDimitry Andric   if (IsSigned) {
7514e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7515e8d8bef9SDimitry Andric     //   hi = 0x7fffffff - smax(a, 0)
7516e8d8bef9SDimitry Andric     //   lo = 0x80000000 - smin(a, 0)
7517e8d8bef9SDimitry Andric     //   a + smin(smax(lo, b), hi)
7518e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7519e8d8bef9SDimitry Andric     //   lo = smax(a, -1) - 0x7fffffff
7520e8d8bef9SDimitry Andric     //   hi = smin(a, -1) - 0x80000000
7521e8d8bef9SDimitry Andric     //   a - smin(smax(lo, b), hi)
7522e8d8bef9SDimitry Andric     // TODO: AMDGPU can use a "median of 3" instruction here:
7523e8d8bef9SDimitry Andric     //   a +/- med3(lo, b, hi)
7524e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7525e8d8bef9SDimitry Andric     auto MaxVal =
7526e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7527e8d8bef9SDimitry Andric     auto MinVal =
7528e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7529e8d8bef9SDimitry Andric     MachineInstrBuilder Hi, Lo;
7530e8d8bef9SDimitry Andric     if (IsAdd) {
7531e8d8bef9SDimitry Andric       auto Zero = MIRBuilder.buildConstant(Ty, 0);
7532e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7533e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7534e8d8bef9SDimitry Andric     } else {
7535e8d8bef9SDimitry Andric       auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7536e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7537e8d8bef9SDimitry Andric                                MaxVal);
7538e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7539e8d8bef9SDimitry Andric                                MinVal);
7540e8d8bef9SDimitry Andric     }
7541e8d8bef9SDimitry Andric     auto RHSClamped =
7542e8d8bef9SDimitry Andric         MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7543e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7544e8d8bef9SDimitry Andric   } else {
7545e8d8bef9SDimitry Andric     // uadd.sat(a, b) -> a + umin(~a, b)
7546e8d8bef9SDimitry Andric     // usub.sat(a, b) -> a - umin(a, b)
7547e8d8bef9SDimitry Andric     Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7548e8d8bef9SDimitry Andric     auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7549e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7550e8d8bef9SDimitry Andric   }
7551e8d8bef9SDimitry Andric 
7552e8d8bef9SDimitry Andric   MI.eraseFromParent();
7553e8d8bef9SDimitry Andric   return Legalized;
7554e8d8bef9SDimitry Andric }
7555e8d8bef9SDimitry Andric 
7556e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7557e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
755806c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7559e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7560e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7561e8d8bef9SDimitry Andric   bool IsSigned;
7562e8d8bef9SDimitry Andric   bool IsAdd;
7563e8d8bef9SDimitry Andric   unsigned OverflowOp;
7564e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7565e8d8bef9SDimitry Andric   default:
7566e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7567e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7568e8d8bef9SDimitry Andric     IsSigned = false;
7569e8d8bef9SDimitry Andric     IsAdd = true;
7570e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_UADDO;
7571e8d8bef9SDimitry Andric     break;
7572e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7573e8d8bef9SDimitry Andric     IsSigned = true;
7574e8d8bef9SDimitry Andric     IsAdd = true;
7575e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SADDO;
7576e8d8bef9SDimitry Andric     break;
7577e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7578e8d8bef9SDimitry Andric     IsSigned = false;
7579e8d8bef9SDimitry Andric     IsAdd = false;
7580e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_USUBO;
7581e8d8bef9SDimitry Andric     break;
7582e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7583e8d8bef9SDimitry Andric     IsSigned = true;
7584e8d8bef9SDimitry Andric     IsAdd = false;
7585e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SSUBO;
7586e8d8bef9SDimitry Andric     break;
7587e8d8bef9SDimitry Andric   }
7588e8d8bef9SDimitry Andric 
7589e8d8bef9SDimitry Andric   auto OverflowRes =
7590e8d8bef9SDimitry Andric       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7591e8d8bef9SDimitry Andric   Register Tmp = OverflowRes.getReg(0);
7592e8d8bef9SDimitry Andric   Register Ov = OverflowRes.getReg(1);
7593e8d8bef9SDimitry Andric   MachineInstrBuilder Clamp;
7594e8d8bef9SDimitry Andric   if (IsSigned) {
7595e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7596e8d8bef9SDimitry Andric     //   {tmp, ov} = saddo(a, b)
7597e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7598e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7599e8d8bef9SDimitry Andric     //   {tmp, ov} = ssubo(a, b)
7600e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7601e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7602e8d8bef9SDimitry Andric     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7603e8d8bef9SDimitry Andric     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7604e8d8bef9SDimitry Andric     auto MinVal =
7605e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7606e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7607e8d8bef9SDimitry Andric   } else {
7608e8d8bef9SDimitry Andric     // uadd.sat(a, b) ->
7609e8d8bef9SDimitry Andric     //   {tmp, ov} = uaddo(a, b)
7610e8d8bef9SDimitry Andric     //   ov ? 0xffffffff : tmp
7611e8d8bef9SDimitry Andric     // usub.sat(a, b) ->
7612e8d8bef9SDimitry Andric     //   {tmp, ov} = usubo(a, b)
7613e8d8bef9SDimitry Andric     //   ov ? 0 : tmp
7614e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7615e8d8bef9SDimitry Andric   }
7616e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7617e8d8bef9SDimitry Andric 
7618e8d8bef9SDimitry Andric   MI.eraseFromParent();
7619e8d8bef9SDimitry Andric   return Legalized;
7620e8d8bef9SDimitry Andric }
7621e8d8bef9SDimitry Andric 
7622e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7623e8d8bef9SDimitry Andric LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7624e8d8bef9SDimitry Andric   assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7625e8d8bef9SDimitry Andric           MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7626e8d8bef9SDimitry Andric          "Expected shlsat opcode!");
7627e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
762806c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7629e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7630e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7631e8d8bef9SDimitry Andric 
7632e8d8bef9SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
7633e8d8bef9SDimitry Andric   auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7634e8d8bef9SDimitry Andric   auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7635e8d8bef9SDimitry Andric                        : MIRBuilder.buildLShr(Ty, Result, RHS);
7636e8d8bef9SDimitry Andric 
7637e8d8bef9SDimitry Andric   MachineInstrBuilder SatVal;
7638e8d8bef9SDimitry Andric   if (IsSigned) {
7639e8d8bef9SDimitry Andric     auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7640e8d8bef9SDimitry Andric     auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7641e8d8bef9SDimitry Andric     auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7642e8d8bef9SDimitry Andric                                     MIRBuilder.buildConstant(Ty, 0));
7643e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7644e8d8bef9SDimitry Andric   } else {
7645e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7646e8d8bef9SDimitry Andric   }
7647e8d8bef9SDimitry Andric   auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7648e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7649e8d8bef9SDimitry Andric 
7650e8d8bef9SDimitry Andric   MI.eraseFromParent();
7651e8d8bef9SDimitry Andric   return Legalized;
7652e8d8bef9SDimitry Andric }
7653e8d8bef9SDimitry Andric 
765406c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
765506c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7656480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
76575ffd83dbSDimitry Andric   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7658480093f4SDimitry Andric   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7659480093f4SDimitry Andric 
7660480093f4SDimitry Andric   // Swap most and least significant byte, set remaining bytes in Res to zero.
7661480093f4SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7662480093f4SDimitry Andric   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7663480093f4SDimitry Andric   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7664480093f4SDimitry Andric   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7665480093f4SDimitry Andric 
7666480093f4SDimitry Andric   // Set i-th high/low byte in Res to i-th low/high byte from Src.
7667480093f4SDimitry Andric   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7668480093f4SDimitry Andric     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7669480093f4SDimitry Andric     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7670480093f4SDimitry Andric     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7671480093f4SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7672480093f4SDimitry Andric     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7673480093f4SDimitry Andric     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7674480093f4SDimitry Andric     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7675480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7676480093f4SDimitry Andric     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7677480093f4SDimitry Andric     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7678480093f4SDimitry Andric     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7679480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7680480093f4SDimitry Andric   }
7681480093f4SDimitry Andric   Res.getInstr()->getOperand(0).setReg(Dst);
7682480093f4SDimitry Andric 
7683480093f4SDimitry Andric   MI.eraseFromParent();
7684480093f4SDimitry Andric   return Legalized;
7685480093f4SDimitry Andric }
7686480093f4SDimitry Andric 
7687480093f4SDimitry Andric //{ (Src & Mask) >> N } | { (Src << N) & Mask }
7688480093f4SDimitry Andric static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7689480093f4SDimitry Andric                                  MachineInstrBuilder Src, APInt Mask) {
7690480093f4SDimitry Andric   const LLT Ty = Dst.getLLTTy(*B.getMRI());
7691480093f4SDimitry Andric   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7692480093f4SDimitry Andric   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7693480093f4SDimitry Andric   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7694480093f4SDimitry Andric   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7695480093f4SDimitry Andric   return B.buildOr(Dst, LHS, RHS);
7696480093f4SDimitry Andric }
7697480093f4SDimitry Andric 
7698480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7699480093f4SDimitry Andric LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
770006c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7701480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
7702480093f4SDimitry Andric   unsigned Size = Ty.getSizeInBits();
7703480093f4SDimitry Andric 
7704480093f4SDimitry Andric   MachineInstrBuilder BSWAP =
7705480093f4SDimitry Andric       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7706480093f4SDimitry Andric 
7707480093f4SDimitry Andric   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7708480093f4SDimitry Andric   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7709480093f4SDimitry Andric   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7710480093f4SDimitry Andric   MachineInstrBuilder Swap4 =
7711480093f4SDimitry Andric       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7712480093f4SDimitry Andric 
7713480093f4SDimitry Andric   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7714480093f4SDimitry Andric   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7715480093f4SDimitry Andric   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7716480093f4SDimitry Andric   MachineInstrBuilder Swap2 =
7717480093f4SDimitry Andric       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7718480093f4SDimitry Andric 
7719480093f4SDimitry Andric   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7720480093f4SDimitry Andric   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7721480093f4SDimitry Andric   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7722480093f4SDimitry Andric   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7723480093f4SDimitry Andric 
7724480093f4SDimitry Andric   MI.eraseFromParent();
7725480093f4SDimitry Andric   return Legalized;
7726480093f4SDimitry Andric }
7727480093f4SDimitry Andric 
7728480093f4SDimitry Andric LegalizerHelper::LegalizeResult
77295ffd83dbSDimitry Andric LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
7730480093f4SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
77315ffd83dbSDimitry Andric 
77325ffd83dbSDimitry Andric   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
77335ffd83dbSDimitry Andric   int NameOpIdx = IsRead ? 1 : 0;
77345ffd83dbSDimitry Andric   int ValRegIndex = IsRead ? 0 : 1;
77355ffd83dbSDimitry Andric 
77365ffd83dbSDimitry Andric   Register ValReg = MI.getOperand(ValRegIndex).getReg();
77375ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(ValReg);
77385ffd83dbSDimitry Andric   const MDString *RegStr = cast<MDString>(
77395ffd83dbSDimitry Andric     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
77405ffd83dbSDimitry Andric 
7741e8d8bef9SDimitry Andric   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
77425ffd83dbSDimitry Andric   if (!PhysReg.isValid())
7743480093f4SDimitry Andric     return UnableToLegalize;
7744480093f4SDimitry Andric 
77455ffd83dbSDimitry Andric   if (IsRead)
77465ffd83dbSDimitry Andric     MIRBuilder.buildCopy(ValReg, PhysReg);
77475ffd83dbSDimitry Andric   else
77485ffd83dbSDimitry Andric     MIRBuilder.buildCopy(PhysReg, ValReg);
77495ffd83dbSDimitry Andric 
7750480093f4SDimitry Andric   MI.eraseFromParent();
7751480093f4SDimitry Andric   return Legalized;
7752480093f4SDimitry Andric }
7753e8d8bef9SDimitry Andric 
7754e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7755e8d8bef9SDimitry Andric LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
7756e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
7757e8d8bef9SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
7758e8d8bef9SDimitry Andric   Register Result = MI.getOperand(0).getReg();
7759e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(Result);
7760e8d8bef9SDimitry Andric   auto SizeInBits = OrigTy.getScalarSizeInBits();
7761e8d8bef9SDimitry Andric   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
7762e8d8bef9SDimitry Andric 
7763e8d8bef9SDimitry Andric   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
7764e8d8bef9SDimitry Andric   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
7765e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
7766e8d8bef9SDimitry Andric   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
7767e8d8bef9SDimitry Andric 
7768e8d8bef9SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
7769e8d8bef9SDimitry Andric   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
7770e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(Result, Shifted);
7771e8d8bef9SDimitry Andric 
7772e8d8bef9SDimitry Andric   MI.eraseFromParent();
7773e8d8bef9SDimitry Andric   return Legalized;
7774e8d8bef9SDimitry Andric }
7775e8d8bef9SDimitry Andric 
7776bdd1243dSDimitry Andric LegalizerHelper::LegalizeResult
7777bdd1243dSDimitry Andric LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
777806c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
777906c3fb27SDimitry Andric   FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
7780bdd1243dSDimitry Andric 
778106c3fb27SDimitry Andric   if (Mask == fcNone) {
7782bdd1243dSDimitry Andric     MIRBuilder.buildConstant(DstReg, 0);
7783bdd1243dSDimitry Andric     MI.eraseFromParent();
7784bdd1243dSDimitry Andric     return Legalized;
7785bdd1243dSDimitry Andric   }
778606c3fb27SDimitry Andric   if (Mask == fcAllFlags) {
7787bdd1243dSDimitry Andric     MIRBuilder.buildConstant(DstReg, 1);
7788bdd1243dSDimitry Andric     MI.eraseFromParent();
7789bdd1243dSDimitry Andric     return Legalized;
7790bdd1243dSDimitry Andric   }
7791bdd1243dSDimitry Andric 
779206c3fb27SDimitry Andric   // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
779306c3fb27SDimitry Andric   // version
779406c3fb27SDimitry Andric 
7795bdd1243dSDimitry Andric   unsigned BitSize = SrcTy.getScalarSizeInBits();
7796bdd1243dSDimitry Andric   const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7797bdd1243dSDimitry Andric 
7798bdd1243dSDimitry Andric   LLT IntTy = LLT::scalar(BitSize);
7799bdd1243dSDimitry Andric   if (SrcTy.isVector())
7800bdd1243dSDimitry Andric     IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
7801bdd1243dSDimitry Andric   auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
7802bdd1243dSDimitry Andric 
7803bdd1243dSDimitry Andric   // Various masks.
7804bdd1243dSDimitry Andric   APInt SignBit = APInt::getSignMask(BitSize);
7805bdd1243dSDimitry Andric   APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
7806bdd1243dSDimitry Andric   APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
7807bdd1243dSDimitry Andric   APInt ExpMask = Inf;
7808bdd1243dSDimitry Andric   APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
7809bdd1243dSDimitry Andric   APInt QNaNBitMask =
7810bdd1243dSDimitry Andric       APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
781106c3fb27SDimitry Andric   APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
7812bdd1243dSDimitry Andric 
7813bdd1243dSDimitry Andric   auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
7814bdd1243dSDimitry Andric   auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
7815bdd1243dSDimitry Andric   auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
7816bdd1243dSDimitry Andric   auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
7817bdd1243dSDimitry Andric   auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
7818bdd1243dSDimitry Andric 
7819bdd1243dSDimitry Andric   auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
7820bdd1243dSDimitry Andric   auto Sign =
7821bdd1243dSDimitry Andric       MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
7822bdd1243dSDimitry Andric 
7823bdd1243dSDimitry Andric   auto Res = MIRBuilder.buildConstant(DstTy, 0);
782406c3fb27SDimitry Andric   // Clang doesn't support capture of structured bindings:
782506c3fb27SDimitry Andric   LLT DstTyCopy = DstTy;
7826bdd1243dSDimitry Andric   const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
782706c3fb27SDimitry Andric     Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
7828bdd1243dSDimitry Andric   };
7829bdd1243dSDimitry Andric 
7830bdd1243dSDimitry Andric   // Tests that involve more than one class should be processed first.
7831bdd1243dSDimitry Andric   if ((Mask & fcFinite) == fcFinite) {
7832bdd1243dSDimitry Andric     // finite(V) ==> abs(V) u< exp_mask
7833bdd1243dSDimitry Andric     appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
7834bdd1243dSDimitry Andric                                      ExpMaskC));
7835bdd1243dSDimitry Andric     Mask &= ~fcFinite;
7836bdd1243dSDimitry Andric   } else if ((Mask & fcFinite) == fcPosFinite) {
7837bdd1243dSDimitry Andric     // finite(V) && V > 0 ==> V u< exp_mask
7838bdd1243dSDimitry Andric     appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
7839bdd1243dSDimitry Andric                                      ExpMaskC));
7840bdd1243dSDimitry Andric     Mask &= ~fcPosFinite;
7841bdd1243dSDimitry Andric   } else if ((Mask & fcFinite) == fcNegFinite) {
7842bdd1243dSDimitry Andric     // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
7843bdd1243dSDimitry Andric     auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
7844bdd1243dSDimitry Andric                                     ExpMaskC);
7845bdd1243dSDimitry Andric     auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
7846bdd1243dSDimitry Andric     appendToRes(And);
7847bdd1243dSDimitry Andric     Mask &= ~fcNegFinite;
7848bdd1243dSDimitry Andric   }
7849bdd1243dSDimitry Andric 
785006c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
785106c3fb27SDimitry Andric     // fcZero | fcSubnormal => test all exponent bits are 0
785206c3fb27SDimitry Andric     // TODO: Handle sign bit specific cases
785306c3fb27SDimitry Andric     // TODO: Handle inverted case
785406c3fb27SDimitry Andric     if (PartialCheck == (fcZero | fcSubnormal)) {
785506c3fb27SDimitry Andric       auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
785606c3fb27SDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
785706c3fb27SDimitry Andric                                        ExpBits, ZeroC));
785806c3fb27SDimitry Andric       Mask &= ~PartialCheck;
785906c3fb27SDimitry Andric     }
786006c3fb27SDimitry Andric   }
786106c3fb27SDimitry Andric 
7862bdd1243dSDimitry Andric   // Check for individual classes.
786306c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcZero) {
7864bdd1243dSDimitry Andric     if (PartialCheck == fcPosZero)
7865bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7866bdd1243dSDimitry Andric                                        AsInt, ZeroC));
7867bdd1243dSDimitry Andric     else if (PartialCheck == fcZero)
7868bdd1243dSDimitry Andric       appendToRes(
7869bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
7870bdd1243dSDimitry Andric     else // fcNegZero
7871bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7872bdd1243dSDimitry Andric                                        AsInt, SignBitC));
7873bdd1243dSDimitry Andric   }
7874bdd1243dSDimitry Andric 
787506c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcSubnormal) {
787606c3fb27SDimitry Andric     // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
787706c3fb27SDimitry Andric     // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
787806c3fb27SDimitry Andric     auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
787906c3fb27SDimitry Andric     auto OneC = MIRBuilder.buildConstant(IntTy, 1);
788006c3fb27SDimitry Andric     auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
788106c3fb27SDimitry Andric     auto SubnormalRes =
788206c3fb27SDimitry Andric         MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
788306c3fb27SDimitry Andric                              MIRBuilder.buildConstant(IntTy, AllOneMantissa));
788406c3fb27SDimitry Andric     if (PartialCheck == fcNegSubnormal)
788506c3fb27SDimitry Andric       SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
788606c3fb27SDimitry Andric     appendToRes(SubnormalRes);
788706c3fb27SDimitry Andric   }
788806c3fb27SDimitry Andric 
788906c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcInf) {
7890bdd1243dSDimitry Andric     if (PartialCheck == fcPosInf)
7891bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7892bdd1243dSDimitry Andric                                        AsInt, InfC));
7893bdd1243dSDimitry Andric     else if (PartialCheck == fcInf)
7894bdd1243dSDimitry Andric       appendToRes(
7895bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
7896bdd1243dSDimitry Andric     else { // fcNegInf
7897bdd1243dSDimitry Andric       APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
7898bdd1243dSDimitry Andric       auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
7899bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7900bdd1243dSDimitry Andric                                        AsInt, NegInfC));
7901bdd1243dSDimitry Andric     }
7902bdd1243dSDimitry Andric   }
7903bdd1243dSDimitry Andric 
790406c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcNan) {
7905bdd1243dSDimitry Andric     auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
7906bdd1243dSDimitry Andric     if (PartialCheck == fcNan) {
7907bdd1243dSDimitry Andric       // isnan(V) ==> abs(V) u> int(inf)
7908bdd1243dSDimitry Andric       appendToRes(
7909bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
7910bdd1243dSDimitry Andric     } else if (PartialCheck == fcQNan) {
7911bdd1243dSDimitry Andric       // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
7912bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
7913bdd1243dSDimitry Andric                                        InfWithQnanBitC));
7914bdd1243dSDimitry Andric     } else { // fcSNan
7915bdd1243dSDimitry Andric       // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
7916bdd1243dSDimitry Andric       //                    abs(V) u< (unsigned(Inf) | quiet_bit)
7917bdd1243dSDimitry Andric       auto IsNan =
7918bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
7919bdd1243dSDimitry Andric       auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
7920bdd1243dSDimitry Andric                                             Abs, InfWithQnanBitC);
7921bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
7922bdd1243dSDimitry Andric     }
7923bdd1243dSDimitry Andric   }
7924bdd1243dSDimitry Andric 
792506c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcNormal) {
7926bdd1243dSDimitry Andric     // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
7927bdd1243dSDimitry Andric     // (max_exp-1))
7928bdd1243dSDimitry Andric     APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
7929bdd1243dSDimitry Andric     auto ExpMinusOne = MIRBuilder.buildSub(
7930bdd1243dSDimitry Andric         IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
7931bdd1243dSDimitry Andric     APInt MaxExpMinusOne = ExpMask - ExpLSB;
7932bdd1243dSDimitry Andric     auto NormalRes =
7933bdd1243dSDimitry Andric         MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
7934bdd1243dSDimitry Andric                              MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
7935bdd1243dSDimitry Andric     if (PartialCheck == fcNegNormal)
7936bdd1243dSDimitry Andric       NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
7937bdd1243dSDimitry Andric     else if (PartialCheck == fcPosNormal) {
7938bdd1243dSDimitry Andric       auto PosSign = MIRBuilder.buildXor(
7939bdd1243dSDimitry Andric           DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
7940bdd1243dSDimitry Andric       NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
7941bdd1243dSDimitry Andric     }
7942bdd1243dSDimitry Andric     appendToRes(NormalRes);
7943bdd1243dSDimitry Andric   }
7944bdd1243dSDimitry Andric 
7945bdd1243dSDimitry Andric   MIRBuilder.buildCopy(DstReg, Res);
7946bdd1243dSDimitry Andric   MI.eraseFromParent();
7947bdd1243dSDimitry Andric   return Legalized;
7948bdd1243dSDimitry Andric }
7949bdd1243dSDimitry Andric 
7950e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
7951e8d8bef9SDimitry Andric   // Implement vector G_SELECT in terms of XOR, AND, OR.
795206c3fb27SDimitry Andric   auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
795306c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
7954e8d8bef9SDimitry Andric   if (!DstTy.isVector())
7955e8d8bef9SDimitry Andric     return UnableToLegalize;
7956e8d8bef9SDimitry Andric 
7957bdd1243dSDimitry Andric   bool IsEltPtr = DstTy.getElementType().isPointer();
7958bdd1243dSDimitry Andric   if (IsEltPtr) {
7959bdd1243dSDimitry Andric     LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
7960bdd1243dSDimitry Andric     LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
7961bdd1243dSDimitry Andric     Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
7962bdd1243dSDimitry Andric     Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
7963bdd1243dSDimitry Andric     DstTy = NewTy;
7964bdd1243dSDimitry Andric   }
7965bdd1243dSDimitry Andric 
7966e8d8bef9SDimitry Andric   if (MaskTy.isScalar()) {
796781ad6265SDimitry Andric     // Turn the scalar condition into a vector condition mask.
796881ad6265SDimitry Andric 
7969e8d8bef9SDimitry Andric     Register MaskElt = MaskReg;
797081ad6265SDimitry Andric 
797181ad6265SDimitry Andric     // The condition was potentially zero extended before, but we want a sign
797281ad6265SDimitry Andric     // extended boolean.
7973bdd1243dSDimitry Andric     if (MaskTy != LLT::scalar(1))
797481ad6265SDimitry Andric       MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
7975e8d8bef9SDimitry Andric 
797681ad6265SDimitry Andric     // Continue the sign extension (or truncate) to match the data type.
797781ad6265SDimitry Andric     MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
797881ad6265SDimitry Andric                                           MaskElt).getReg(0);
797981ad6265SDimitry Andric 
798081ad6265SDimitry Andric     // Generate a vector splat idiom.
798181ad6265SDimitry Andric     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
798281ad6265SDimitry Andric     MaskReg = ShufSplat.getReg(0);
798381ad6265SDimitry Andric     MaskTy = DstTy;
798481ad6265SDimitry Andric   }
798581ad6265SDimitry Andric 
798681ad6265SDimitry Andric   if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
7987e8d8bef9SDimitry Andric     return UnableToLegalize;
7988e8d8bef9SDimitry Andric   }
7989e8d8bef9SDimitry Andric 
7990e8d8bef9SDimitry Andric   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
7991e8d8bef9SDimitry Andric   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
7992e8d8bef9SDimitry Andric   auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
7993bdd1243dSDimitry Andric   if (IsEltPtr) {
7994bdd1243dSDimitry Andric     auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
7995bdd1243dSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
7996bdd1243dSDimitry Andric   } else {
7997e8d8bef9SDimitry Andric     MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
7998bdd1243dSDimitry Andric   }
7999e8d8bef9SDimitry Andric   MI.eraseFromParent();
8000e8d8bef9SDimitry Andric   return Legalized;
8001e8d8bef9SDimitry Andric }
8002fe6060f1SDimitry Andric 
8003fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8004fe6060f1SDimitry Andric   // Split DIVREM into individual instructions.
8005fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
8006fe6060f1SDimitry Andric 
8007fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
8008fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8009fe6060f1SDimitry Andric                                         : TargetOpcode::G_UDIV,
8010fe6060f1SDimitry Andric       {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8011fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
8012fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8013fe6060f1SDimitry Andric                                         : TargetOpcode::G_UREM,
8014fe6060f1SDimitry Andric       {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8015fe6060f1SDimitry Andric   MI.eraseFromParent();
8016fe6060f1SDimitry Andric   return Legalized;
8017fe6060f1SDimitry Andric }
8018fe6060f1SDimitry Andric 
8019fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
8020fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8021fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
8022fe6060f1SDimitry Andric   // %v1 = G_ASHR %a, scalar_size-1
8023fe6060f1SDimitry Andric   // %v2 = G_ADD %a, %v1
8024fe6060f1SDimitry Andric   // %res = G_XOR %v2, %v1
8025fe6060f1SDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8026fe6060f1SDimitry Andric   Register OpReg = MI.getOperand(1).getReg();
8027fe6060f1SDimitry Andric   auto ShiftAmt =
8028fe6060f1SDimitry Andric       MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8029fe6060f1SDimitry Andric   auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8030fe6060f1SDimitry Andric   auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8031fe6060f1SDimitry Andric   MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8032fe6060f1SDimitry Andric   MI.eraseFromParent();
8033fe6060f1SDimitry Andric   return Legalized;
8034fe6060f1SDimitry Andric }
8035fe6060f1SDimitry Andric 
8036fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
8037fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8038fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
8039fe6060f1SDimitry Andric   // %v1 = G_CONSTANT 0
8040fe6060f1SDimitry Andric   // %v2 = G_SUB %v1, %a
8041fe6060f1SDimitry Andric   // %res = G_SMAX %a, %v2
8042fe6060f1SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
8043fe6060f1SDimitry Andric   LLT Ty = MRI.getType(SrcReg);
8044fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8045fe6060f1SDimitry Andric   auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8046fe6060f1SDimitry Andric   MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8047fe6060f1SDimitry Andric   MI.eraseFromParent();
8048fe6060f1SDimitry Andric   return Legalized;
8049fe6060f1SDimitry Andric }
8050349cc55cSDimitry Andric 
8051349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8052349cc55cSDimitry Andric LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8053349cc55cSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
8054349cc55cSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
8055349cc55cSDimitry Andric   LLT DstTy = MRI.getType(SrcReg);
8056349cc55cSDimitry Andric 
8057349cc55cSDimitry Andric   // The source could be a scalar if the IR type was <1 x sN>.
8058349cc55cSDimitry Andric   if (SrcTy.isScalar()) {
8059349cc55cSDimitry Andric     if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8060349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle extension.
8061349cc55cSDimitry Andric     // This can be just a plain copy.
8062349cc55cSDimitry Andric     Observer.changingInstr(MI);
8063349cc55cSDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8064349cc55cSDimitry Andric     Observer.changedInstr(MI);
8065349cc55cSDimitry Andric     return Legalized;
8066349cc55cSDimitry Andric   }
806706c3fb27SDimitry Andric   return UnableToLegalize;
8068349cc55cSDimitry Andric }
8069349cc55cSDimitry Andric 
80705f757f3fSDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
80715f757f3fSDimitry Andric 
80725f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
80735f757f3fSDimitry Andric   MachineFunction &MF = *MI.getMF();
80745f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
80755f757f3fSDimitry Andric   LLVMContext &Ctx = MF.getFunction().getContext();
80765f757f3fSDimitry Andric   Register ListPtr = MI.getOperand(1).getReg();
80775f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(ListPtr);
80785f757f3fSDimitry Andric 
80795f757f3fSDimitry Andric   // LstPtr is a pointer to the head of the list. Get the address
80805f757f3fSDimitry Andric   // of the head of the list.
80815f757f3fSDimitry Andric   Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
80825f757f3fSDimitry Andric   MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
80835f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
80845f757f3fSDimitry Andric   auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
80855f757f3fSDimitry Andric 
80865f757f3fSDimitry Andric   const Align A(MI.getOperand(2).getImm());
80875f757f3fSDimitry Andric   LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
80885f757f3fSDimitry Andric   if (A > TLI.getMinStackArgumentAlignment()) {
80895f757f3fSDimitry Andric     Register AlignAmt =
80905f757f3fSDimitry Andric         MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
80915f757f3fSDimitry Andric     auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
80925f757f3fSDimitry Andric     auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
80935f757f3fSDimitry Andric     VAList = AndDst.getReg(0);
80945f757f3fSDimitry Andric   }
80955f757f3fSDimitry Andric 
80965f757f3fSDimitry Andric   // Increment the pointer, VAList, to the next vaarg
80975f757f3fSDimitry Andric   // The list should be bumped by the size of element in the current head of
80985f757f3fSDimitry Andric   // list.
80995f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
81005f757f3fSDimitry Andric   LLT LLTTy = MRI.getType(Dst);
81015f757f3fSDimitry Andric   Type *Ty = getTypeForLLT(LLTTy, Ctx);
81025f757f3fSDimitry Andric   auto IncAmt =
81035f757f3fSDimitry Andric       MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
81045f757f3fSDimitry Andric   auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
81055f757f3fSDimitry Andric 
81065f757f3fSDimitry Andric   // Store the increment VAList to the legalized pointer
81075f757f3fSDimitry Andric   MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
81085f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
81095f757f3fSDimitry Andric   MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
81105f757f3fSDimitry Andric   // Load the actual argument out of the pointer VAList
81115f757f3fSDimitry Andric   Align EltAlignment = DL.getABITypeAlign(Ty);
81125f757f3fSDimitry Andric   MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
81135f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
81145f757f3fSDimitry Andric   MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
81155f757f3fSDimitry Andric 
81165f757f3fSDimitry Andric   MI.eraseFromParent();
81175f757f3fSDimitry Andric   return Legalized;
81185f757f3fSDimitry Andric }
81195f757f3fSDimitry Andric 
8120349cc55cSDimitry Andric static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8121349cc55cSDimitry Andric   // On Darwin, -Os means optimize for size without hurting performance, so
8122349cc55cSDimitry Andric   // only really optimize for size when -Oz (MinSize) is used.
8123349cc55cSDimitry Andric   if (MF.getTarget().getTargetTriple().isOSDarwin())
8124349cc55cSDimitry Andric     return MF.getFunction().hasMinSize();
8125349cc55cSDimitry Andric   return MF.getFunction().hasOptSize();
8126349cc55cSDimitry Andric }
8127349cc55cSDimitry Andric 
8128349cc55cSDimitry Andric // Returns a list of types to use for memory op lowering in MemOps. A partial
8129349cc55cSDimitry Andric // port of findOptimalMemOpLowering in TargetLowering.
8130349cc55cSDimitry Andric static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8131349cc55cSDimitry Andric                                           unsigned Limit, const MemOp &Op,
8132349cc55cSDimitry Andric                                           unsigned DstAS, unsigned SrcAS,
8133349cc55cSDimitry Andric                                           const AttributeList &FuncAttributes,
8134349cc55cSDimitry Andric                                           const TargetLowering &TLI) {
8135349cc55cSDimitry Andric   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8136349cc55cSDimitry Andric     return false;
8137349cc55cSDimitry Andric 
8138349cc55cSDimitry Andric   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8139349cc55cSDimitry Andric 
8140349cc55cSDimitry Andric   if (Ty == LLT()) {
8141349cc55cSDimitry Andric     // Use the largest scalar type whose alignment constraints are satisfied.
8142349cc55cSDimitry Andric     // We only need to check DstAlign here as SrcAlign is always greater or
8143349cc55cSDimitry Andric     // equal to DstAlign (or zero).
8144349cc55cSDimitry Andric     Ty = LLT::scalar(64);
8145349cc55cSDimitry Andric     if (Op.isFixedDstAlign())
8146349cc55cSDimitry Andric       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8147349cc55cSDimitry Andric              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8148349cc55cSDimitry Andric         Ty = LLT::scalar(Ty.getSizeInBytes());
8149349cc55cSDimitry Andric     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8150349cc55cSDimitry Andric     // FIXME: check for the largest legal type we can load/store to.
8151349cc55cSDimitry Andric   }
8152349cc55cSDimitry Andric 
8153349cc55cSDimitry Andric   unsigned NumMemOps = 0;
8154349cc55cSDimitry Andric   uint64_t Size = Op.size();
8155349cc55cSDimitry Andric   while (Size) {
8156349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
8157349cc55cSDimitry Andric     while (TySize > Size) {
8158349cc55cSDimitry Andric       // For now, only use non-vector load / store's for the left-over pieces.
8159349cc55cSDimitry Andric       LLT NewTy = Ty;
8160349cc55cSDimitry Andric       // FIXME: check for mem op safety and legality of the types. Not all of
8161349cc55cSDimitry Andric       // SDAGisms map cleanly to GISel concepts.
8162349cc55cSDimitry Andric       if (NewTy.isVector())
8163349cc55cSDimitry Andric         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
816406c3fb27SDimitry Andric       NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8165349cc55cSDimitry Andric       unsigned NewTySize = NewTy.getSizeInBytes();
8166349cc55cSDimitry Andric       assert(NewTySize > 0 && "Could not find appropriate type");
8167349cc55cSDimitry Andric 
8168349cc55cSDimitry Andric       // If the new LLT cannot cover all of the remaining bits, then consider
8169349cc55cSDimitry Andric       // issuing a (or a pair of) unaligned and overlapping load / store.
8170bdd1243dSDimitry Andric       unsigned Fast;
8171349cc55cSDimitry Andric       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8172349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
8173349cc55cSDimitry Andric       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8174349cc55cSDimitry Andric           TLI.allowsMisalignedMemoryAccesses(
8175349cc55cSDimitry Andric               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8176349cc55cSDimitry Andric               MachineMemOperand::MONone, &Fast) &&
8177349cc55cSDimitry Andric           Fast)
8178349cc55cSDimitry Andric         TySize = Size;
8179349cc55cSDimitry Andric       else {
8180349cc55cSDimitry Andric         Ty = NewTy;
8181349cc55cSDimitry Andric         TySize = NewTySize;
8182349cc55cSDimitry Andric       }
8183349cc55cSDimitry Andric     }
8184349cc55cSDimitry Andric 
8185349cc55cSDimitry Andric     if (++NumMemOps > Limit)
8186349cc55cSDimitry Andric       return false;
8187349cc55cSDimitry Andric 
8188349cc55cSDimitry Andric     MemOps.push_back(Ty);
8189349cc55cSDimitry Andric     Size -= TySize;
8190349cc55cSDimitry Andric   }
8191349cc55cSDimitry Andric 
8192349cc55cSDimitry Andric   return true;
8193349cc55cSDimitry Andric }
8194349cc55cSDimitry Andric 
8195349cc55cSDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8196349cc55cSDimitry Andric   if (Ty.isVector())
8197349cc55cSDimitry Andric     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8198349cc55cSDimitry Andric                                 Ty.getNumElements());
8199349cc55cSDimitry Andric   return IntegerType::get(C, Ty.getSizeInBits());
8200349cc55cSDimitry Andric }
8201349cc55cSDimitry Andric 
8202349cc55cSDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition.
8203349cc55cSDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8204349cc55cSDimitry Andric   MachineRegisterInfo &MRI = *MIB.getMRI();
8205349cc55cSDimitry Andric   unsigned NumBits = Ty.getScalarSizeInBits();
8206349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8207349cc55cSDimitry Andric   if (!Ty.isVector() && ValVRegAndVal) {
820881ad6265SDimitry Andric     APInt Scalar = ValVRegAndVal->Value.trunc(8);
8209349cc55cSDimitry Andric     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8210349cc55cSDimitry Andric     return MIB.buildConstant(Ty, SplatVal).getReg(0);
8211349cc55cSDimitry Andric   }
8212349cc55cSDimitry Andric 
8213349cc55cSDimitry Andric   // Extend the byte value to the larger type, and then multiply by a magic
8214349cc55cSDimitry Andric   // value 0x010101... in order to replicate it across every byte.
8215349cc55cSDimitry Andric   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8216349cc55cSDimitry Andric   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8217349cc55cSDimitry Andric     return MIB.buildConstant(Ty, 0).getReg(0);
8218349cc55cSDimitry Andric   }
8219349cc55cSDimitry Andric 
8220349cc55cSDimitry Andric   LLT ExtType = Ty.getScalarType();
8221349cc55cSDimitry Andric   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8222349cc55cSDimitry Andric   if (NumBits > 8) {
8223349cc55cSDimitry Andric     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8224349cc55cSDimitry Andric     auto MagicMI = MIB.buildConstant(ExtType, Magic);
8225349cc55cSDimitry Andric     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8226349cc55cSDimitry Andric   }
8227349cc55cSDimitry Andric 
8228349cc55cSDimitry Andric   // For vector types create a G_BUILD_VECTOR.
8229349cc55cSDimitry Andric   if (Ty.isVector())
8230349cc55cSDimitry Andric     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
8231349cc55cSDimitry Andric 
8232349cc55cSDimitry Andric   return Val;
8233349cc55cSDimitry Andric }
8234349cc55cSDimitry Andric 
8235349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8236349cc55cSDimitry Andric LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8237349cc55cSDimitry Andric                              uint64_t KnownLen, Align Alignment,
8238349cc55cSDimitry Andric                              bool IsVolatile) {
8239349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8240349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8241349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8242349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8243349cc55cSDimitry Andric 
8244349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memset length!");
8245349cc55cSDimitry Andric 
8246349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8247349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8248349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
8249349cc55cSDimitry Andric 
8250349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8251349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8252349cc55cSDimitry Andric     DstAlignCanChange = true;
8253349cc55cSDimitry Andric 
8254349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8255349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8256349cc55cSDimitry Andric 
8257349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8258349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8259349cc55cSDimitry Andric 
8260349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8261349cc55cSDimitry Andric   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8262349cc55cSDimitry Andric 
8263349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8264349cc55cSDimitry Andric                                      MemOp::Set(KnownLen, DstAlignCanChange,
8265349cc55cSDimitry Andric                                                 Alignment,
8266349cc55cSDimitry Andric                                                 /*IsZeroMemset=*/IsZeroVal,
8267349cc55cSDimitry Andric                                                 /*IsVolatile=*/IsVolatile),
8268349cc55cSDimitry Andric                                      DstPtrInfo.getAddrSpace(), ~0u,
8269349cc55cSDimitry Andric                                      MF.getFunction().getAttributes(), TLI))
8270349cc55cSDimitry Andric     return UnableToLegalize;
8271349cc55cSDimitry Andric 
8272349cc55cSDimitry Andric   if (DstAlignCanChange) {
8273349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8274349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8275349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8276349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8277349cc55cSDimitry Andric       Alignment = NewAlign;
8278349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8279349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8280349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8281349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8282349cc55cSDimitry Andric     }
8283349cc55cSDimitry Andric   }
8284349cc55cSDimitry Andric 
8285349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8286349cc55cSDimitry Andric   // Find the largest store and generate the bit pattern for it.
8287349cc55cSDimitry Andric   LLT LargestTy = MemOps[0];
8288349cc55cSDimitry Andric   for (unsigned i = 1; i < MemOps.size(); i++)
8289349cc55cSDimitry Andric     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8290349cc55cSDimitry Andric       LargestTy = MemOps[i];
8291349cc55cSDimitry Andric 
8292349cc55cSDimitry Andric   // The memset stored value is always defined as an s8, so in order to make it
8293349cc55cSDimitry Andric   // work with larger store types we need to repeat the bit pattern across the
8294349cc55cSDimitry Andric   // wider type.
8295349cc55cSDimitry Andric   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8296349cc55cSDimitry Andric 
8297349cc55cSDimitry Andric   if (!MemSetValue)
8298349cc55cSDimitry Andric     return UnableToLegalize;
8299349cc55cSDimitry Andric 
8300349cc55cSDimitry Andric   // Generate the stores. For each store type in the list, we generate the
8301349cc55cSDimitry Andric   // matching store of that type to the destination address.
8302349cc55cSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
8303349cc55cSDimitry Andric   unsigned DstOff = 0;
8304349cc55cSDimitry Andric   unsigned Size = KnownLen;
8305349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); I++) {
8306349cc55cSDimitry Andric     LLT Ty = MemOps[I];
8307349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
8308349cc55cSDimitry Andric     if (TySize > Size) {
8309349cc55cSDimitry Andric       // Issuing an unaligned load / store pair that overlaps with the previous
8310349cc55cSDimitry Andric       // pair. Adjust the offset accordingly.
8311349cc55cSDimitry Andric       assert(I == MemOps.size() - 1 && I != 0);
8312349cc55cSDimitry Andric       DstOff -= TySize - Size;
8313349cc55cSDimitry Andric     }
8314349cc55cSDimitry Andric 
8315349cc55cSDimitry Andric     // If this store is smaller than the largest store see whether we can get
8316349cc55cSDimitry Andric     // the smaller value for free with a truncate.
8317349cc55cSDimitry Andric     Register Value = MemSetValue;
8318349cc55cSDimitry Andric     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8319349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
8320349cc55cSDimitry Andric       MVT LargestVT = getMVTForLLT(LargestTy);
8321349cc55cSDimitry Andric       if (!LargestTy.isVector() && !Ty.isVector() &&
8322349cc55cSDimitry Andric           TLI.isTruncateFree(LargestVT, VT))
8323349cc55cSDimitry Andric         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8324349cc55cSDimitry Andric       else
8325349cc55cSDimitry Andric         Value = getMemsetValue(Val, Ty, MIB);
8326349cc55cSDimitry Andric       if (!Value)
8327349cc55cSDimitry Andric         return UnableToLegalize;
8328349cc55cSDimitry Andric     }
8329349cc55cSDimitry Andric 
8330349cc55cSDimitry Andric     auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8331349cc55cSDimitry Andric 
8332349cc55cSDimitry Andric     Register Ptr = Dst;
8333349cc55cSDimitry Andric     if (DstOff != 0) {
8334349cc55cSDimitry Andric       auto Offset =
8335349cc55cSDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8336349cc55cSDimitry Andric       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8337349cc55cSDimitry Andric     }
8338349cc55cSDimitry Andric 
8339349cc55cSDimitry Andric     MIB.buildStore(Value, Ptr, *StoreMMO);
8340349cc55cSDimitry Andric     DstOff += Ty.getSizeInBytes();
8341349cc55cSDimitry Andric     Size -= TySize;
8342349cc55cSDimitry Andric   }
8343349cc55cSDimitry Andric 
8344349cc55cSDimitry Andric   MI.eraseFromParent();
8345349cc55cSDimitry Andric   return Legalized;
8346349cc55cSDimitry Andric }
8347349cc55cSDimitry Andric 
8348349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8349349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8350349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8351349cc55cSDimitry Andric 
835206c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8353349cc55cSDimitry Andric 
8354349cc55cSDimitry Andric   const auto *MMOIt = MI.memoperands_begin();
8355349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8356349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8357349cc55cSDimitry Andric 
8358349cc55cSDimitry Andric   // See if this is a constant length copy
8359349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8360349cc55cSDimitry Andric   // FIXME: support dynamically sized G_MEMCPY_INLINE
836181ad6265SDimitry Andric   assert(LenVRegAndVal &&
8362349cc55cSDimitry Andric          "inline memcpy with dynamic size is not yet supported");
8363349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8364349cc55cSDimitry Andric   if (KnownLen == 0) {
8365349cc55cSDimitry Andric     MI.eraseFromParent();
8366349cc55cSDimitry Andric     return Legalized;
8367349cc55cSDimitry Andric   }
8368349cc55cSDimitry Andric 
8369349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8370349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8371349cc55cSDimitry Andric   Align DstAlign = DstMMO.getBaseAlign();
8372349cc55cSDimitry Andric   Align SrcAlign = SrcMMO.getBaseAlign();
8373349cc55cSDimitry Andric 
8374349cc55cSDimitry Andric   return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8375349cc55cSDimitry Andric                            IsVolatile);
8376349cc55cSDimitry Andric }
8377349cc55cSDimitry Andric 
8378349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8379349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8380349cc55cSDimitry Andric                                    uint64_t KnownLen, Align DstAlign,
8381349cc55cSDimitry Andric                                    Align SrcAlign, bool IsVolatile) {
8382349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8383349cc55cSDimitry Andric   return lowerMemcpy(MI, Dst, Src, KnownLen,
8384349cc55cSDimitry Andric                      std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8385349cc55cSDimitry Andric                      IsVolatile);
8386349cc55cSDimitry Andric }
8387349cc55cSDimitry Andric 
8388349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8389349cc55cSDimitry Andric LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8390349cc55cSDimitry Andric                              uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8391349cc55cSDimitry Andric                              Align SrcAlign, bool IsVolatile) {
8392349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8393349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8394349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8395349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8396349cc55cSDimitry Andric 
8397349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memcpy length!");
8398349cc55cSDimitry Andric 
8399349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8400349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
840181ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8402349cc55cSDimitry Andric 
8403349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8404349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8405349cc55cSDimitry Andric     DstAlignCanChange = true;
8406349cc55cSDimitry Andric 
8407349cc55cSDimitry Andric   // FIXME: infer better src pointer alignment like SelectionDAG does here.
8408349cc55cSDimitry Andric   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8409349cc55cSDimitry Andric   // if the memcpy is in a tail call position.
8410349cc55cSDimitry Andric 
8411349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8412349cc55cSDimitry Andric 
8413349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8414349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8415349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8416349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8417349cc55cSDimitry Andric 
8418349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8419349cc55cSDimitry Andric           MemOps, Limit,
8420349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8421349cc55cSDimitry Andric                       IsVolatile),
8422349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8423349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8424349cc55cSDimitry Andric     return UnableToLegalize;
8425349cc55cSDimitry Andric 
8426349cc55cSDimitry Andric   if (DstAlignCanChange) {
8427349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8428349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8429349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8430349cc55cSDimitry Andric 
8431349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8432349cc55cSDimitry Andric     // realignment.
8433349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8434349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8435349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
843681ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8437349cc55cSDimitry Andric 
8438349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8439349cc55cSDimitry Andric       Alignment = NewAlign;
8440349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8441349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8442349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8443349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8444349cc55cSDimitry Andric     }
8445349cc55cSDimitry Andric   }
8446349cc55cSDimitry Andric 
8447349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8448349cc55cSDimitry Andric 
8449349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8450349cc55cSDimitry Andric   // Now we need to emit a pair of load and stores for each of the types we've
8451349cc55cSDimitry Andric   // collected. I.e. for each type, generate a load from the source pointer of
8452349cc55cSDimitry Andric   // that type width, and then generate a corresponding store to the dest buffer
8453349cc55cSDimitry Andric   // of that value loaded. This can result in a sequence of loads and stores
8454349cc55cSDimitry Andric   // mixed types, depending on what the target specifies as good types to use.
8455349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8456349cc55cSDimitry Andric   unsigned Size = KnownLen;
8457349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8458349cc55cSDimitry Andric     // Issuing an unaligned load / store pair  that overlaps with the previous
8459349cc55cSDimitry Andric     // pair. Adjust the offset accordingly.
8460349cc55cSDimitry Andric     if (CopyTy.getSizeInBytes() > Size)
8461349cc55cSDimitry Andric       CurrOffset -= CopyTy.getSizeInBytes() - Size;
8462349cc55cSDimitry Andric 
8463349cc55cSDimitry Andric     // Construct MMOs for the accesses.
8464349cc55cSDimitry Andric     auto *LoadMMO =
8465349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8466349cc55cSDimitry Andric     auto *StoreMMO =
8467349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8468349cc55cSDimitry Andric 
8469349cc55cSDimitry Andric     // Create the load.
8470349cc55cSDimitry Andric     Register LoadPtr = Src;
8471349cc55cSDimitry Andric     Register Offset;
8472349cc55cSDimitry Andric     if (CurrOffset != 0) {
84734824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
84744824e7fdSDimitry Andric       Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8475349cc55cSDimitry Andric                    .getReg(0);
84764824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8477349cc55cSDimitry Andric     }
8478349cc55cSDimitry Andric     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8479349cc55cSDimitry Andric 
8480349cc55cSDimitry Andric     // Create the store.
84814824e7fdSDimitry Andric     Register StorePtr = Dst;
84824824e7fdSDimitry Andric     if (CurrOffset != 0) {
84834824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
84844824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
84854824e7fdSDimitry Andric     }
8486349cc55cSDimitry Andric     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8487349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8488349cc55cSDimitry Andric     Size -= CopyTy.getSizeInBytes();
8489349cc55cSDimitry Andric   }
8490349cc55cSDimitry Andric 
8491349cc55cSDimitry Andric   MI.eraseFromParent();
8492349cc55cSDimitry Andric   return Legalized;
8493349cc55cSDimitry Andric }
8494349cc55cSDimitry Andric 
8495349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8496349cc55cSDimitry Andric LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8497349cc55cSDimitry Andric                               uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8498349cc55cSDimitry Andric                               bool IsVolatile) {
8499349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8500349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8501349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8502349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8503349cc55cSDimitry Andric 
8504349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memmove length!");
8505349cc55cSDimitry Andric 
8506349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8507349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8508349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
850981ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8510349cc55cSDimitry Andric 
8511349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8512349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8513349cc55cSDimitry Andric     DstAlignCanChange = true;
8514349cc55cSDimitry Andric 
8515349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8516349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8517349cc55cSDimitry Andric 
8518349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8519349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8520349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8521349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8522349cc55cSDimitry Andric 
8523349cc55cSDimitry Andric   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8524349cc55cSDimitry Andric   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
8525349cc55cSDimitry Andric   // same thing here.
8526349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8527349cc55cSDimitry Andric           MemOps, Limit,
8528349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8529349cc55cSDimitry Andric                       /*IsVolatile*/ true),
8530349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8531349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8532349cc55cSDimitry Andric     return UnableToLegalize;
8533349cc55cSDimitry Andric 
8534349cc55cSDimitry Andric   if (DstAlignCanChange) {
8535349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8536349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8537349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8538349cc55cSDimitry Andric 
8539349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8540349cc55cSDimitry Andric     // realignment.
8541349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8542349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8543349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
854481ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8545349cc55cSDimitry Andric 
8546349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8547349cc55cSDimitry Andric       Alignment = NewAlign;
8548349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8549349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8550349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8551349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8552349cc55cSDimitry Andric     }
8553349cc55cSDimitry Andric   }
8554349cc55cSDimitry Andric 
8555349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8556349cc55cSDimitry Andric 
8557349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8558349cc55cSDimitry Andric   // Memmove requires that we perform the loads first before issuing the stores.
8559349cc55cSDimitry Andric   // Apart from that, this loop is pretty much doing the same thing as the
8560349cc55cSDimitry Andric   // memcpy codegen function.
8561349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8562349cc55cSDimitry Andric   SmallVector<Register, 16> LoadVals;
8563349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8564349cc55cSDimitry Andric     // Construct MMO for the load.
8565349cc55cSDimitry Andric     auto *LoadMMO =
8566349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8567349cc55cSDimitry Andric 
8568349cc55cSDimitry Andric     // Create the load.
8569349cc55cSDimitry Andric     Register LoadPtr = Src;
8570349cc55cSDimitry Andric     if (CurrOffset != 0) {
85714824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
8572349cc55cSDimitry Andric       auto Offset =
85734824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
85744824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8575349cc55cSDimitry Andric     }
8576349cc55cSDimitry Andric     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8577349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8578349cc55cSDimitry Andric   }
8579349cc55cSDimitry Andric 
8580349cc55cSDimitry Andric   CurrOffset = 0;
8581349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); ++I) {
8582349cc55cSDimitry Andric     LLT CopyTy = MemOps[I];
8583349cc55cSDimitry Andric     // Now store the values loaded.
8584349cc55cSDimitry Andric     auto *StoreMMO =
8585349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8586349cc55cSDimitry Andric 
8587349cc55cSDimitry Andric     Register StorePtr = Dst;
8588349cc55cSDimitry Andric     if (CurrOffset != 0) {
85894824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
8590349cc55cSDimitry Andric       auto Offset =
85914824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
85924824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8593349cc55cSDimitry Andric     }
8594349cc55cSDimitry Andric     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8595349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8596349cc55cSDimitry Andric   }
8597349cc55cSDimitry Andric   MI.eraseFromParent();
8598349cc55cSDimitry Andric   return Legalized;
8599349cc55cSDimitry Andric }
8600349cc55cSDimitry Andric 
8601349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8602349cc55cSDimitry Andric LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8603349cc55cSDimitry Andric   const unsigned Opc = MI.getOpcode();
8604349cc55cSDimitry Andric   // This combine is fairly complex so it's not written with a separate
8605349cc55cSDimitry Andric   // matcher function.
8606349cc55cSDimitry Andric   assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8607349cc55cSDimitry Andric           Opc == TargetOpcode::G_MEMSET) &&
8608349cc55cSDimitry Andric          "Expected memcpy like instruction");
8609349cc55cSDimitry Andric 
8610349cc55cSDimitry Andric   auto MMOIt = MI.memoperands_begin();
8611349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8612349cc55cSDimitry Andric 
8613349cc55cSDimitry Andric   Align DstAlign = MemOp->getBaseAlign();
8614349cc55cSDimitry Andric   Align SrcAlign;
861506c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8616349cc55cSDimitry Andric 
8617349cc55cSDimitry Andric   if (Opc != TargetOpcode::G_MEMSET) {
8618349cc55cSDimitry Andric     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8619349cc55cSDimitry Andric     MemOp = *(++MMOIt);
8620349cc55cSDimitry Andric     SrcAlign = MemOp->getBaseAlign();
8621349cc55cSDimitry Andric   }
8622349cc55cSDimitry Andric 
8623349cc55cSDimitry Andric   // See if this is a constant length copy
8624349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8625349cc55cSDimitry Andric   if (!LenVRegAndVal)
8626349cc55cSDimitry Andric     return UnableToLegalize;
8627349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8628349cc55cSDimitry Andric 
8629349cc55cSDimitry Andric   if (KnownLen == 0) {
8630349cc55cSDimitry Andric     MI.eraseFromParent();
8631349cc55cSDimitry Andric     return Legalized;
8632349cc55cSDimitry Andric   }
8633349cc55cSDimitry Andric 
8634349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8635349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8636349cc55cSDimitry Andric     return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8637349cc55cSDimitry Andric                              IsVolatile);
8638349cc55cSDimitry Andric 
8639349cc55cSDimitry Andric   // Don't try to optimize volatile.
8640349cc55cSDimitry Andric   if (IsVolatile)
8641349cc55cSDimitry Andric     return UnableToLegalize;
8642349cc55cSDimitry Andric 
8643349cc55cSDimitry Andric   if (MaxLen && KnownLen > MaxLen)
8644349cc55cSDimitry Andric     return UnableToLegalize;
8645349cc55cSDimitry Andric 
8646349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY) {
8647349cc55cSDimitry Andric     auto &MF = *MI.getParent()->getParent();
8648349cc55cSDimitry Andric     const auto &TLI = *MF.getSubtarget().getTargetLowering();
8649349cc55cSDimitry Andric     bool OptSize = shouldLowerMemFuncForSize(MF);
8650349cc55cSDimitry Andric     uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8651349cc55cSDimitry Andric     return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8652349cc55cSDimitry Andric                        IsVolatile);
8653349cc55cSDimitry Andric   }
8654349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMMOVE)
8655349cc55cSDimitry Andric     return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8656349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMSET)
8657349cc55cSDimitry Andric     return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8658349cc55cSDimitry Andric   return UnableToLegalize;
8659349cc55cSDimitry Andric }
8660