xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This file implements the LegalizerHelper class to legalize
100b57cec5SDimitry Andric /// individual instructions and the LegalizeMachineIR wrapper pass for the
110b57cec5SDimitry Andric /// primary legalization.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CallLowering.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1806c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1981ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
22e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
2506c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
2681ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
288bcb0991SDimitry Andric #include "llvm/CodeGen/TargetFrameLowering.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
31fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
33fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h"
340b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
350b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
360b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
37349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h"
38bdd1243dSDimitry Andric #include <numeric>
39bdd1243dSDimitry Andric #include <optional>
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric #define DEBUG_TYPE "legalizer"
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric using namespace LegalizeActions;
45e8d8bef9SDimitry Andric using namespace MIPatternMatch;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
480b57cec5SDimitry Andric ///
490b57cec5SDimitry Andric /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
500b57cec5SDimitry Andric /// with any leftover piece as type \p LeftoverTy
510b57cec5SDimitry Andric ///
520b57cec5SDimitry Andric /// Returns -1 in the first element of the pair if the breakdown is not
530b57cec5SDimitry Andric /// satisfiable.
540b57cec5SDimitry Andric static std::pair<int, int>
550b57cec5SDimitry Andric getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
560b57cec5SDimitry Andric   assert(!LeftoverTy.isValid() && "this is an out argument");
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric   unsigned Size = OrigTy.getSizeInBits();
590b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
600b57cec5SDimitry Andric   unsigned NumParts = Size / NarrowSize;
610b57cec5SDimitry Andric   unsigned LeftoverSize = Size - NumParts * NarrowSize;
620b57cec5SDimitry Andric   assert(Size > NarrowSize);
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric   if (LeftoverSize == 0)
650b57cec5SDimitry Andric     return {NumParts, 0};
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric   if (NarrowTy.isVector()) {
680b57cec5SDimitry Andric     unsigned EltSize = OrigTy.getScalarSizeInBits();
690b57cec5SDimitry Andric     if (LeftoverSize % EltSize != 0)
700b57cec5SDimitry Andric       return {-1, -1};
71fe6060f1SDimitry Andric     LeftoverTy = LLT::scalarOrVector(
72fe6060f1SDimitry Andric         ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
730b57cec5SDimitry Andric   } else {
740b57cec5SDimitry Andric     LeftoverTy = LLT::scalar(LeftoverSize);
750b57cec5SDimitry Andric   }
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
780b57cec5SDimitry Andric   return std::make_pair(NumParts, NumLeftover);
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric 
815ffd83dbSDimitry Andric static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
825ffd83dbSDimitry Andric 
835ffd83dbSDimitry Andric   if (!Ty.isScalar())
845ffd83dbSDimitry Andric     return nullptr;
855ffd83dbSDimitry Andric 
865ffd83dbSDimitry Andric   switch (Ty.getSizeInBits()) {
875ffd83dbSDimitry Andric   case 16:
885ffd83dbSDimitry Andric     return Type::getHalfTy(Ctx);
895ffd83dbSDimitry Andric   case 32:
905ffd83dbSDimitry Andric     return Type::getFloatTy(Ctx);
915ffd83dbSDimitry Andric   case 64:
925ffd83dbSDimitry Andric     return Type::getDoubleTy(Ctx);
93e8d8bef9SDimitry Andric   case 80:
94e8d8bef9SDimitry Andric     return Type::getX86_FP80Ty(Ctx);
955ffd83dbSDimitry Andric   case 128:
965ffd83dbSDimitry Andric     return Type::getFP128Ty(Ctx);
975ffd83dbSDimitry Andric   default:
985ffd83dbSDimitry Andric     return nullptr;
995ffd83dbSDimitry Andric   }
1005ffd83dbSDimitry Andric }
1015ffd83dbSDimitry Andric 
1020b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF,
1030b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
1040b57cec5SDimitry Andric                                  MachineIRBuilder &Builder)
1055ffd83dbSDimitry Andric     : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
106e8d8bef9SDimitry Andric       LI(*MF.getSubtarget().getLegalizerInfo()),
10706c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
1100b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
11106c3fb27SDimitry Andric                                  MachineIRBuilder &B, GISelKnownBits *KB)
112e8d8bef9SDimitry Andric     : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
11306c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
114e8d8bef9SDimitry Andric 
1150b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
116fe6060f1SDimitry Andric LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
117fe6060f1SDimitry Andric                                    LostDebugLocObserver &LocObserver) {
1185ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
1195ffd83dbSDimitry Andric 
1205ffd83dbSDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
1210b57cec5SDimitry Andric 
122*5f757f3fSDimitry Andric   if (isa<GIntrinsic>(MI))
1235ffd83dbSDimitry Andric     return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
1240b57cec5SDimitry Andric   auto Step = LI.getAction(MI, MRI);
1250b57cec5SDimitry Andric   switch (Step.Action) {
1260b57cec5SDimitry Andric   case Legal:
1270b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Already legal\n");
1280b57cec5SDimitry Andric     return AlreadyLegal;
1290b57cec5SDimitry Andric   case Libcall:
1300b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
131fe6060f1SDimitry Andric     return libcall(MI, LocObserver);
1320b57cec5SDimitry Andric   case NarrowScalar:
1330b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
1340b57cec5SDimitry Andric     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
1350b57cec5SDimitry Andric   case WidenScalar:
1360b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
1370b57cec5SDimitry Andric     return widenScalar(MI, Step.TypeIdx, Step.NewType);
1385ffd83dbSDimitry Andric   case Bitcast:
1395ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
1405ffd83dbSDimitry Andric     return bitcast(MI, Step.TypeIdx, Step.NewType);
1410b57cec5SDimitry Andric   case Lower:
1420b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Lower\n");
1430b57cec5SDimitry Andric     return lower(MI, Step.TypeIdx, Step.NewType);
1440b57cec5SDimitry Andric   case FewerElements:
1450b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
1460b57cec5SDimitry Andric     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
1470b57cec5SDimitry Andric   case MoreElements:
1480b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
1490b57cec5SDimitry Andric     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
1500b57cec5SDimitry Andric   case Custom:
1510b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
1525ffd83dbSDimitry Andric     return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
1530b57cec5SDimitry Andric   default:
1540b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
1550b57cec5SDimitry Andric     return UnableToLegalize;
1560b57cec5SDimitry Andric   }
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
1600b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &VRegs) {
1610b57cec5SDimitry Andric   for (int i = 0; i < NumParts; ++i)
1620b57cec5SDimitry Andric     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
1630b57cec5SDimitry Andric   MIRBuilder.buildUnmerge(VRegs, Reg);
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
1670b57cec5SDimitry Andric                                    LLT MainTy, LLT &LeftoverTy,
1680b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &VRegs,
1690b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &LeftoverRegs) {
1700b57cec5SDimitry Andric   assert(!LeftoverTy.isValid() && "this is an out argument");
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   unsigned RegSize = RegTy.getSizeInBits();
1730b57cec5SDimitry Andric   unsigned MainSize = MainTy.getSizeInBits();
1740b57cec5SDimitry Andric   unsigned NumParts = RegSize / MainSize;
1750b57cec5SDimitry Andric   unsigned LeftoverSize = RegSize - NumParts * MainSize;
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric   // Use an unmerge when possible.
1780b57cec5SDimitry Andric   if (LeftoverSize == 0) {
1790b57cec5SDimitry Andric     for (unsigned I = 0; I < NumParts; ++I)
1800b57cec5SDimitry Andric       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
1810b57cec5SDimitry Andric     MIRBuilder.buildUnmerge(VRegs, Reg);
1820b57cec5SDimitry Andric     return true;
1830b57cec5SDimitry Andric   }
1840b57cec5SDimitry Andric 
1850eae32dcSDimitry Andric   // Perform irregular split. Leftover is last element of RegPieces.
1860b57cec5SDimitry Andric   if (MainTy.isVector()) {
1870eae32dcSDimitry Andric     SmallVector<Register, 8> RegPieces;
1880eae32dcSDimitry Andric     extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
1890eae32dcSDimitry Andric     for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
1900eae32dcSDimitry Andric       VRegs.push_back(RegPieces[i]);
1910eae32dcSDimitry Andric     LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
1920eae32dcSDimitry Andric     LeftoverTy = MRI.getType(LeftoverRegs[0]);
1930eae32dcSDimitry Andric     return true;
1940b57cec5SDimitry Andric   }
1950b57cec5SDimitry Andric 
1960eae32dcSDimitry Andric   LeftoverTy = LLT::scalar(LeftoverSize);
1970b57cec5SDimitry Andric   // For irregular sizes, extract the individual parts.
1980b57cec5SDimitry Andric   for (unsigned I = 0; I != NumParts; ++I) {
1990b57cec5SDimitry Andric     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
2000b57cec5SDimitry Andric     VRegs.push_back(NewReg);
2010b57cec5SDimitry Andric     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
2020b57cec5SDimitry Andric   }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
2050b57cec5SDimitry Andric        Offset += LeftoverSize) {
2060b57cec5SDimitry Andric     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
2070b57cec5SDimitry Andric     LeftoverRegs.push_back(NewReg);
2080b57cec5SDimitry Andric     MIRBuilder.buildExtract(NewReg, Reg, Offset);
2090b57cec5SDimitry Andric   }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   return true;
2120b57cec5SDimitry Andric }
2130b57cec5SDimitry Andric 
2140eae32dcSDimitry Andric void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
2150eae32dcSDimitry Andric                                          SmallVectorImpl<Register> &VRegs) {
2160eae32dcSDimitry Andric   LLT RegTy = MRI.getType(Reg);
2170eae32dcSDimitry Andric   assert(RegTy.isVector() && "Expected a vector type");
2180eae32dcSDimitry Andric 
2190eae32dcSDimitry Andric   LLT EltTy = RegTy.getElementType();
2200eae32dcSDimitry Andric   LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
2210eae32dcSDimitry Andric   unsigned RegNumElts = RegTy.getNumElements();
2220eae32dcSDimitry Andric   unsigned LeftoverNumElts = RegNumElts % NumElts;
2230eae32dcSDimitry Andric   unsigned NumNarrowTyPieces = RegNumElts / NumElts;
2240eae32dcSDimitry Andric 
2250eae32dcSDimitry Andric   // Perfect split without leftover
2260eae32dcSDimitry Andric   if (LeftoverNumElts == 0)
2270eae32dcSDimitry Andric     return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
2280eae32dcSDimitry Andric 
2290eae32dcSDimitry Andric   // Irregular split. Provide direct access to all elements for artifact
2300eae32dcSDimitry Andric   // combiner using unmerge to elements. Then build vectors with NumElts
2310eae32dcSDimitry Andric   // elements. Remaining element(s) will be (used to build vector) Leftover.
2320eae32dcSDimitry Andric   SmallVector<Register, 8> Elts;
2330eae32dcSDimitry Andric   extractParts(Reg, EltTy, RegNumElts, Elts);
2340eae32dcSDimitry Andric 
2350eae32dcSDimitry Andric   unsigned Offset = 0;
2360eae32dcSDimitry Andric   // Requested sub-vectors of NarrowTy.
2370eae32dcSDimitry Andric   for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
2380eae32dcSDimitry Andric     ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
239bdd1243dSDimitry Andric     VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
2400eae32dcSDimitry Andric   }
2410eae32dcSDimitry Andric 
2420eae32dcSDimitry Andric   // Leftover element(s).
2430eae32dcSDimitry Andric   if (LeftoverNumElts == 1) {
2440eae32dcSDimitry Andric     VRegs.push_back(Elts[Offset]);
2450eae32dcSDimitry Andric   } else {
2460eae32dcSDimitry Andric     LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
2470eae32dcSDimitry Andric     ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
248bdd1243dSDimitry Andric     VRegs.push_back(
249bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
2500eae32dcSDimitry Andric   }
2510eae32dcSDimitry Andric }
2520eae32dcSDimitry Andric 
2530b57cec5SDimitry Andric void LegalizerHelper::insertParts(Register DstReg,
2540b57cec5SDimitry Andric                                   LLT ResultTy, LLT PartTy,
2550b57cec5SDimitry Andric                                   ArrayRef<Register> PartRegs,
2560b57cec5SDimitry Andric                                   LLT LeftoverTy,
2570b57cec5SDimitry Andric                                   ArrayRef<Register> LeftoverRegs) {
2580b57cec5SDimitry Andric   if (!LeftoverTy.isValid()) {
2590b57cec5SDimitry Andric     assert(LeftoverRegs.empty());
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric     if (!ResultTy.isVector()) {
262bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
2630b57cec5SDimitry Andric       return;
2640b57cec5SDimitry Andric     }
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric     if (PartTy.isVector())
2670b57cec5SDimitry Andric       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
2680b57cec5SDimitry Andric     else
2690b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, PartRegs);
2700b57cec5SDimitry Andric     return;
2710b57cec5SDimitry Andric   }
2720b57cec5SDimitry Andric 
2730eae32dcSDimitry Andric   // Merge sub-vectors with different number of elements and insert into DstReg.
2740eae32dcSDimitry Andric   if (ResultTy.isVector()) {
2750eae32dcSDimitry Andric     assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
2760eae32dcSDimitry Andric     SmallVector<Register, 8> AllRegs;
2770eae32dcSDimitry Andric     for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
2780eae32dcSDimitry Andric       AllRegs.push_back(Reg);
2790eae32dcSDimitry Andric     return mergeMixedSubvectors(DstReg, AllRegs);
2800eae32dcSDimitry Andric   }
2810eae32dcSDimitry Andric 
282fe6060f1SDimitry Andric   SmallVector<Register> GCDRegs;
283fe6060f1SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
284fe6060f1SDimitry Andric   for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
285fe6060f1SDimitry Andric     extractGCDType(GCDRegs, GCDTy, PartReg);
286fe6060f1SDimitry Andric   LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
287fe6060f1SDimitry Andric   buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric 
2900eae32dcSDimitry Andric void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
2910eae32dcSDimitry Andric                                        Register Reg) {
2920eae32dcSDimitry Andric   LLT Ty = MRI.getType(Reg);
2930eae32dcSDimitry Andric   SmallVector<Register, 8> RegElts;
2940eae32dcSDimitry Andric   extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
2950eae32dcSDimitry Andric   Elts.append(RegElts);
2960eae32dcSDimitry Andric }
2970eae32dcSDimitry Andric 
2980eae32dcSDimitry Andric /// Merge \p PartRegs with different types into \p DstReg.
2990eae32dcSDimitry Andric void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
3000eae32dcSDimitry Andric                                            ArrayRef<Register> PartRegs) {
3010eae32dcSDimitry Andric   SmallVector<Register, 8> AllElts;
3020eae32dcSDimitry Andric   for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
3030eae32dcSDimitry Andric     appendVectorElts(AllElts, PartRegs[i]);
3040eae32dcSDimitry Andric 
3050eae32dcSDimitry Andric   Register Leftover = PartRegs[PartRegs.size() - 1];
3060eae32dcSDimitry Andric   if (MRI.getType(Leftover).isScalar())
3070eae32dcSDimitry Andric     AllElts.push_back(Leftover);
3080eae32dcSDimitry Andric   else
3090eae32dcSDimitry Andric     appendVectorElts(AllElts, Leftover);
3100eae32dcSDimitry Andric 
311bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
3120eae32dcSDimitry Andric }
3130eae32dcSDimitry Andric 
314e8d8bef9SDimitry Andric /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
3155ffd83dbSDimitry Andric static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
3165ffd83dbSDimitry Andric                               const MachineInstr &MI) {
3175ffd83dbSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
3185ffd83dbSDimitry Andric 
319e8d8bef9SDimitry Andric   const int StartIdx = Regs.size();
3205ffd83dbSDimitry Andric   const int NumResults = MI.getNumOperands() - 1;
321e8d8bef9SDimitry Andric   Regs.resize(Regs.size() + NumResults);
3225ffd83dbSDimitry Andric   for (int I = 0; I != NumResults; ++I)
323e8d8bef9SDimitry Andric     Regs[StartIdx + I] = MI.getOperand(I).getReg();
3245ffd83dbSDimitry Andric }
3255ffd83dbSDimitry Andric 
326e8d8bef9SDimitry Andric void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
327e8d8bef9SDimitry Andric                                      LLT GCDTy, Register SrcReg) {
3285ffd83dbSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
3295ffd83dbSDimitry Andric   if (SrcTy == GCDTy) {
3305ffd83dbSDimitry Andric     // If the source already evenly divides the result type, we don't need to do
3315ffd83dbSDimitry Andric     // anything.
3325ffd83dbSDimitry Andric     Parts.push_back(SrcReg);
3335ffd83dbSDimitry Andric   } else {
3345ffd83dbSDimitry Andric     // Need to split into common type sized pieces.
3355ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3365ffd83dbSDimitry Andric     getUnmergeResults(Parts, *Unmerge);
3375ffd83dbSDimitry Andric   }
338e8d8bef9SDimitry Andric }
3395ffd83dbSDimitry Andric 
340e8d8bef9SDimitry Andric LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
341e8d8bef9SDimitry Andric                                     LLT NarrowTy, Register SrcReg) {
342e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
343e8d8bef9SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
344e8d8bef9SDimitry Andric   extractGCDType(Parts, GCDTy, SrcReg);
3455ffd83dbSDimitry Andric   return GCDTy;
3465ffd83dbSDimitry Andric }
3475ffd83dbSDimitry Andric 
3485ffd83dbSDimitry Andric LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
3495ffd83dbSDimitry Andric                                          SmallVectorImpl<Register> &VRegs,
3505ffd83dbSDimitry Andric                                          unsigned PadStrategy) {
3515ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(DstTy, NarrowTy);
3525ffd83dbSDimitry Andric 
3535ffd83dbSDimitry Andric   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3545ffd83dbSDimitry Andric   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
3555ffd83dbSDimitry Andric   int NumOrigSrc = VRegs.size();
3565ffd83dbSDimitry Andric 
3575ffd83dbSDimitry Andric   Register PadReg;
3585ffd83dbSDimitry Andric 
3595ffd83dbSDimitry Andric   // Get a value we can use to pad the source value if the sources won't evenly
3605ffd83dbSDimitry Andric   // cover the result type.
3615ffd83dbSDimitry Andric   if (NumOrigSrc < NumParts * NumSubParts) {
3625ffd83dbSDimitry Andric     if (PadStrategy == TargetOpcode::G_ZEXT)
3635ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
3645ffd83dbSDimitry Andric     else if (PadStrategy == TargetOpcode::G_ANYEXT)
3655ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
3665ffd83dbSDimitry Andric     else {
3675ffd83dbSDimitry Andric       assert(PadStrategy == TargetOpcode::G_SEXT);
3685ffd83dbSDimitry Andric 
3695ffd83dbSDimitry Andric       // Shift the sign bit of the low register through the high register.
3705ffd83dbSDimitry Andric       auto ShiftAmt =
3715ffd83dbSDimitry Andric         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
3725ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
3735ffd83dbSDimitry Andric     }
3745ffd83dbSDimitry Andric   }
3755ffd83dbSDimitry Andric 
3765ffd83dbSDimitry Andric   // Registers for the final merge to be produced.
3775ffd83dbSDimitry Andric   SmallVector<Register, 4> Remerge(NumParts);
3785ffd83dbSDimitry Andric 
3795ffd83dbSDimitry Andric   // Registers needed for intermediate merges, which will be merged into a
3805ffd83dbSDimitry Andric   // source for Remerge.
3815ffd83dbSDimitry Andric   SmallVector<Register, 4> SubMerge(NumSubParts);
3825ffd83dbSDimitry Andric 
3835ffd83dbSDimitry Andric   // Once we've fully read off the end of the original source bits, we can reuse
3845ffd83dbSDimitry Andric   // the same high bits for remaining padding elements.
3855ffd83dbSDimitry Andric   Register AllPadReg;
3865ffd83dbSDimitry Andric 
3875ffd83dbSDimitry Andric   // Build merges to the LCM type to cover the original result type.
3885ffd83dbSDimitry Andric   for (int I = 0; I != NumParts; ++I) {
3895ffd83dbSDimitry Andric     bool AllMergePartsArePadding = true;
3905ffd83dbSDimitry Andric 
3915ffd83dbSDimitry Andric     // Build the requested merges to the requested type.
3925ffd83dbSDimitry Andric     for (int J = 0; J != NumSubParts; ++J) {
3935ffd83dbSDimitry Andric       int Idx = I * NumSubParts + J;
3945ffd83dbSDimitry Andric       if (Idx >= NumOrigSrc) {
3955ffd83dbSDimitry Andric         SubMerge[J] = PadReg;
3965ffd83dbSDimitry Andric         continue;
3975ffd83dbSDimitry Andric       }
3985ffd83dbSDimitry Andric 
3995ffd83dbSDimitry Andric       SubMerge[J] = VRegs[Idx];
4005ffd83dbSDimitry Andric 
4015ffd83dbSDimitry Andric       // There are meaningful bits here we can't reuse later.
4025ffd83dbSDimitry Andric       AllMergePartsArePadding = false;
4035ffd83dbSDimitry Andric     }
4045ffd83dbSDimitry Andric 
4055ffd83dbSDimitry Andric     // If we've filled up a complete piece with padding bits, we can directly
4065ffd83dbSDimitry Andric     // emit the natural sized constant if applicable, rather than a merge of
4075ffd83dbSDimitry Andric     // smaller constants.
4085ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg) {
4095ffd83dbSDimitry Andric       if (PadStrategy == TargetOpcode::G_ANYEXT)
4105ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4115ffd83dbSDimitry Andric       else if (PadStrategy == TargetOpcode::G_ZEXT)
4125ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
4135ffd83dbSDimitry Andric 
4145ffd83dbSDimitry Andric       // If this is a sign extension, we can't materialize a trivial constant
4155ffd83dbSDimitry Andric       // with the right type and have to produce a merge.
4165ffd83dbSDimitry Andric     }
4175ffd83dbSDimitry Andric 
4185ffd83dbSDimitry Andric     if (AllPadReg) {
4195ffd83dbSDimitry Andric       // Avoid creating additional instructions if we're just adding additional
4205ffd83dbSDimitry Andric       // copies of padding bits.
4215ffd83dbSDimitry Andric       Remerge[I] = AllPadReg;
4225ffd83dbSDimitry Andric       continue;
4235ffd83dbSDimitry Andric     }
4245ffd83dbSDimitry Andric 
4255ffd83dbSDimitry Andric     if (NumSubParts == 1)
4265ffd83dbSDimitry Andric       Remerge[I] = SubMerge[0];
4275ffd83dbSDimitry Andric     else
428bdd1243dSDimitry Andric       Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
4295ffd83dbSDimitry Andric 
4305ffd83dbSDimitry Andric     // In the sign extend padding case, re-use the first all-signbit merge.
4315ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg)
4325ffd83dbSDimitry Andric       AllPadReg = Remerge[I];
4335ffd83dbSDimitry Andric   }
4345ffd83dbSDimitry Andric 
4355ffd83dbSDimitry Andric   VRegs = std::move(Remerge);
4365ffd83dbSDimitry Andric   return LCMTy;
4375ffd83dbSDimitry Andric }
4385ffd83dbSDimitry Andric 
4395ffd83dbSDimitry Andric void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
4405ffd83dbSDimitry Andric                                                ArrayRef<Register> RemergeRegs) {
4415ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
4425ffd83dbSDimitry Andric 
4435ffd83dbSDimitry Andric   // Create the merge to the widened source, and extract the relevant bits into
4445ffd83dbSDimitry Andric   // the result.
4455ffd83dbSDimitry Andric 
4465ffd83dbSDimitry Andric   if (DstTy == LCMTy) {
447bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
4485ffd83dbSDimitry Andric     return;
4495ffd83dbSDimitry Andric   }
4505ffd83dbSDimitry Andric 
451bdd1243dSDimitry Andric   auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
4525ffd83dbSDimitry Andric   if (DstTy.isScalar() && LCMTy.isScalar()) {
4535ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(DstReg, Remerge);
4545ffd83dbSDimitry Andric     return;
4555ffd83dbSDimitry Andric   }
4565ffd83dbSDimitry Andric 
4575ffd83dbSDimitry Andric   if (LCMTy.isVector()) {
458e8d8bef9SDimitry Andric     unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
459e8d8bef9SDimitry Andric     SmallVector<Register, 8> UnmergeDefs(NumDefs);
460e8d8bef9SDimitry Andric     UnmergeDefs[0] = DstReg;
461e8d8bef9SDimitry Andric     for (unsigned I = 1; I != NumDefs; ++I)
462e8d8bef9SDimitry Andric       UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
463e8d8bef9SDimitry Andric 
464e8d8bef9SDimitry Andric     MIRBuilder.buildUnmerge(UnmergeDefs,
465bdd1243dSDimitry Andric                             MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
4665ffd83dbSDimitry Andric     return;
4675ffd83dbSDimitry Andric   }
4685ffd83dbSDimitry Andric 
4695ffd83dbSDimitry Andric   llvm_unreachable("unhandled case");
4705ffd83dbSDimitry Andric }
4715ffd83dbSDimitry Andric 
4720b57cec5SDimitry Andric static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
473e8d8bef9SDimitry Andric #define RTLIBCASE_INT(LibcallPrefix)                                           \
4745ffd83dbSDimitry Andric   do {                                                                         \
4755ffd83dbSDimitry Andric     switch (Size) {                                                            \
4765ffd83dbSDimitry Andric     case 32:                                                                   \
4775ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
4785ffd83dbSDimitry Andric     case 64:                                                                   \
4795ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
4805ffd83dbSDimitry Andric     case 128:                                                                  \
4815ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
4825ffd83dbSDimitry Andric     default:                                                                   \
4835ffd83dbSDimitry Andric       llvm_unreachable("unexpected size");                                     \
4845ffd83dbSDimitry Andric     }                                                                          \
4855ffd83dbSDimitry Andric   } while (0)
4865ffd83dbSDimitry Andric 
487e8d8bef9SDimitry Andric #define RTLIBCASE(LibcallPrefix)                                               \
488e8d8bef9SDimitry Andric   do {                                                                         \
489e8d8bef9SDimitry Andric     switch (Size) {                                                            \
490e8d8bef9SDimitry Andric     case 32:                                                                   \
491e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
492e8d8bef9SDimitry Andric     case 64:                                                                   \
493e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
494e8d8bef9SDimitry Andric     case 80:                                                                   \
495e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##80;                                         \
496e8d8bef9SDimitry Andric     case 128:                                                                  \
497e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
498e8d8bef9SDimitry Andric     default:                                                                   \
499e8d8bef9SDimitry Andric       llvm_unreachable("unexpected size");                                     \
500e8d8bef9SDimitry Andric     }                                                                          \
501e8d8bef9SDimitry Andric   } while (0)
5025ffd83dbSDimitry Andric 
5030b57cec5SDimitry Andric   switch (Opcode) {
504bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
505bdd1243dSDimitry Andric     RTLIBCASE_INT(MUL_I);
5060b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
507e8d8bef9SDimitry Andric     RTLIBCASE_INT(SDIV_I);
5080b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
509e8d8bef9SDimitry Andric     RTLIBCASE_INT(UDIV_I);
5100b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
511e8d8bef9SDimitry Andric     RTLIBCASE_INT(SREM_I);
5120b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
513e8d8bef9SDimitry Andric     RTLIBCASE_INT(UREM_I);
5140b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
515e8d8bef9SDimitry Andric     RTLIBCASE_INT(CTLZ_I);
5160b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
5175ffd83dbSDimitry Andric     RTLIBCASE(ADD_F);
5180b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
5195ffd83dbSDimitry Andric     RTLIBCASE(SUB_F);
5200b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
5215ffd83dbSDimitry Andric     RTLIBCASE(MUL_F);
5220b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
5235ffd83dbSDimitry Andric     RTLIBCASE(DIV_F);
5240b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
5255ffd83dbSDimitry Andric     RTLIBCASE(EXP_F);
5260b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
5275ffd83dbSDimitry Andric     RTLIBCASE(EXP2_F);
528*5f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
529*5f757f3fSDimitry Andric     RTLIBCASE(EXP10_F);
5300b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
5315ffd83dbSDimitry Andric     RTLIBCASE(REM_F);
5320b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
5335ffd83dbSDimitry Andric     RTLIBCASE(POW_F);
5340b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
5355ffd83dbSDimitry Andric     RTLIBCASE(FMA_F);
5360b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
5375ffd83dbSDimitry Andric     RTLIBCASE(SIN_F);
5380b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
5395ffd83dbSDimitry Andric     RTLIBCASE(COS_F);
5400b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
5415ffd83dbSDimitry Andric     RTLIBCASE(LOG10_F);
5420b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
5435ffd83dbSDimitry Andric     RTLIBCASE(LOG_F);
5440b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
5455ffd83dbSDimitry Andric     RTLIBCASE(LOG2_F);
54606c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
54706c3fb27SDimitry Andric     RTLIBCASE(LDEXP_F);
5480b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
5495ffd83dbSDimitry Andric     RTLIBCASE(CEIL_F);
5500b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
5515ffd83dbSDimitry Andric     RTLIBCASE(FLOOR_F);
5525ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
5535ffd83dbSDimitry Andric     RTLIBCASE(FMIN_F);
5545ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
5555ffd83dbSDimitry Andric     RTLIBCASE(FMAX_F);
5565ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
5575ffd83dbSDimitry Andric     RTLIBCASE(SQRT_F);
5585ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
5595ffd83dbSDimitry Andric     RTLIBCASE(RINT_F);
5605ffd83dbSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
5615ffd83dbSDimitry Andric     RTLIBCASE(NEARBYINT_F);
562e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
563e8d8bef9SDimitry Andric     RTLIBCASE(ROUNDEVEN_F);
5640b57cec5SDimitry Andric   }
5650b57cec5SDimitry Andric   llvm_unreachable("Unknown libcall function");
5660b57cec5SDimitry Andric }
5670b57cec5SDimitry Andric 
5688bcb0991SDimitry Andric /// True if an instruction is in tail position in its caller. Intended for
5698bcb0991SDimitry Andric /// legalizing libcalls as tail calls when possible.
570fe6060f1SDimitry Andric static bool isLibCallInTailPosition(MachineInstr &MI,
571fe6060f1SDimitry Andric                                     const TargetInstrInfo &TII,
572fe6060f1SDimitry Andric                                     MachineRegisterInfo &MRI) {
5735ffd83dbSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
5745ffd83dbSDimitry Andric   const Function &F = MBB.getParent()->getFunction();
5758bcb0991SDimitry Andric 
5768bcb0991SDimitry Andric   // Conservatively require the attributes of the call to match those of
5778bcb0991SDimitry Andric   // the return. Ignore NoAlias and NonNull because they don't affect the
5788bcb0991SDimitry Andric   // call sequence.
5798bcb0991SDimitry Andric   AttributeList CallerAttrs = F.getAttributes();
58004eeddc0SDimitry Andric   if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
5818bcb0991SDimitry Andric           .removeAttribute(Attribute::NoAlias)
5828bcb0991SDimitry Andric           .removeAttribute(Attribute::NonNull)
5838bcb0991SDimitry Andric           .hasAttributes())
5848bcb0991SDimitry Andric     return false;
5858bcb0991SDimitry Andric 
5868bcb0991SDimitry Andric   // It's not safe to eliminate the sign / zero extension of the return value.
587349cc55cSDimitry Andric   if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
588349cc55cSDimitry Andric       CallerAttrs.hasRetAttr(Attribute::SExt))
5898bcb0991SDimitry Andric     return false;
5908bcb0991SDimitry Andric 
591fe6060f1SDimitry Andric   // Only tail call if the following instruction is a standard return or if we
592fe6060f1SDimitry Andric   // have a `thisreturn` callee, and a sequence like:
593fe6060f1SDimitry Andric   //
594fe6060f1SDimitry Andric   //   G_MEMCPY %0, %1, %2
595fe6060f1SDimitry Andric   //   $x0 = COPY %0
596fe6060f1SDimitry Andric   //   RET_ReallyLR implicit $x0
5975ffd83dbSDimitry Andric   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
598fe6060f1SDimitry Andric   if (Next != MBB.instr_end() && Next->isCopy()) {
599fe6060f1SDimitry Andric     switch (MI.getOpcode()) {
600fe6060f1SDimitry Andric     default:
601fe6060f1SDimitry Andric       llvm_unreachable("unsupported opcode");
602fe6060f1SDimitry Andric     case TargetOpcode::G_BZERO:
603fe6060f1SDimitry Andric       return false;
604fe6060f1SDimitry Andric     case TargetOpcode::G_MEMCPY:
605fe6060f1SDimitry Andric     case TargetOpcode::G_MEMMOVE:
606fe6060f1SDimitry Andric     case TargetOpcode::G_MEMSET:
607fe6060f1SDimitry Andric       break;
608fe6060f1SDimitry Andric     }
609fe6060f1SDimitry Andric 
610fe6060f1SDimitry Andric     Register VReg = MI.getOperand(0).getReg();
611fe6060f1SDimitry Andric     if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
612fe6060f1SDimitry Andric       return false;
613fe6060f1SDimitry Andric 
614fe6060f1SDimitry Andric     Register PReg = Next->getOperand(0).getReg();
615fe6060f1SDimitry Andric     if (!PReg.isPhysical())
616fe6060f1SDimitry Andric       return false;
617fe6060f1SDimitry Andric 
618fe6060f1SDimitry Andric     auto Ret = next_nodbg(Next, MBB.instr_end());
619fe6060f1SDimitry Andric     if (Ret == MBB.instr_end() || !Ret->isReturn())
620fe6060f1SDimitry Andric       return false;
621fe6060f1SDimitry Andric 
622fe6060f1SDimitry Andric     if (Ret->getNumImplicitOperands() != 1)
623fe6060f1SDimitry Andric       return false;
624fe6060f1SDimitry Andric 
625fe6060f1SDimitry Andric     if (PReg != Ret->getOperand(0).getReg())
626fe6060f1SDimitry Andric       return false;
627fe6060f1SDimitry Andric 
628fe6060f1SDimitry Andric     // Skip over the COPY that we just validated.
629fe6060f1SDimitry Andric     Next = Ret;
630fe6060f1SDimitry Andric   }
631fe6060f1SDimitry Andric 
6325ffd83dbSDimitry Andric   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
6338bcb0991SDimitry Andric     return false;
6348bcb0991SDimitry Andric 
6358bcb0991SDimitry Andric   return true;
6368bcb0991SDimitry Andric }
6378bcb0991SDimitry Andric 
6380b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
6395ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
6400b57cec5SDimitry Andric                     const CallLowering::ArgInfo &Result,
6415ffd83dbSDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args,
6425ffd83dbSDimitry Andric                     const CallingConv::ID CC) {
6430b57cec5SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
6440b57cec5SDimitry Andric 
6458bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
6465ffd83dbSDimitry Andric   Info.CallConv = CC;
6478bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
6488bcb0991SDimitry Andric   Info.OrigRet = Result;
6498bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
6508bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
6510b57cec5SDimitry Andric     return LegalizerHelper::UnableToLegalize;
6520b57cec5SDimitry Andric 
6530b57cec5SDimitry Andric   return LegalizerHelper::Legalized;
6540b57cec5SDimitry Andric }
6550b57cec5SDimitry Andric 
6565ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
6575ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
6585ffd83dbSDimitry Andric                     const CallLowering::ArgInfo &Result,
6595ffd83dbSDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args) {
6605ffd83dbSDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
6615ffd83dbSDimitry Andric   const char *Name = TLI.getLibcallName(Libcall);
6625ffd83dbSDimitry Andric   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
6635ffd83dbSDimitry Andric   return createLibcall(MIRBuilder, Name, Result, Args, CC);
6645ffd83dbSDimitry Andric }
6655ffd83dbSDimitry Andric 
6660b57cec5SDimitry Andric // Useful for libcalls where all operands have the same type.
6670b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
6680b57cec5SDimitry Andric simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
6690b57cec5SDimitry Andric               Type *OpType) {
6700b57cec5SDimitry Andric   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
6710b57cec5SDimitry Andric 
672fe6060f1SDimitry Andric   // FIXME: What does the original arg index mean here?
6730b57cec5SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
6744824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
6754824e7fdSDimitry Andric     Args.push_back({MO.getReg(), OpType, 0});
676fe6060f1SDimitry Andric   return createLibcall(MIRBuilder, Libcall,
677fe6060f1SDimitry Andric                        {MI.getOperand(0).getReg(), OpType, 0}, Args);
6780b57cec5SDimitry Andric }
6790b57cec5SDimitry Andric 
6808bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
6818bcb0991SDimitry Andric llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
682fe6060f1SDimitry Andric                        MachineInstr &MI, LostDebugLocObserver &LocObserver) {
6838bcb0991SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
6848bcb0991SDimitry Andric 
6858bcb0991SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
6868bcb0991SDimitry Andric   // Add all the args, except for the last which is an imm denoting 'tail'.
687e8d8bef9SDimitry Andric   for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
6888bcb0991SDimitry Andric     Register Reg = MI.getOperand(i).getReg();
6898bcb0991SDimitry Andric 
6908bcb0991SDimitry Andric     // Need derive an IR type for call lowering.
6918bcb0991SDimitry Andric     LLT OpLLT = MRI.getType(Reg);
6928bcb0991SDimitry Andric     Type *OpTy = nullptr;
6938bcb0991SDimitry Andric     if (OpLLT.isPointer())
694*5f757f3fSDimitry Andric       OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
6958bcb0991SDimitry Andric     else
6968bcb0991SDimitry Andric       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
697fe6060f1SDimitry Andric     Args.push_back({Reg, OpTy, 0});
6988bcb0991SDimitry Andric   }
6998bcb0991SDimitry Andric 
7008bcb0991SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
7018bcb0991SDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
7028bcb0991SDimitry Andric   RTLIB::Libcall RTLibcall;
703fe6060f1SDimitry Andric   unsigned Opc = MI.getOpcode();
704fe6060f1SDimitry Andric   switch (Opc) {
705fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
706fe6060f1SDimitry Andric     RTLibcall = RTLIB::BZERO;
707fe6060f1SDimitry Andric     break;
708e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
7098bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMCPY;
710fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
7118bcb0991SDimitry Andric     break;
712e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
7138bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMMOVE;
714fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
7158bcb0991SDimitry Andric     break;
716e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET:
717e8d8bef9SDimitry Andric     RTLibcall = RTLIB::MEMSET;
718fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
719e8d8bef9SDimitry Andric     break;
7208bcb0991SDimitry Andric   default:
721fe6060f1SDimitry Andric     llvm_unreachable("unsupported opcode");
7228bcb0991SDimitry Andric   }
7238bcb0991SDimitry Andric   const char *Name = TLI.getLibcallName(RTLibcall);
7248bcb0991SDimitry Andric 
725fe6060f1SDimitry Andric   // Unsupported libcall on the target.
726fe6060f1SDimitry Andric   if (!Name) {
727fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
728fe6060f1SDimitry Andric                       << MIRBuilder.getTII().getName(Opc) << "\n");
729fe6060f1SDimitry Andric     return LegalizerHelper::UnableToLegalize;
730fe6060f1SDimitry Andric   }
731fe6060f1SDimitry Andric 
7328bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
7338bcb0991SDimitry Andric   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
7348bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
735fe6060f1SDimitry Andric   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
736e8d8bef9SDimitry Andric   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
737fe6060f1SDimitry Andric                     isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
7388bcb0991SDimitry Andric 
7398bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
7408bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
7418bcb0991SDimitry Andric     return LegalizerHelper::UnableToLegalize;
7428bcb0991SDimitry Andric 
7438bcb0991SDimitry Andric   if (Info.LoweredTailCall) {
7448bcb0991SDimitry Andric     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
745fe6060f1SDimitry Andric 
746fe6060f1SDimitry Andric     // Check debug locations before removing the return.
747fe6060f1SDimitry Andric     LocObserver.checkpoint(true);
748fe6060f1SDimitry Andric 
7495ffd83dbSDimitry Andric     // We must have a return following the call (or debug insts) to get past
7508bcb0991SDimitry Andric     // isLibCallInTailPosition.
7515ffd83dbSDimitry Andric     do {
7525ffd83dbSDimitry Andric       MachineInstr *Next = MI.getNextNode();
753fe6060f1SDimitry Andric       assert(Next &&
754fe6060f1SDimitry Andric              (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
7555ffd83dbSDimitry Andric              "Expected instr following MI to be return or debug inst?");
7568bcb0991SDimitry Andric       // We lowered a tail call, so the call is now the return from the block.
7578bcb0991SDimitry Andric       // Delete the old return.
7585ffd83dbSDimitry Andric       Next->eraseFromParent();
7595ffd83dbSDimitry Andric     } while (MI.getNextNode());
760fe6060f1SDimitry Andric 
761fe6060f1SDimitry Andric     // We expect to lose the debug location from the return.
762fe6060f1SDimitry Andric     LocObserver.checkpoint(false);
7638bcb0991SDimitry Andric   }
7648bcb0991SDimitry Andric 
7658bcb0991SDimitry Andric   return LegalizerHelper::Legalized;
7668bcb0991SDimitry Andric }
7678bcb0991SDimitry Andric 
7680b57cec5SDimitry Andric static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
7690b57cec5SDimitry Andric                                        Type *FromType) {
7700b57cec5SDimitry Andric   auto ToMVT = MVT::getVT(ToType);
7710b57cec5SDimitry Andric   auto FromMVT = MVT::getVT(FromType);
7720b57cec5SDimitry Andric 
7730b57cec5SDimitry Andric   switch (Opcode) {
7740b57cec5SDimitry Andric   case TargetOpcode::G_FPEXT:
7750b57cec5SDimitry Andric     return RTLIB::getFPEXT(FromMVT, ToMVT);
7760b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC:
7770b57cec5SDimitry Andric     return RTLIB::getFPROUND(FromMVT, ToMVT);
7780b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
7790b57cec5SDimitry Andric     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
7800b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
7810b57cec5SDimitry Andric     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
7820b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
7830b57cec5SDimitry Andric     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
7840b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
7850b57cec5SDimitry Andric     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
7860b57cec5SDimitry Andric   }
7870b57cec5SDimitry Andric   llvm_unreachable("Unsupported libcall function");
7880b57cec5SDimitry Andric }
7890b57cec5SDimitry Andric 
7900b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
7910b57cec5SDimitry Andric conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
7920b57cec5SDimitry Andric                   Type *FromType) {
7930b57cec5SDimitry Andric   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
794fe6060f1SDimitry Andric   return createLibcall(MIRBuilder, Libcall,
795fe6060f1SDimitry Andric                        {MI.getOperand(0).getReg(), ToType, 0},
796fe6060f1SDimitry Andric                        {{MI.getOperand(1).getReg(), FromType, 0}});
7970b57cec5SDimitry Andric }
7980b57cec5SDimitry Andric 
799*5f757f3fSDimitry Andric static RTLIB::Libcall
800*5f757f3fSDimitry Andric getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
801*5f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall;
802*5f757f3fSDimitry Andric   switch (MI.getOpcode()) {
803*5f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE:
804*5f757f3fSDimitry Andric     RTLibcall = RTLIB::FEGETMODE;
805*5f757f3fSDimitry Andric     break;
806*5f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE:
807*5f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE:
808*5f757f3fSDimitry Andric     RTLibcall = RTLIB::FESETMODE;
809*5f757f3fSDimitry Andric     break;
810*5f757f3fSDimitry Andric   default:
811*5f757f3fSDimitry Andric     llvm_unreachable("Unexpected opcode");
812*5f757f3fSDimitry Andric   }
813*5f757f3fSDimitry Andric   return RTLibcall;
814*5f757f3fSDimitry Andric }
815*5f757f3fSDimitry Andric 
816*5f757f3fSDimitry Andric // Some library functions that read FP state (fegetmode, fegetenv) write the
817*5f757f3fSDimitry Andric // state into a region in memory. IR intrinsics that do the same operations
818*5f757f3fSDimitry Andric // (get_fpmode, get_fpenv) return the state as integer value. To implement these
819*5f757f3fSDimitry Andric // intrinsics via the library functions, we need to use temporary variable,
820*5f757f3fSDimitry Andric // for example:
821*5f757f3fSDimitry Andric //
822*5f757f3fSDimitry Andric //     %0:_(s32) = G_GET_FPMODE
823*5f757f3fSDimitry Andric //
824*5f757f3fSDimitry Andric // is transformed to:
825*5f757f3fSDimitry Andric //
826*5f757f3fSDimitry Andric //     %1:_(p0) = G_FRAME_INDEX %stack.0
827*5f757f3fSDimitry Andric //     BL &fegetmode
828*5f757f3fSDimitry Andric //     %0:_(s32) = G_LOAD % 1
829*5f757f3fSDimitry Andric //
830*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
831*5f757f3fSDimitry Andric LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
832*5f757f3fSDimitry Andric                                        MachineInstr &MI) {
833*5f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
834*5f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
835*5f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
836*5f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
837*5f757f3fSDimitry Andric 
838*5f757f3fSDimitry Andric   // Create temporary, where library function will put the read state.
839*5f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
840*5f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Dst);
841*5f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
842*5f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
843*5f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
844*5f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
845*5f757f3fSDimitry Andric 
846*5f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
847*5f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
848*5f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
849*5f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
850*5f757f3fSDimitry Andric   auto Res =
851*5f757f3fSDimitry Andric       createLibcall(MIRBuilder, RTLibcall,
852*5f757f3fSDimitry Andric                     CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
853*5f757f3fSDimitry Andric                     CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
854*5f757f3fSDimitry Andric   if (Res != LegalizerHelper::Legalized)
855*5f757f3fSDimitry Andric     return Res;
856*5f757f3fSDimitry Andric 
857*5f757f3fSDimitry Andric   // Create a load from the temporary.
858*5f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
859*5f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
860*5f757f3fSDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
861*5f757f3fSDimitry Andric 
862*5f757f3fSDimitry Andric   return LegalizerHelper::Legalized;
863*5f757f3fSDimitry Andric }
864*5f757f3fSDimitry Andric 
865*5f757f3fSDimitry Andric // Similar to `createGetStateLibcall` the function calls a library function
866*5f757f3fSDimitry Andric // using transient space in stack. In this case the library function reads
867*5f757f3fSDimitry Andric // content of memory region.
868*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
869*5f757f3fSDimitry Andric LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
870*5f757f3fSDimitry Andric                                        MachineInstr &MI) {
871*5f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
872*5f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
873*5f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
874*5f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
875*5f757f3fSDimitry Andric 
876*5f757f3fSDimitry Andric   // Create temporary, where library function will get the new state.
877*5f757f3fSDimitry Andric   Register Src = MI.getOperand(0).getReg();
878*5f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Src);
879*5f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
880*5f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
881*5f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
882*5f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
883*5f757f3fSDimitry Andric 
884*5f757f3fSDimitry Andric   // Put the new state into the temporary.
885*5f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
886*5f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
887*5f757f3fSDimitry Andric   MIRBuilder.buildStore(Src, Temp, *MMO);
888*5f757f3fSDimitry Andric 
889*5f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
890*5f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
891*5f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
892*5f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
893*5f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
894*5f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
895*5f757f3fSDimitry Andric                        CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}));
896*5f757f3fSDimitry Andric }
897*5f757f3fSDimitry Andric 
898*5f757f3fSDimitry Andric // The function is used to legalize operations that set default environment
899*5f757f3fSDimitry Andric // state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
900*5f757f3fSDimitry Andric // On most targets supported in glibc FE_DFL_MODE is defined as
901*5f757f3fSDimitry Andric // `((const femode_t *) -1)`. Such assumption is used here. If for some target
902*5f757f3fSDimitry Andric // it is not true, the target must provide custom lowering.
903*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
904*5f757f3fSDimitry Andric LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
905*5f757f3fSDimitry Andric                                          MachineInstr &MI) {
906*5f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
907*5f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
908*5f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
909*5f757f3fSDimitry Andric 
910*5f757f3fSDimitry Andric   // Create an argument for the library function.
911*5f757f3fSDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
912*5f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
913*5f757f3fSDimitry Andric   unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
914*5f757f3fSDimitry Andric   LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
915*5f757f3fSDimitry Andric   auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
916*5f757f3fSDimitry Andric   DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
917*5f757f3fSDimitry Andric   MIRBuilder.buildIntToPtr(Dest, DefValue);
918*5f757f3fSDimitry Andric 
919*5f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
920*5f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
921*5f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
922*5f757f3fSDimitry Andric                        CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0}));
923*5f757f3fSDimitry Andric }
924*5f757f3fSDimitry Andric 
9250b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
926fe6060f1SDimitry Andric LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
9270b57cec5SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
9280b57cec5SDimitry Andric 
9290b57cec5SDimitry Andric   switch (MI.getOpcode()) {
9300b57cec5SDimitry Andric   default:
9310b57cec5SDimitry Andric     return UnableToLegalize;
932bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
9330b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
9340b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
9350b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
9360b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
9370b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
938*5f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
939*5f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
9400b57cec5SDimitry Andric     Type *HLTy = IntegerType::get(Ctx, Size);
9410b57cec5SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
9420b57cec5SDimitry Andric     if (Status != Legalized)
9430b57cec5SDimitry Andric       return Status;
9440b57cec5SDimitry Andric     break;
9450b57cec5SDimitry Andric   }
9460b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
9470b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
9480b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
9490b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
9500b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
9510b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
9520b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
9530b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
9540b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
9550b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
9560b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
9570b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
95806c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
9590b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
9600b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
961*5f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
9620b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
9635ffd83dbSDimitry Andric   case TargetOpcode::G_FFLOOR:
9645ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
9655ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
9665ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
9675ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
968e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
969e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
970*5f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
971*5f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
9725ffd83dbSDimitry Andric     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
973e8d8bef9SDimitry Andric     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
974e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
9750b57cec5SDimitry Andric       return UnableToLegalize;
9760b57cec5SDimitry Andric     }
9770b57cec5SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
9780b57cec5SDimitry Andric     if (Status != Legalized)
9790b57cec5SDimitry Andric       return Status;
9800b57cec5SDimitry Andric     break;
9810b57cec5SDimitry Andric   }
9825ffd83dbSDimitry Andric   case TargetOpcode::G_FPEXT:
9830b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC: {
9845ffd83dbSDimitry Andric     Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
9855ffd83dbSDimitry Andric     Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
9865ffd83dbSDimitry Andric     if (!FromTy || !ToTy)
9870b57cec5SDimitry Andric       return UnableToLegalize;
9885ffd83dbSDimitry Andric     LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
9890b57cec5SDimitry Andric     if (Status != Legalized)
9900b57cec5SDimitry Andric       return Status;
9910b57cec5SDimitry Andric     break;
9920b57cec5SDimitry Andric   }
9930b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
9940b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI: {
9950b57cec5SDimitry Andric     // FIXME: Support other types
9960b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
9970b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
9980b57cec5SDimitry Andric     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
9990b57cec5SDimitry Andric       return UnableToLegalize;
10000b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
10010b57cec5SDimitry Andric         MI, MIRBuilder,
10020b57cec5SDimitry Andric         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
10030b57cec5SDimitry Andric         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
10040b57cec5SDimitry Andric     if (Status != Legalized)
10050b57cec5SDimitry Andric       return Status;
10060b57cec5SDimitry Andric     break;
10070b57cec5SDimitry Andric   }
10080b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
10090b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP: {
10100b57cec5SDimitry Andric     // FIXME: Support other types
10110b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
10120b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
10130b57cec5SDimitry Andric     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
10140b57cec5SDimitry Andric       return UnableToLegalize;
10150b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
10160b57cec5SDimitry Andric         MI, MIRBuilder,
10170b57cec5SDimitry Andric         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
10180b57cec5SDimitry Andric         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
10190b57cec5SDimitry Andric     if (Status != Legalized)
10200b57cec5SDimitry Andric       return Status;
10210b57cec5SDimitry Andric     break;
10220b57cec5SDimitry Andric   }
1023fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
1024e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
1025e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
1026e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET: {
1027fe6060f1SDimitry Andric     LegalizeResult Result =
1028fe6060f1SDimitry Andric         createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1029fe6060f1SDimitry Andric     if (Result != Legalized)
1030fe6060f1SDimitry Andric       return Result;
1031e8d8bef9SDimitry Andric     MI.eraseFromParent();
1032e8d8bef9SDimitry Andric     return Result;
1033e8d8bef9SDimitry Andric   }
1034*5f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE: {
1035*5f757f3fSDimitry Andric     LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI);
1036*5f757f3fSDimitry Andric     if (Result != Legalized)
1037*5f757f3fSDimitry Andric       return Result;
1038*5f757f3fSDimitry Andric     break;
1039*5f757f3fSDimitry Andric   }
1040*5f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE: {
1041*5f757f3fSDimitry Andric     LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI);
1042*5f757f3fSDimitry Andric     if (Result != Legalized)
1043*5f757f3fSDimitry Andric       return Result;
1044*5f757f3fSDimitry Andric     break;
1045*5f757f3fSDimitry Andric   }
1046*5f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE: {
1047*5f757f3fSDimitry Andric     LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI);
1048*5f757f3fSDimitry Andric     if (Result != Legalized)
1049*5f757f3fSDimitry Andric       return Result;
1050*5f757f3fSDimitry Andric     break;
1051*5f757f3fSDimitry Andric   }
10520b57cec5SDimitry Andric   }
10530b57cec5SDimitry Andric 
10540b57cec5SDimitry Andric   MI.eraseFromParent();
10550b57cec5SDimitry Andric   return Legalized;
10560b57cec5SDimitry Andric }
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
10590b57cec5SDimitry Andric                                                               unsigned TypeIdx,
10600b57cec5SDimitry Andric                                                               LLT NarrowTy) {
10610b57cec5SDimitry Andric   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
10620b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
10630b57cec5SDimitry Andric 
10640b57cec5SDimitry Andric   switch (MI.getOpcode()) {
10650b57cec5SDimitry Andric   default:
10660b57cec5SDimitry Andric     return UnableToLegalize;
10670b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
10685ffd83dbSDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
10695ffd83dbSDimitry Andric     LLT DstTy = MRI.getType(DstReg);
10705ffd83dbSDimitry Andric 
10715ffd83dbSDimitry Andric     // If SizeOp0 is not an exact multiple of NarrowSize, emit
10725ffd83dbSDimitry Andric     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
10735ffd83dbSDimitry Andric     // FIXME: Although this would also be legal for the general case, it causes
10745ffd83dbSDimitry Andric     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
10755ffd83dbSDimitry Andric     //  combines not being hit). This seems to be a problem related to the
10765ffd83dbSDimitry Andric     //  artifact combiner.
10775ffd83dbSDimitry Andric     if (SizeOp0 % NarrowSize != 0) {
10785ffd83dbSDimitry Andric       LLT ImplicitTy = NarrowTy;
10795ffd83dbSDimitry Andric       if (DstTy.isVector())
1080fe6060f1SDimitry Andric         ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
10815ffd83dbSDimitry Andric 
10825ffd83dbSDimitry Andric       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
10835ffd83dbSDimitry Andric       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
10845ffd83dbSDimitry Andric 
10855ffd83dbSDimitry Andric       MI.eraseFromParent();
10865ffd83dbSDimitry Andric       return Legalized;
10875ffd83dbSDimitry Andric     }
10885ffd83dbSDimitry Andric 
10890b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
10900b57cec5SDimitry Andric 
10910b57cec5SDimitry Andric     SmallVector<Register, 2> DstRegs;
10920b57cec5SDimitry Andric     for (int i = 0; i < NumParts; ++i)
10935ffd83dbSDimitry Andric       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
10940b57cec5SDimitry Andric 
10955ffd83dbSDimitry Andric     if (DstTy.isVector())
10960b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, DstRegs);
10970b57cec5SDimitry Andric     else
1098bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
10990b57cec5SDimitry Andric     MI.eraseFromParent();
11000b57cec5SDimitry Andric     return Legalized;
11010b57cec5SDimitry Andric   }
11020b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
11030b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
11040b57cec5SDimitry Andric     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
11050b57cec5SDimitry Andric     unsigned TotalSize = Ty.getSizeInBits();
11060b57cec5SDimitry Andric     unsigned NarrowSize = NarrowTy.getSizeInBits();
11070b57cec5SDimitry Andric     int NumParts = TotalSize / NarrowSize;
11080b57cec5SDimitry Andric 
11090b57cec5SDimitry Andric     SmallVector<Register, 4> PartRegs;
11100b57cec5SDimitry Andric     for (int I = 0; I != NumParts; ++I) {
11110b57cec5SDimitry Andric       unsigned Offset = I * NarrowSize;
11120b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(NarrowTy,
11130b57cec5SDimitry Andric                                         Val.lshr(Offset).trunc(NarrowSize));
11140b57cec5SDimitry Andric       PartRegs.push_back(K.getReg(0));
11150b57cec5SDimitry Andric     }
11160b57cec5SDimitry Andric 
11170b57cec5SDimitry Andric     LLT LeftoverTy;
11180b57cec5SDimitry Andric     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
11190b57cec5SDimitry Andric     SmallVector<Register, 1> LeftoverRegs;
11200b57cec5SDimitry Andric     if (LeftoverBits != 0) {
11210b57cec5SDimitry Andric       LeftoverTy = LLT::scalar(LeftoverBits);
11220b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(
11230b57cec5SDimitry Andric         LeftoverTy,
11240b57cec5SDimitry Andric         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
11250b57cec5SDimitry Andric       LeftoverRegs.push_back(K.getReg(0));
11260b57cec5SDimitry Andric     }
11270b57cec5SDimitry Andric 
11280b57cec5SDimitry Andric     insertParts(MI.getOperand(0).getReg(),
11290b57cec5SDimitry Andric                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
11300b57cec5SDimitry Andric 
11310b57cec5SDimitry Andric     MI.eraseFromParent();
11320b57cec5SDimitry Andric     return Legalized;
11330b57cec5SDimitry Andric   }
11345ffd83dbSDimitry Andric   case TargetOpcode::G_SEXT:
11355ffd83dbSDimitry Andric   case TargetOpcode::G_ZEXT:
11365ffd83dbSDimitry Andric   case TargetOpcode::G_ANYEXT:
11375ffd83dbSDimitry Andric     return narrowScalarExt(MI, TypeIdx, NarrowTy);
11388bcb0991SDimitry Andric   case TargetOpcode::G_TRUNC: {
11398bcb0991SDimitry Andric     if (TypeIdx != 1)
11408bcb0991SDimitry Andric       return UnableToLegalize;
11418bcb0991SDimitry Andric 
11428bcb0991SDimitry Andric     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
11438bcb0991SDimitry Andric     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
11448bcb0991SDimitry Andric       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
11458bcb0991SDimitry Andric       return UnableToLegalize;
11468bcb0991SDimitry Andric     }
11478bcb0991SDimitry Andric 
11485ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
11495ffd83dbSDimitry Andric     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
11508bcb0991SDimitry Andric     MI.eraseFromParent();
11518bcb0991SDimitry Andric     return Legalized;
11528bcb0991SDimitry Andric   }
11538bcb0991SDimitry Andric 
11540eae32dcSDimitry Andric   case TargetOpcode::G_FREEZE: {
11550eae32dcSDimitry Andric     if (TypeIdx != 0)
11560eae32dcSDimitry Andric       return UnableToLegalize;
11570eae32dcSDimitry Andric 
11580eae32dcSDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
11590eae32dcSDimitry Andric     // Should widen scalar first
11600eae32dcSDimitry Andric     if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
11610eae32dcSDimitry Andric       return UnableToLegalize;
11620eae32dcSDimitry Andric 
11630eae32dcSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
11640eae32dcSDimitry Andric     SmallVector<Register, 8> Parts;
11650eae32dcSDimitry Andric     for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
11660eae32dcSDimitry Andric       Parts.push_back(
11670eae32dcSDimitry Andric           MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
11680eae32dcSDimitry Andric     }
11690eae32dcSDimitry Andric 
1170bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
11710eae32dcSDimitry Andric     MI.eraseFromParent();
11720eae32dcSDimitry Andric     return Legalized;
11730eae32dcSDimitry Andric   }
1174fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
1175fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
1176fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
1177fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
1178fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
1179fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
1180fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
1181fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
1182fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
1183fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
1184fe6060f1SDimitry Andric     return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
11850b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
11860b57cec5SDimitry Andric   case TargetOpcode::G_UMULH:
11870b57cec5SDimitry Andric     return narrowScalarMul(MI, NarrowTy);
11880b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
11890b57cec5SDimitry Andric     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
11900b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
11910b57cec5SDimitry Andric     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
11920b57cec5SDimitry Andric   case TargetOpcode::G_LOAD: {
1193fe6060f1SDimitry Andric     auto &LoadMI = cast<GLoad>(MI);
1194fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
11950b57cec5SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
11960b57cec5SDimitry Andric     if (DstTy.isVector())
11970b57cec5SDimitry Andric       return UnableToLegalize;
11980b57cec5SDimitry Andric 
1199fe6060f1SDimitry Andric     if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
12000b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1201fe6060f1SDimitry Andric       MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
12020b57cec5SDimitry Andric       MIRBuilder.buildAnyExt(DstReg, TmpReg);
1203fe6060f1SDimitry Andric       LoadMI.eraseFromParent();
12040b57cec5SDimitry Andric       return Legalized;
12050b57cec5SDimitry Andric     }
12060b57cec5SDimitry Andric 
1207fe6060f1SDimitry Andric     return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
12080b57cec5SDimitry Andric   }
12090b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
12100b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD: {
1211fe6060f1SDimitry Andric     auto &LoadMI = cast<GExtLoad>(MI);
1212fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
1213fe6060f1SDimitry Andric     Register PtrReg = LoadMI.getPointerReg();
12140b57cec5SDimitry Andric 
12150b57cec5SDimitry Andric     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1216fe6060f1SDimitry Andric     auto &MMO = LoadMI.getMMO();
1217e8d8bef9SDimitry Andric     unsigned MemSize = MMO.getSizeInBits();
1218e8d8bef9SDimitry Andric 
1219e8d8bef9SDimitry Andric     if (MemSize == NarrowSize) {
12200b57cec5SDimitry Andric       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1221e8d8bef9SDimitry Andric     } else if (MemSize < NarrowSize) {
1222fe6060f1SDimitry Andric       MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1223e8d8bef9SDimitry Andric     } else if (MemSize > NarrowSize) {
1224e8d8bef9SDimitry Andric       // FIXME: Need to split the load.
1225e8d8bef9SDimitry Andric       return UnableToLegalize;
12260b57cec5SDimitry Andric     }
12270b57cec5SDimitry Andric 
1228fe6060f1SDimitry Andric     if (isa<GZExtLoad>(LoadMI))
12290b57cec5SDimitry Andric       MIRBuilder.buildZExt(DstReg, TmpReg);
12300b57cec5SDimitry Andric     else
12310b57cec5SDimitry Andric       MIRBuilder.buildSExt(DstReg, TmpReg);
12320b57cec5SDimitry Andric 
1233fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
12340b57cec5SDimitry Andric     return Legalized;
12350b57cec5SDimitry Andric   }
12360b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
1237fe6060f1SDimitry Andric     auto &StoreMI = cast<GStore>(MI);
12380b57cec5SDimitry Andric 
1239fe6060f1SDimitry Andric     Register SrcReg = StoreMI.getValueReg();
12400b57cec5SDimitry Andric     LLT SrcTy = MRI.getType(SrcReg);
12410b57cec5SDimitry Andric     if (SrcTy.isVector())
12420b57cec5SDimitry Andric       return UnableToLegalize;
12430b57cec5SDimitry Andric 
12440b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
12450b57cec5SDimitry Andric     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
12460b57cec5SDimitry Andric     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
12470b57cec5SDimitry Andric     if (SrcTy.isVector() && LeftoverBits != 0)
12480b57cec5SDimitry Andric       return UnableToLegalize;
12490b57cec5SDimitry Andric 
1250fe6060f1SDimitry Andric     if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
12510b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
12520b57cec5SDimitry Andric       MIRBuilder.buildTrunc(TmpReg, SrcReg);
1253fe6060f1SDimitry Andric       MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1254fe6060f1SDimitry Andric       StoreMI.eraseFromParent();
12550b57cec5SDimitry Andric       return Legalized;
12560b57cec5SDimitry Andric     }
12570b57cec5SDimitry Andric 
1258fe6060f1SDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
12590b57cec5SDimitry Andric   }
12600b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
12610b57cec5SDimitry Andric     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
12620b57cec5SDimitry Andric   case TargetOpcode::G_AND:
12630b57cec5SDimitry Andric   case TargetOpcode::G_OR:
12640b57cec5SDimitry Andric   case TargetOpcode::G_XOR: {
12650b57cec5SDimitry Andric     // Legalize bitwise operation:
12660b57cec5SDimitry Andric     // A = BinOp<Ty> B, C
12670b57cec5SDimitry Andric     // into:
12680b57cec5SDimitry Andric     // B1, ..., BN = G_UNMERGE_VALUES B
12690b57cec5SDimitry Andric     // C1, ..., CN = G_UNMERGE_VALUES C
12700b57cec5SDimitry Andric     // A1 = BinOp<Ty/N> B1, C1
12710b57cec5SDimitry Andric     // ...
12720b57cec5SDimitry Andric     // AN = BinOp<Ty/N> BN, CN
12730b57cec5SDimitry Andric     // A = G_MERGE_VALUES A1, ..., AN
12740b57cec5SDimitry Andric     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
12750b57cec5SDimitry Andric   }
12760b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
12770b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
12780b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
12790b57cec5SDimitry Andric     return narrowScalarShift(MI, TypeIdx, NarrowTy);
12800b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
12810b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
12820b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
12830b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
12840b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
12855ffd83dbSDimitry Andric     if (TypeIdx == 1)
12865ffd83dbSDimitry Andric       switch (MI.getOpcode()) {
12875ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ:
12885ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
12895ffd83dbSDimitry Andric         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
12905ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ:
12915ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
12925ffd83dbSDimitry Andric         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
12935ffd83dbSDimitry Andric       case TargetOpcode::G_CTPOP:
12945ffd83dbSDimitry Andric         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
12955ffd83dbSDimitry Andric       default:
12965ffd83dbSDimitry Andric         return UnableToLegalize;
12975ffd83dbSDimitry Andric       }
12980b57cec5SDimitry Andric 
12990b57cec5SDimitry Andric     Observer.changingInstr(MI);
13000b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
13010b57cec5SDimitry Andric     Observer.changedInstr(MI);
13020b57cec5SDimitry Andric     return Legalized;
13030b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
13040b57cec5SDimitry Andric     if (TypeIdx != 1)
13050b57cec5SDimitry Andric       return UnableToLegalize;
13060b57cec5SDimitry Andric 
13070b57cec5SDimitry Andric     Observer.changingInstr(MI);
13080b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, 1);
13090b57cec5SDimitry Andric     Observer.changedInstr(MI);
13100b57cec5SDimitry Andric     return Legalized;
13110b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
13120b57cec5SDimitry Andric     if (TypeIdx != 0)
13130b57cec5SDimitry Andric       return UnableToLegalize;
13140b57cec5SDimitry Andric 
13150b57cec5SDimitry Andric     Observer.changingInstr(MI);
13160b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
13170b57cec5SDimitry Andric     Observer.changedInstr(MI);
13180b57cec5SDimitry Andric     return Legalized;
13190b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
1320d409305fSDimitry Andric     // FIXME: add support for when SizeOp0 isn't an exact multiple of
1321d409305fSDimitry Andric     // NarrowSize.
1322d409305fSDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1323d409305fSDimitry Andric       return UnableToLegalize;
1324d409305fSDimitry Andric 
13250b57cec5SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
13265ffd83dbSDimitry Andric     SmallVector<Register, 2> DstRegs(NumParts);
13275ffd83dbSDimitry Andric     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
13280b57cec5SDimitry Andric     Observer.changingInstr(MI);
13290b57cec5SDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
13300b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1331bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
13320b57cec5SDimitry Andric       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
13330b57cec5SDimitry Andric                    SrcRegs[i / 2]);
13340b57cec5SDimitry Andric     }
13350b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
13360b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, MI);
13370b57cec5SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
13380b57cec5SDimitry Andric       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
13390b57cec5SDimitry Andric       MachineInstrBuilder MIB =
13400b57cec5SDimitry Andric           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
13410b57cec5SDimitry Andric       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
13420b57cec5SDimitry Andric         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
13430b57cec5SDimitry Andric     }
13448bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1345bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
13460b57cec5SDimitry Andric     Observer.changedInstr(MI);
13470b57cec5SDimitry Andric     MI.eraseFromParent();
13480b57cec5SDimitry Andric     return Legalized;
13490b57cec5SDimitry Andric   }
13500b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
13510b57cec5SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
13520b57cec5SDimitry Andric     if (TypeIdx != 2)
13530b57cec5SDimitry Andric       return UnableToLegalize;
13540b57cec5SDimitry Andric 
13550b57cec5SDimitry Andric     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
13560b57cec5SDimitry Andric     Observer.changingInstr(MI);
13570b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, OpIdx);
13580b57cec5SDimitry Andric     Observer.changedInstr(MI);
13590b57cec5SDimitry Andric     return Legalized;
13600b57cec5SDimitry Andric   }
13610b57cec5SDimitry Andric   case TargetOpcode::G_ICMP: {
1362fe6060f1SDimitry Andric     Register LHS = MI.getOperand(2).getReg();
1363fe6060f1SDimitry Andric     LLT SrcTy = MRI.getType(LHS);
1364fe6060f1SDimitry Andric     uint64_t SrcSize = SrcTy.getSizeInBits();
13650b57cec5SDimitry Andric     CmpInst::Predicate Pred =
13660b57cec5SDimitry Andric         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
13670b57cec5SDimitry Andric 
1368fe6060f1SDimitry Andric     // TODO: Handle the non-equality case for weird sizes.
1369fe6060f1SDimitry Andric     if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1370fe6060f1SDimitry Andric       return UnableToLegalize;
1371fe6060f1SDimitry Andric 
1372fe6060f1SDimitry Andric     LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1373fe6060f1SDimitry Andric     SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1374fe6060f1SDimitry Andric     if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1375fe6060f1SDimitry Andric                       LHSLeftoverRegs))
1376fe6060f1SDimitry Andric       return UnableToLegalize;
1377fe6060f1SDimitry Andric 
1378fe6060f1SDimitry Andric     LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1379fe6060f1SDimitry Andric     SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1380fe6060f1SDimitry Andric     if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1381fe6060f1SDimitry Andric                       RHSPartRegs, RHSLeftoverRegs))
1382fe6060f1SDimitry Andric       return UnableToLegalize;
1383fe6060f1SDimitry Andric 
1384fe6060f1SDimitry Andric     // We now have the LHS and RHS of the compare split into narrow-type
1385fe6060f1SDimitry Andric     // registers, plus potentially some leftover type.
1386fe6060f1SDimitry Andric     Register Dst = MI.getOperand(0).getReg();
1387fe6060f1SDimitry Andric     LLT ResTy = MRI.getType(Dst);
1388fe6060f1SDimitry Andric     if (ICmpInst::isEquality(Pred)) {
1389fe6060f1SDimitry Andric       // For each part on the LHS and RHS, keep track of the result of XOR-ing
1390fe6060f1SDimitry Andric       // them together. For each equal part, the result should be all 0s. For
1391fe6060f1SDimitry Andric       // each non-equal part, we'll get at least one 1.
1392fe6060f1SDimitry Andric       auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1393fe6060f1SDimitry Andric       SmallVector<Register, 4> Xors;
1394fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1395fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1396fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1397fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1398fe6060f1SDimitry Andric         Xors.push_back(Xor);
1399fe6060f1SDimitry Andric       }
1400fe6060f1SDimitry Andric 
1401fe6060f1SDimitry Andric       // Build a G_XOR for each leftover register. Each G_XOR must be widened
1402fe6060f1SDimitry Andric       // to the desired narrow type so that we can OR them together later.
1403fe6060f1SDimitry Andric       SmallVector<Register, 4> WidenedXors;
1404fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1405fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1406fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1407fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1408fe6060f1SDimitry Andric         LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1409fe6060f1SDimitry Andric         buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1410fe6060f1SDimitry Andric                             /* PadStrategy = */ TargetOpcode::G_ZEXT);
1411fe6060f1SDimitry Andric         Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1412fe6060f1SDimitry Andric       }
1413fe6060f1SDimitry Andric 
1414fe6060f1SDimitry Andric       // Now, for each part we broke up, we know if they are equal/not equal
1415fe6060f1SDimitry Andric       // based off the G_XOR. We can OR these all together and compare against
1416fe6060f1SDimitry Andric       // 0 to get the result.
1417fe6060f1SDimitry Andric       assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1418fe6060f1SDimitry Andric       auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1419fe6060f1SDimitry Andric       for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1420fe6060f1SDimitry Andric         Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1421fe6060f1SDimitry Andric       MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
14220b57cec5SDimitry Andric     } else {
1423fe6060f1SDimitry Andric       // TODO: Handle non-power-of-two types.
1424fe6060f1SDimitry Andric       assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1425fe6060f1SDimitry Andric       assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1426fe6060f1SDimitry Andric       Register LHSL = LHSPartRegs[0];
1427fe6060f1SDimitry Andric       Register LHSH = LHSPartRegs[1];
1428fe6060f1SDimitry Andric       Register RHSL = RHSPartRegs[0];
1429fe6060f1SDimitry Andric       Register RHSH = RHSPartRegs[1];
14308bcb0991SDimitry Andric       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
14310b57cec5SDimitry Andric       MachineInstrBuilder CmpHEQ =
14328bcb0991SDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
14330b57cec5SDimitry Andric       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
14348bcb0991SDimitry Andric           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1435fe6060f1SDimitry Andric       MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
14360b57cec5SDimitry Andric     }
14370b57cec5SDimitry Andric     MI.eraseFromParent();
14380b57cec5SDimitry Andric     return Legalized;
14390b57cec5SDimitry Andric   }
14408bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
14418bcb0991SDimitry Andric     if (TypeIdx != 0)
14428bcb0991SDimitry Andric       return UnableToLegalize;
14438bcb0991SDimitry Andric 
14448bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
14458bcb0991SDimitry Andric 
14468bcb0991SDimitry Andric     // So long as the new type has more bits than the bits we're extending we
14478bcb0991SDimitry Andric     // don't need to break it apart.
1448*5f757f3fSDimitry Andric     if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
14498bcb0991SDimitry Andric       Observer.changingInstr(MI);
14508bcb0991SDimitry Andric       // We don't lose any non-extension bits by truncating the src and
14518bcb0991SDimitry Andric       // sign-extending the dst.
14528bcb0991SDimitry Andric       MachineOperand &MO1 = MI.getOperand(1);
14535ffd83dbSDimitry Andric       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
14545ffd83dbSDimitry Andric       MO1.setReg(TruncMIB.getReg(0));
14558bcb0991SDimitry Andric 
14568bcb0991SDimitry Andric       MachineOperand &MO2 = MI.getOperand(0);
14578bcb0991SDimitry Andric       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
14588bcb0991SDimitry Andric       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
14595ffd83dbSDimitry Andric       MIRBuilder.buildSExt(MO2, DstExt);
14608bcb0991SDimitry Andric       MO2.setReg(DstExt);
14618bcb0991SDimitry Andric       Observer.changedInstr(MI);
14628bcb0991SDimitry Andric       return Legalized;
14638bcb0991SDimitry Andric     }
14648bcb0991SDimitry Andric 
14658bcb0991SDimitry Andric     // Break it apart. Components below the extension point are unmodified. The
14668bcb0991SDimitry Andric     // component containing the extension point becomes a narrower SEXT_INREG.
14678bcb0991SDimitry Andric     // Components above it are ashr'd from the component containing the
14688bcb0991SDimitry Andric     // extension point.
14698bcb0991SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
14708bcb0991SDimitry Andric       return UnableToLegalize;
14718bcb0991SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
14728bcb0991SDimitry Andric 
14738bcb0991SDimitry Andric     // List the registers where the destination will be scattered.
14748bcb0991SDimitry Andric     SmallVector<Register, 2> DstRegs;
14758bcb0991SDimitry Andric     // List the registers where the source will be split.
14768bcb0991SDimitry Andric     SmallVector<Register, 2> SrcRegs;
14778bcb0991SDimitry Andric 
14788bcb0991SDimitry Andric     // Create all the temporary registers.
14798bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
14808bcb0991SDimitry Andric       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
14818bcb0991SDimitry Andric 
14828bcb0991SDimitry Andric       SrcRegs.push_back(SrcReg);
14838bcb0991SDimitry Andric     }
14848bcb0991SDimitry Andric 
14858bcb0991SDimitry Andric     // Explode the big arguments into smaller chunks.
14865ffd83dbSDimitry Andric     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
14878bcb0991SDimitry Andric 
14888bcb0991SDimitry Andric     Register AshrCstReg =
14898bcb0991SDimitry Andric         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
14905ffd83dbSDimitry Andric             .getReg(0);
1491*5f757f3fSDimitry Andric     Register FullExtensionReg;
1492*5f757f3fSDimitry Andric     Register PartialExtensionReg;
14938bcb0991SDimitry Andric 
14948bcb0991SDimitry Andric     // Do the operation on each small part.
14958bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
1496*5f757f3fSDimitry Andric       if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
14978bcb0991SDimitry Andric         DstRegs.push_back(SrcRegs[i]);
1498*5f757f3fSDimitry Andric         PartialExtensionReg = DstRegs.back();
1499*5f757f3fSDimitry Andric       } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
15008bcb0991SDimitry Andric         assert(PartialExtensionReg &&
15018bcb0991SDimitry Andric                "Expected to visit partial extension before full");
15028bcb0991SDimitry Andric         if (FullExtensionReg) {
15038bcb0991SDimitry Andric           DstRegs.push_back(FullExtensionReg);
15048bcb0991SDimitry Andric           continue;
15058bcb0991SDimitry Andric         }
15065ffd83dbSDimitry Andric         DstRegs.push_back(
15075ffd83dbSDimitry Andric             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
15085ffd83dbSDimitry Andric                 .getReg(0));
15098bcb0991SDimitry Andric         FullExtensionReg = DstRegs.back();
15108bcb0991SDimitry Andric       } else {
15118bcb0991SDimitry Andric         DstRegs.push_back(
15128bcb0991SDimitry Andric             MIRBuilder
15138bcb0991SDimitry Andric                 .buildInstr(
15148bcb0991SDimitry Andric                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
15158bcb0991SDimitry Andric                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
15165ffd83dbSDimitry Andric                 .getReg(0));
15178bcb0991SDimitry Andric         PartialExtensionReg = DstRegs.back();
15188bcb0991SDimitry Andric       }
15198bcb0991SDimitry Andric     }
15208bcb0991SDimitry Andric 
15218bcb0991SDimitry Andric     // Gather the destination registers into the final destination.
15228bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
1523bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
15248bcb0991SDimitry Andric     MI.eraseFromParent();
15258bcb0991SDimitry Andric     return Legalized;
15268bcb0991SDimitry Andric   }
1527480093f4SDimitry Andric   case TargetOpcode::G_BSWAP:
1528480093f4SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
1529480093f4SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1530480093f4SDimitry Andric       return UnableToLegalize;
1531480093f4SDimitry Andric 
1532480093f4SDimitry Andric     Observer.changingInstr(MI);
1533480093f4SDimitry Andric     SmallVector<Register, 2> SrcRegs, DstRegs;
1534480093f4SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
1535480093f4SDimitry Andric     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1536480093f4SDimitry Andric 
1537480093f4SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
1538480093f4SDimitry Andric       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1539480093f4SDimitry Andric                                            {SrcRegs[NumParts - 1 - i]});
1540480093f4SDimitry Andric       DstRegs.push_back(DstPart.getReg(0));
1541480093f4SDimitry Andric     }
1542480093f4SDimitry Andric 
1543bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1544480093f4SDimitry Andric 
1545480093f4SDimitry Andric     Observer.changedInstr(MI);
1546480093f4SDimitry Andric     MI.eraseFromParent();
1547480093f4SDimitry Andric     return Legalized;
1548480093f4SDimitry Andric   }
1549e8d8bef9SDimitry Andric   case TargetOpcode::G_PTR_ADD:
15505ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
15515ffd83dbSDimitry Andric     if (TypeIdx != 1)
15525ffd83dbSDimitry Andric       return UnableToLegalize;
15535ffd83dbSDimitry Andric     Observer.changingInstr(MI);
15545ffd83dbSDimitry Andric     narrowScalarSrc(MI, NarrowTy, 2);
15555ffd83dbSDimitry Andric     Observer.changedInstr(MI);
15565ffd83dbSDimitry Andric     return Legalized;
15570b57cec5SDimitry Andric   }
155823408297SDimitry Andric   case TargetOpcode::G_FPTOUI:
155923408297SDimitry Andric   case TargetOpcode::G_FPTOSI:
156023408297SDimitry Andric     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1561e8d8bef9SDimitry Andric   case TargetOpcode::G_FPEXT:
1562e8d8bef9SDimitry Andric     if (TypeIdx != 0)
1563e8d8bef9SDimitry Andric       return UnableToLegalize;
1564e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
1565e8d8bef9SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1566e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
1567e8d8bef9SDimitry Andric     return Legalized;
156806c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
156906c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP:
157006c3fb27SDimitry Andric     return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
15710b57cec5SDimitry Andric   }
15725ffd83dbSDimitry Andric }
15735ffd83dbSDimitry Andric 
15745ffd83dbSDimitry Andric Register LegalizerHelper::coerceToScalar(Register Val) {
15755ffd83dbSDimitry Andric   LLT Ty = MRI.getType(Val);
15765ffd83dbSDimitry Andric   if (Ty.isScalar())
15775ffd83dbSDimitry Andric     return Val;
15785ffd83dbSDimitry Andric 
15795ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
15805ffd83dbSDimitry Andric   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
15815ffd83dbSDimitry Andric   if (Ty.isPointer()) {
15825ffd83dbSDimitry Andric     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
15835ffd83dbSDimitry Andric       return Register();
15845ffd83dbSDimitry Andric     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
15855ffd83dbSDimitry Andric   }
15865ffd83dbSDimitry Andric 
15875ffd83dbSDimitry Andric   Register NewVal = Val;
15885ffd83dbSDimitry Andric 
15895ffd83dbSDimitry Andric   assert(Ty.isVector());
15905ffd83dbSDimitry Andric   LLT EltTy = Ty.getElementType();
15915ffd83dbSDimitry Andric   if (EltTy.isPointer())
15925ffd83dbSDimitry Andric     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
15935ffd83dbSDimitry Andric   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
15945ffd83dbSDimitry Andric }
15950b57cec5SDimitry Andric 
15960b57cec5SDimitry Andric void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
15970b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned ExtOpcode) {
15980b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
15995ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
16005ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
16010b57cec5SDimitry Andric }
16020b57cec5SDimitry Andric 
16030b57cec5SDimitry Andric void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
16040b57cec5SDimitry Andric                                       unsigned OpIdx) {
16050b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16065ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
16075ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
16080b57cec5SDimitry Andric }
16090b57cec5SDimitry Andric 
16100b57cec5SDimitry Andric void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
16110b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned TruncOpcode) {
16120b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16130b57cec5SDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
16140b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
16155ffd83dbSDimitry Andric   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
16160b57cec5SDimitry Andric   MO.setReg(DstExt);
16170b57cec5SDimitry Andric }
16180b57cec5SDimitry Andric 
16190b57cec5SDimitry Andric void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
16200b57cec5SDimitry Andric                                       unsigned OpIdx, unsigned ExtOpcode) {
16210b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16220b57cec5SDimitry Andric   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
16230b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
16245ffd83dbSDimitry Andric   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
16250b57cec5SDimitry Andric   MO.setReg(DstTrunc);
16260b57cec5SDimitry Andric }
16270b57cec5SDimitry Andric 
16280b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
16290b57cec5SDimitry Andric                                             unsigned OpIdx) {
16300b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16310b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
16320eae32dcSDimitry Andric   Register Dst = MO.getReg();
16330eae32dcSDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
16340eae32dcSDimitry Andric   MO.setReg(DstExt);
16350eae32dcSDimitry Andric   MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric 
16380b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
16390b57cec5SDimitry Andric                                             unsigned OpIdx) {
16400b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16410eae32dcSDimitry Andric   SmallVector<Register, 8> Regs;
16420eae32dcSDimitry Andric   MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
16430b57cec5SDimitry Andric }
16440b57cec5SDimitry Andric 
16455ffd83dbSDimitry Andric void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
16465ffd83dbSDimitry Andric   MachineOperand &Op = MI.getOperand(OpIdx);
16475ffd83dbSDimitry Andric   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
16485ffd83dbSDimitry Andric }
16495ffd83dbSDimitry Andric 
16505ffd83dbSDimitry Andric void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
16515ffd83dbSDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
16525ffd83dbSDimitry Andric   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
16535ffd83dbSDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
16545ffd83dbSDimitry Andric   MIRBuilder.buildBitcast(MO, CastDst);
16555ffd83dbSDimitry Andric   MO.setReg(CastDst);
16565ffd83dbSDimitry Andric }
16575ffd83dbSDimitry Andric 
16580b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
16590b57cec5SDimitry Andric LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
16600b57cec5SDimitry Andric                                         LLT WideTy) {
16610b57cec5SDimitry Andric   if (TypeIdx != 1)
16620b57cec5SDimitry Andric     return UnableToLegalize;
16630b57cec5SDimitry Andric 
166406c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
16650b57cec5SDimitry Andric   if (DstTy.isVector())
16660b57cec5SDimitry Andric     return UnableToLegalize;
16670b57cec5SDimitry Andric 
166806c3fb27SDimitry Andric   LLT SrcTy = MRI.getType(Src1Reg);
16690b57cec5SDimitry Andric   const int DstSize = DstTy.getSizeInBits();
16700b57cec5SDimitry Andric   const int SrcSize = SrcTy.getSizeInBits();
16710b57cec5SDimitry Andric   const int WideSize = WideTy.getSizeInBits();
16720b57cec5SDimitry Andric   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
16730b57cec5SDimitry Andric 
16740b57cec5SDimitry Andric   unsigned NumOps = MI.getNumOperands();
16750b57cec5SDimitry Andric   unsigned NumSrc = MI.getNumOperands() - 1;
16760b57cec5SDimitry Andric   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
16770b57cec5SDimitry Andric 
16780b57cec5SDimitry Andric   if (WideSize >= DstSize) {
16790b57cec5SDimitry Andric     // Directly pack the bits in the target type.
168006c3fb27SDimitry Andric     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
16810b57cec5SDimitry Andric 
16820b57cec5SDimitry Andric     for (unsigned I = 2; I != NumOps; ++I) {
16830b57cec5SDimitry Andric       const unsigned Offset = (I - 1) * PartSize;
16840b57cec5SDimitry Andric 
16850b57cec5SDimitry Andric       Register SrcReg = MI.getOperand(I).getReg();
16860b57cec5SDimitry Andric       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
16870b57cec5SDimitry Andric 
16880b57cec5SDimitry Andric       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
16890b57cec5SDimitry Andric 
16908bcb0991SDimitry Andric       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
16910b57cec5SDimitry Andric         MRI.createGenericVirtualRegister(WideTy);
16920b57cec5SDimitry Andric 
16930b57cec5SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
16940b57cec5SDimitry Andric       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
16950b57cec5SDimitry Andric       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
16960b57cec5SDimitry Andric       ResultReg = NextResult;
16970b57cec5SDimitry Andric     }
16980b57cec5SDimitry Andric 
16990b57cec5SDimitry Andric     if (WideSize > DstSize)
17000b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg, ResultReg);
17018bcb0991SDimitry Andric     else if (DstTy.isPointer())
17028bcb0991SDimitry Andric       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
17030b57cec5SDimitry Andric 
17040b57cec5SDimitry Andric     MI.eraseFromParent();
17050b57cec5SDimitry Andric     return Legalized;
17060b57cec5SDimitry Andric   }
17070b57cec5SDimitry Andric 
17080b57cec5SDimitry Andric   // Unmerge the original values to the GCD type, and recombine to the next
17090b57cec5SDimitry Andric   // multiple greater than the original type.
17100b57cec5SDimitry Andric   //
17110b57cec5SDimitry Andric   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
17120b57cec5SDimitry Andric   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
17130b57cec5SDimitry Andric   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
17140b57cec5SDimitry Andric   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
17150b57cec5SDimitry Andric   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
17160b57cec5SDimitry Andric   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
17170b57cec5SDimitry Andric   // %12:_(s12) = G_MERGE_VALUES %10, %11
17180b57cec5SDimitry Andric   //
17190b57cec5SDimitry Andric   // Padding with undef if necessary:
17200b57cec5SDimitry Andric   //
17210b57cec5SDimitry Andric   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
17220b57cec5SDimitry Andric   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
17230b57cec5SDimitry Andric   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
17240b57cec5SDimitry Andric   // %7:_(s2) = G_IMPLICIT_DEF
17250b57cec5SDimitry Andric   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
17260b57cec5SDimitry Andric   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
17270b57cec5SDimitry Andric   // %10:_(s12) = G_MERGE_VALUES %8, %9
17280b57cec5SDimitry Andric 
1729bdd1243dSDimitry Andric   const int GCD = std::gcd(SrcSize, WideSize);
17300b57cec5SDimitry Andric   LLT GCDTy = LLT::scalar(GCD);
17310b57cec5SDimitry Andric 
17320b57cec5SDimitry Andric   SmallVector<Register, 8> Parts;
17330b57cec5SDimitry Andric   SmallVector<Register, 8> NewMergeRegs;
17340b57cec5SDimitry Andric   SmallVector<Register, 8> Unmerges;
17350b57cec5SDimitry Andric   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
17360b57cec5SDimitry Andric 
17370b57cec5SDimitry Andric   // Decompose the original operands if they don't evenly divide.
17384824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
17394824e7fdSDimitry Andric     Register SrcReg = MO.getReg();
17400b57cec5SDimitry Andric     if (GCD == SrcSize) {
17410b57cec5SDimitry Andric       Unmerges.push_back(SrcReg);
17420b57cec5SDimitry Andric     } else {
17430b57cec5SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
17440b57cec5SDimitry Andric       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
17450b57cec5SDimitry Andric         Unmerges.push_back(Unmerge.getReg(J));
17460b57cec5SDimitry Andric     }
17470b57cec5SDimitry Andric   }
17480b57cec5SDimitry Andric 
17490b57cec5SDimitry Andric   // Pad with undef to the next size that is a multiple of the requested size.
17500b57cec5SDimitry Andric   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
17510b57cec5SDimitry Andric     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
17520b57cec5SDimitry Andric     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
17530b57cec5SDimitry Andric       Unmerges.push_back(UndefReg);
17540b57cec5SDimitry Andric   }
17550b57cec5SDimitry Andric 
17560b57cec5SDimitry Andric   const int PartsPerGCD = WideSize / GCD;
17570b57cec5SDimitry Andric 
17580b57cec5SDimitry Andric   // Build merges of each piece.
17590b57cec5SDimitry Andric   ArrayRef<Register> Slicer(Unmerges);
17600b57cec5SDimitry Andric   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1761bdd1243dSDimitry Andric     auto Merge =
1762bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
17630b57cec5SDimitry Andric     NewMergeRegs.push_back(Merge.getReg(0));
17640b57cec5SDimitry Andric   }
17650b57cec5SDimitry Andric 
17660b57cec5SDimitry Andric   // A truncate may be necessary if the requested type doesn't evenly divide the
17670b57cec5SDimitry Andric   // original result type.
17680b57cec5SDimitry Andric   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1769bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
17700b57cec5SDimitry Andric   } else {
1771bdd1243dSDimitry Andric     auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
17720b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
17730b57cec5SDimitry Andric   }
17740b57cec5SDimitry Andric 
17750b57cec5SDimitry Andric   MI.eraseFromParent();
17760b57cec5SDimitry Andric   return Legalized;
17770b57cec5SDimitry Andric }
17780b57cec5SDimitry Andric 
17790b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
17800b57cec5SDimitry Andric LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
17810b57cec5SDimitry Andric                                           LLT WideTy) {
17820b57cec5SDimitry Andric   if (TypeIdx != 0)
17830b57cec5SDimitry Andric     return UnableToLegalize;
17840b57cec5SDimitry Andric 
17855ffd83dbSDimitry Andric   int NumDst = MI.getNumOperands() - 1;
17860b57cec5SDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
17870b57cec5SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
17885ffd83dbSDimitry Andric   if (SrcTy.isVector())
17890b57cec5SDimitry Andric     return UnableToLegalize;
17900b57cec5SDimitry Andric 
17910b57cec5SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
17920b57cec5SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
17930b57cec5SDimitry Andric   if (!DstTy.isScalar())
17940b57cec5SDimitry Andric     return UnableToLegalize;
17950b57cec5SDimitry Andric 
17965ffd83dbSDimitry Andric   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
17975ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
17985ffd83dbSDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
17995ffd83dbSDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
18005ffd83dbSDimitry Andric         LLVM_DEBUG(
18015ffd83dbSDimitry Andric             dbgs() << "Not casting non-integral address space integer\n");
18025ffd83dbSDimitry Andric         return UnableToLegalize;
18030b57cec5SDimitry Andric       }
18040b57cec5SDimitry Andric 
18055ffd83dbSDimitry Andric       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
18065ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
18075ffd83dbSDimitry Andric     }
18080b57cec5SDimitry Andric 
18095ffd83dbSDimitry Andric     // Widen SrcTy to WideTy. This does not affect the result, but since the
18105ffd83dbSDimitry Andric     // user requested this size, it is probably better handled than SrcTy and
181104eeddc0SDimitry Andric     // should reduce the total number of legalization artifacts.
18125ffd83dbSDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
18135ffd83dbSDimitry Andric       SrcTy = WideTy;
18145ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
18155ffd83dbSDimitry Andric     }
18160b57cec5SDimitry Andric 
18175ffd83dbSDimitry Andric     // Theres no unmerge type to target. Directly extract the bits from the
18185ffd83dbSDimitry Andric     // source type
18195ffd83dbSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
18200b57cec5SDimitry Andric 
18215ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
18225ffd83dbSDimitry Andric     for (int I = 1; I != NumDst; ++I) {
18235ffd83dbSDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
18245ffd83dbSDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
18255ffd83dbSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
18265ffd83dbSDimitry Andric     }
18275ffd83dbSDimitry Andric 
18285ffd83dbSDimitry Andric     MI.eraseFromParent();
18295ffd83dbSDimitry Andric     return Legalized;
18305ffd83dbSDimitry Andric   }
18315ffd83dbSDimitry Andric 
18325ffd83dbSDimitry Andric   // Extend the source to a wider type.
18335ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(SrcTy, WideTy);
18345ffd83dbSDimitry Andric 
18355ffd83dbSDimitry Andric   Register WideSrc = SrcReg;
18365ffd83dbSDimitry Andric   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
18375ffd83dbSDimitry Andric     // TODO: If this is an integral address space, cast to integer and anyext.
18385ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
18395ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
18405ffd83dbSDimitry Andric       return UnableToLegalize;
18415ffd83dbSDimitry Andric     }
18425ffd83dbSDimitry Andric 
18435ffd83dbSDimitry Andric     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
18445ffd83dbSDimitry Andric   }
18455ffd83dbSDimitry Andric 
18465ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
18475ffd83dbSDimitry Andric 
1848e8d8bef9SDimitry Andric   // Create a sequence of unmerges and merges to the original results. Since we
1849e8d8bef9SDimitry Andric   // may have widened the source, we will need to pad the results with dead defs
1850e8d8bef9SDimitry Andric   // to cover the source register.
1851e8d8bef9SDimitry Andric   // e.g. widen s48 to s64:
1852e8d8bef9SDimitry Andric   // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
18535ffd83dbSDimitry Andric   //
18545ffd83dbSDimitry Andric   // =>
1855e8d8bef9SDimitry Andric   //  %4:_(s192) = G_ANYEXT %0:_(s96)
1856e8d8bef9SDimitry Andric   //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1857e8d8bef9SDimitry Andric   //  ; unpack to GCD type, with extra dead defs
1858e8d8bef9SDimitry Andric   //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1859e8d8bef9SDimitry Andric   //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1860e8d8bef9SDimitry Andric   //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
1861e8d8bef9SDimitry Andric   //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
1862e8d8bef9SDimitry Andric   //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1863e8d8bef9SDimitry Andric   const LLT GCDTy = getGCDType(WideTy, DstTy);
18645ffd83dbSDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
1865e8d8bef9SDimitry Andric   const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1866e8d8bef9SDimitry Andric 
1867e8d8bef9SDimitry Andric   // Directly unmerge to the destination without going through a GCD type
1868e8d8bef9SDimitry Andric   // if possible
1869e8d8bef9SDimitry Andric   if (PartsPerRemerge == 1) {
18705ffd83dbSDimitry Andric     const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
18715ffd83dbSDimitry Andric 
18725ffd83dbSDimitry Andric     for (int I = 0; I != NumUnmerge; ++I) {
18735ffd83dbSDimitry Andric       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
18745ffd83dbSDimitry Andric 
18755ffd83dbSDimitry Andric       for (int J = 0; J != PartsPerUnmerge; ++J) {
18765ffd83dbSDimitry Andric         int Idx = I * PartsPerUnmerge + J;
18775ffd83dbSDimitry Andric         if (Idx < NumDst)
18785ffd83dbSDimitry Andric           MIB.addDef(MI.getOperand(Idx).getReg());
18795ffd83dbSDimitry Andric         else {
18805ffd83dbSDimitry Andric           // Create dead def for excess components.
18815ffd83dbSDimitry Andric           MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
18825ffd83dbSDimitry Andric         }
18835ffd83dbSDimitry Andric       }
18845ffd83dbSDimitry Andric 
18855ffd83dbSDimitry Andric       MIB.addUse(Unmerge.getReg(I));
18865ffd83dbSDimitry Andric     }
1887e8d8bef9SDimitry Andric   } else {
1888e8d8bef9SDimitry Andric     SmallVector<Register, 16> Parts;
1889e8d8bef9SDimitry Andric     for (int J = 0; J != NumUnmerge; ++J)
1890e8d8bef9SDimitry Andric       extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1891e8d8bef9SDimitry Andric 
1892e8d8bef9SDimitry Andric     SmallVector<Register, 8> RemergeParts;
1893e8d8bef9SDimitry Andric     for (int I = 0; I != NumDst; ++I) {
1894e8d8bef9SDimitry Andric       for (int J = 0; J < PartsPerRemerge; ++J) {
1895e8d8bef9SDimitry Andric         const int Idx = I * PartsPerRemerge + J;
1896e8d8bef9SDimitry Andric         RemergeParts.emplace_back(Parts[Idx]);
1897e8d8bef9SDimitry Andric       }
1898e8d8bef9SDimitry Andric 
1899bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
1900e8d8bef9SDimitry Andric       RemergeParts.clear();
1901e8d8bef9SDimitry Andric     }
1902e8d8bef9SDimitry Andric   }
19035ffd83dbSDimitry Andric 
19045ffd83dbSDimitry Andric   MI.eraseFromParent();
19050b57cec5SDimitry Andric   return Legalized;
19060b57cec5SDimitry Andric }
19070b57cec5SDimitry Andric 
19080b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
19090b57cec5SDimitry Andric LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
19100b57cec5SDimitry Andric                                     LLT WideTy) {
191106c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
19120b57cec5SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
19130b57cec5SDimitry Andric 
19140b57cec5SDimitry Andric   if (TypeIdx == 0) {
19150b57cec5SDimitry Andric     if (SrcTy.isVector() || DstTy.isVector())
19160b57cec5SDimitry Andric       return UnableToLegalize;
19170b57cec5SDimitry Andric 
19180b57cec5SDimitry Andric     SrcOp Src(SrcReg);
19190b57cec5SDimitry Andric     if (SrcTy.isPointer()) {
19200b57cec5SDimitry Andric       // Extracts from pointers can be handled only if they are really just
19210b57cec5SDimitry Andric       // simple integers.
19220b57cec5SDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
19230b57cec5SDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
19240b57cec5SDimitry Andric         return UnableToLegalize;
19250b57cec5SDimitry Andric 
19260b57cec5SDimitry Andric       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
19270b57cec5SDimitry Andric       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
19280b57cec5SDimitry Andric       SrcTy = SrcAsIntTy;
19290b57cec5SDimitry Andric     }
19300b57cec5SDimitry Andric 
19310b57cec5SDimitry Andric     if (DstTy.isPointer())
19320b57cec5SDimitry Andric       return UnableToLegalize;
19330b57cec5SDimitry Andric 
19340b57cec5SDimitry Andric     if (Offset == 0) {
19350b57cec5SDimitry Andric       // Avoid a shift in the degenerate case.
19360b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg,
19370b57cec5SDimitry Andric                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
19380b57cec5SDimitry Andric       MI.eraseFromParent();
19390b57cec5SDimitry Andric       return Legalized;
19400b57cec5SDimitry Andric     }
19410b57cec5SDimitry Andric 
19420b57cec5SDimitry Andric     // Do a shift in the source type.
19430b57cec5SDimitry Andric     LLT ShiftTy = SrcTy;
19440b57cec5SDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
19450b57cec5SDimitry Andric       Src = MIRBuilder.buildAnyExt(WideTy, Src);
19460b57cec5SDimitry Andric       ShiftTy = WideTy;
1947e8d8bef9SDimitry Andric     }
19480b57cec5SDimitry Andric 
19490b57cec5SDimitry Andric     auto LShr = MIRBuilder.buildLShr(
19500b57cec5SDimitry Andric       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
19510b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, LShr);
19520b57cec5SDimitry Andric     MI.eraseFromParent();
19530b57cec5SDimitry Andric     return Legalized;
19540b57cec5SDimitry Andric   }
19550b57cec5SDimitry Andric 
19560b57cec5SDimitry Andric   if (SrcTy.isScalar()) {
19570b57cec5SDimitry Andric     Observer.changingInstr(MI);
19580b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
19590b57cec5SDimitry Andric     Observer.changedInstr(MI);
19600b57cec5SDimitry Andric     return Legalized;
19610b57cec5SDimitry Andric   }
19620b57cec5SDimitry Andric 
19630b57cec5SDimitry Andric   if (!SrcTy.isVector())
19640b57cec5SDimitry Andric     return UnableToLegalize;
19650b57cec5SDimitry Andric 
19660b57cec5SDimitry Andric   if (DstTy != SrcTy.getElementType())
19670b57cec5SDimitry Andric     return UnableToLegalize;
19680b57cec5SDimitry Andric 
19690b57cec5SDimitry Andric   if (Offset % SrcTy.getScalarSizeInBits() != 0)
19700b57cec5SDimitry Andric     return UnableToLegalize;
19710b57cec5SDimitry Andric 
19720b57cec5SDimitry Andric   Observer.changingInstr(MI);
19730b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
19740b57cec5SDimitry Andric 
19750b57cec5SDimitry Andric   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
19760b57cec5SDimitry Andric                           Offset);
19770b57cec5SDimitry Andric   widenScalarDst(MI, WideTy.getScalarType(), 0);
19780b57cec5SDimitry Andric   Observer.changedInstr(MI);
19790b57cec5SDimitry Andric   return Legalized;
19800b57cec5SDimitry Andric }
19810b57cec5SDimitry Andric 
19820b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
19830b57cec5SDimitry Andric LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
19840b57cec5SDimitry Andric                                    LLT WideTy) {
1985e8d8bef9SDimitry Andric   if (TypeIdx != 0 || WideTy.isVector())
19860b57cec5SDimitry Andric     return UnableToLegalize;
19870b57cec5SDimitry Andric   Observer.changingInstr(MI);
19880b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
19890b57cec5SDimitry Andric   widenScalarDst(MI, WideTy);
19900b57cec5SDimitry Andric   Observer.changedInstr(MI);
19910b57cec5SDimitry Andric   return Legalized;
19920b57cec5SDimitry Andric }
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
1995fe6060f1SDimitry Andric LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1996e8d8bef9SDimitry Andric                                            LLT WideTy) {
1997fe6060f1SDimitry Andric   unsigned Opcode;
1998fe6060f1SDimitry Andric   unsigned ExtOpcode;
1999bdd1243dSDimitry Andric   std::optional<Register> CarryIn;
2000fe6060f1SDimitry Andric   switch (MI.getOpcode()) {
2001fe6060f1SDimitry Andric   default:
2002fe6060f1SDimitry Andric     llvm_unreachable("Unexpected opcode!");
2003fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
2004fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2005fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2006fe6060f1SDimitry Andric     break;
2007fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
2008fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2009fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2010fe6060f1SDimitry Andric     break;
2011fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
2012fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2013fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2014fe6060f1SDimitry Andric     break;
2015fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
2016fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2017fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2018fe6060f1SDimitry Andric     break;
2019fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2020fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2021fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2022fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2023fe6060f1SDimitry Andric     break;
2024fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2025fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2026fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2027fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2028fe6060f1SDimitry Andric     break;
2029fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2030fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2031fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2032fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2033fe6060f1SDimitry Andric     break;
2034fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2035fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2036fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2037fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2038fe6060f1SDimitry Andric     break;
2039fe6060f1SDimitry Andric   }
2040fe6060f1SDimitry Andric 
204181ad6265SDimitry Andric   if (TypeIdx == 1) {
204281ad6265SDimitry Andric     unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
204381ad6265SDimitry Andric 
204481ad6265SDimitry Andric     Observer.changingInstr(MI);
204581ad6265SDimitry Andric     if (CarryIn)
204681ad6265SDimitry Andric       widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2047bdd1243dSDimitry Andric     widenScalarDst(MI, WideTy, 1);
204881ad6265SDimitry Andric 
204981ad6265SDimitry Andric     Observer.changedInstr(MI);
205081ad6265SDimitry Andric     return Legalized;
205181ad6265SDimitry Andric   }
205281ad6265SDimitry Andric 
2053e8d8bef9SDimitry Andric   auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2054e8d8bef9SDimitry Andric   auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2055e8d8bef9SDimitry Andric   // Do the arithmetic in the larger type.
2056fe6060f1SDimitry Andric   Register NewOp;
2057fe6060f1SDimitry Andric   if (CarryIn) {
2058fe6060f1SDimitry Andric     LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2059fe6060f1SDimitry Andric     NewOp = MIRBuilder
2060fe6060f1SDimitry Andric                 .buildInstr(Opcode, {WideTy, CarryOutTy},
2061fe6060f1SDimitry Andric                             {LHSExt, RHSExt, *CarryIn})
2062fe6060f1SDimitry Andric                 .getReg(0);
2063fe6060f1SDimitry Andric   } else {
2064fe6060f1SDimitry Andric     NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2065fe6060f1SDimitry Andric   }
2066e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2067e8d8bef9SDimitry Andric   auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2068e8d8bef9SDimitry Andric   auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2069e8d8bef9SDimitry Andric   // There is no overflow if the ExtOp is the same as NewOp.
2070e8d8bef9SDimitry Andric   MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2071e8d8bef9SDimitry Andric   // Now trunc the NewOp to the original result.
2072e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2073e8d8bef9SDimitry Andric   MI.eraseFromParent();
2074e8d8bef9SDimitry Andric   return Legalized;
2075e8d8bef9SDimitry Andric }
2076e8d8bef9SDimitry Andric 
2077e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
2078e8d8bef9SDimitry Andric LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
20795ffd83dbSDimitry Andric                                          LLT WideTy) {
20805ffd83dbSDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2081e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2082e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2083e8d8bef9SDimitry Andric   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2084e8d8bef9SDimitry Andric                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
20855ffd83dbSDimitry Andric   // We can convert this to:
20865ffd83dbSDimitry Andric   //   1. Any extend iN to iM
20875ffd83dbSDimitry Andric   //   2. SHL by M-N
2088e8d8bef9SDimitry Andric   //   3. [US][ADD|SUB|SHL]SAT
20895ffd83dbSDimitry Andric   //   4. L/ASHR by M-N
20905ffd83dbSDimitry Andric   //
20915ffd83dbSDimitry Andric   // It may be more efficient to lower this to a min and a max operation in
20925ffd83dbSDimitry Andric   // the higher precision arithmetic if the promoted operation isn't legal,
20935ffd83dbSDimitry Andric   // but this decision is up to the target's lowering request.
20945ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
20950b57cec5SDimitry Andric 
20965ffd83dbSDimitry Andric   unsigned NewBits = WideTy.getScalarSizeInBits();
20975ffd83dbSDimitry Andric   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
20985ffd83dbSDimitry Andric 
2099e8d8bef9SDimitry Andric   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2100e8d8bef9SDimitry Andric   // must not left shift the RHS to preserve the shift amount.
21015ffd83dbSDimitry Andric   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2102e8d8bef9SDimitry Andric   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2103e8d8bef9SDimitry Andric                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
21045ffd83dbSDimitry Andric   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
21055ffd83dbSDimitry Andric   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2106e8d8bef9SDimitry Andric   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
21075ffd83dbSDimitry Andric 
21085ffd83dbSDimitry Andric   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
21095ffd83dbSDimitry Andric                                         {ShiftL, ShiftR}, MI.getFlags());
21105ffd83dbSDimitry Andric 
21115ffd83dbSDimitry Andric   // Use a shift that will preserve the number of sign bits when the trunc is
21125ffd83dbSDimitry Andric   // folded away.
21135ffd83dbSDimitry Andric   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
21145ffd83dbSDimitry Andric                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
21155ffd83dbSDimitry Andric 
21165ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(DstReg, Result);
21175ffd83dbSDimitry Andric   MI.eraseFromParent();
21185ffd83dbSDimitry Andric   return Legalized;
21195ffd83dbSDimitry Andric }
21205ffd83dbSDimitry Andric 
21215ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
2122fe6060f1SDimitry Andric LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2123fe6060f1SDimitry Andric                                  LLT WideTy) {
212481ad6265SDimitry Andric   if (TypeIdx == 1) {
212581ad6265SDimitry Andric     Observer.changingInstr(MI);
212681ad6265SDimitry Andric     widenScalarDst(MI, WideTy, 1);
212781ad6265SDimitry Andric     Observer.changedInstr(MI);
212881ad6265SDimitry Andric     return Legalized;
212981ad6265SDimitry Andric   }
2130fe6060f1SDimitry Andric 
2131fe6060f1SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
213206c3fb27SDimitry Andric   auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2133fe6060f1SDimitry Andric   LLT SrcTy = MRI.getType(LHS);
2134fe6060f1SDimitry Andric   LLT OverflowTy = MRI.getType(OriginalOverflow);
2135fe6060f1SDimitry Andric   unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2136fe6060f1SDimitry Andric 
2137fe6060f1SDimitry Andric   // To determine if the result overflowed in the larger type, we extend the
2138fe6060f1SDimitry Andric   // input to the larger type, do the multiply (checking if it overflows),
2139fe6060f1SDimitry Andric   // then also check the high bits of the result to see if overflow happened
2140fe6060f1SDimitry Andric   // there.
2141fe6060f1SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2142fe6060f1SDimitry Andric   auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2143fe6060f1SDimitry Andric   auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2144fe6060f1SDimitry Andric 
2145*5f757f3fSDimitry Andric   // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2146*5f757f3fSDimitry Andric   // so we don't need to check the overflow result of larger type Mulo.
2147*5f757f3fSDimitry Andric   bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2148*5f757f3fSDimitry Andric 
2149*5f757f3fSDimitry Andric   unsigned MulOpc =
2150*5f757f3fSDimitry Andric       WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2151*5f757f3fSDimitry Andric 
2152*5f757f3fSDimitry Andric   MachineInstrBuilder Mulo;
2153*5f757f3fSDimitry Andric   if (WideMulCanOverflow)
2154*5f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2155fe6060f1SDimitry Andric                                  {LeftOperand, RightOperand});
2156*5f757f3fSDimitry Andric   else
2157*5f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2158*5f757f3fSDimitry Andric 
2159fe6060f1SDimitry Andric   auto Mul = Mulo->getOperand(0);
2160fe6060f1SDimitry Andric   MIRBuilder.buildTrunc(Result, Mul);
2161fe6060f1SDimitry Andric 
2162fe6060f1SDimitry Andric   MachineInstrBuilder ExtResult;
2163fe6060f1SDimitry Andric   // Overflow occurred if it occurred in the larger type, or if the high part
2164fe6060f1SDimitry Andric   // of the result does not zero/sign-extend the low part.  Check this second
2165fe6060f1SDimitry Andric   // possibility first.
2166fe6060f1SDimitry Andric   if (IsSigned) {
2167fe6060f1SDimitry Andric     // For signed, overflow occurred when the high part does not sign-extend
2168fe6060f1SDimitry Andric     // the low part.
2169fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2170fe6060f1SDimitry Andric   } else {
2171fe6060f1SDimitry Andric     // Unsigned overflow occurred when the high part does not zero-extend the
2172fe6060f1SDimitry Andric     // low part.
2173fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2174fe6060f1SDimitry Andric   }
2175fe6060f1SDimitry Andric 
2176*5f757f3fSDimitry Andric   if (WideMulCanOverflow) {
2177fe6060f1SDimitry Andric     auto Overflow =
2178fe6060f1SDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2179fe6060f1SDimitry Andric     // Finally check if the multiplication in the larger type itself overflowed.
2180fe6060f1SDimitry Andric     MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2181fe6060f1SDimitry Andric   } else {
2182fe6060f1SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2183fe6060f1SDimitry Andric   }
2184fe6060f1SDimitry Andric   MI.eraseFromParent();
2185fe6060f1SDimitry Andric   return Legalized;
2186fe6060f1SDimitry Andric }
2187fe6060f1SDimitry Andric 
2188fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
21895ffd83dbSDimitry Andric LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
21900b57cec5SDimitry Andric   switch (MI.getOpcode()) {
21910b57cec5SDimitry Andric   default:
21920b57cec5SDimitry Andric     return UnableToLegalize;
2193fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
2194fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
2195fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
2196fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
2197fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
2198fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
2199fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MIN:
2200fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MAX:
2201fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMIN:
2202fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMAX:
2203fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2204fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2205fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2206fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2207fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2208fe6060f1SDimitry Andric     return Legalized;
2209fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
2210fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2211fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2212fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2213fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2214fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2215fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2216fe6060f1SDimitry Andric     return Legalized;
2217fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2218fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2219fe6060f1SDimitry Andric       Observer.changingInstr(MI);
2220fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2221fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2222fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy, 0);
2223fe6060f1SDimitry Andric       Observer.changedInstr(MI);
2224fe6060f1SDimitry Andric       return Legalized;
2225fe6060f1SDimitry Andric     }
2226fe6060f1SDimitry Andric     assert(TypeIdx == 1 &&
2227fe6060f1SDimitry Andric            "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2228fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2229fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2230fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2231fe6060f1SDimitry Andric     return Legalized;
22320b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
22330b57cec5SDimitry Andric     return widenScalarExtract(MI, TypeIdx, WideTy);
22340b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
22350b57cec5SDimitry Andric     return widenScalarInsert(MI, TypeIdx, WideTy);
22360b57cec5SDimitry Andric   case TargetOpcode::G_MERGE_VALUES:
22370b57cec5SDimitry Andric     return widenScalarMergeValues(MI, TypeIdx, WideTy);
22380b57cec5SDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
22390b57cec5SDimitry Andric     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2240e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
2241e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
22420b57cec5SDimitry Andric   case TargetOpcode::G_UADDO:
2243e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBO:
2244fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2245fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2246fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2247fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2248fe6060f1SDimitry Andric     return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2249fe6060f1SDimitry Andric   case TargetOpcode::G_UMULO:
2250fe6060f1SDimitry Andric   case TargetOpcode::G_SMULO:
2251fe6060f1SDimitry Andric     return widenScalarMulo(MI, TypeIdx, WideTy);
22525ffd83dbSDimitry Andric   case TargetOpcode::G_SADDSAT:
22535ffd83dbSDimitry Andric   case TargetOpcode::G_SSUBSAT:
2254e8d8bef9SDimitry Andric   case TargetOpcode::G_SSHLSAT:
22555ffd83dbSDimitry Andric   case TargetOpcode::G_UADDSAT:
22565ffd83dbSDimitry Andric   case TargetOpcode::G_USUBSAT:
2257e8d8bef9SDimitry Andric   case TargetOpcode::G_USHLSAT:
2258e8d8bef9SDimitry Andric     return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
22590b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
22600b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
22610b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
22620b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
22630b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP: {
22640b57cec5SDimitry Andric     if (TypeIdx == 0) {
22650b57cec5SDimitry Andric       Observer.changingInstr(MI);
22660b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
22670b57cec5SDimitry Andric       Observer.changedInstr(MI);
22680b57cec5SDimitry Andric       return Legalized;
22690b57cec5SDimitry Andric     }
22700b57cec5SDimitry Andric 
22710b57cec5SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
22720b57cec5SDimitry Andric 
2273349cc55cSDimitry Andric     // First extend the input.
2274349cc55cSDimitry Andric     unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2275349cc55cSDimitry Andric                               MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2276349cc55cSDimitry Andric                           ? TargetOpcode::G_ANYEXT
2277349cc55cSDimitry Andric                           : TargetOpcode::G_ZEXT;
2278349cc55cSDimitry Andric     auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
22790b57cec5SDimitry Andric     LLT CurTy = MRI.getType(SrcReg);
2280349cc55cSDimitry Andric     unsigned NewOpc = MI.getOpcode();
2281349cc55cSDimitry Andric     if (NewOpc == TargetOpcode::G_CTTZ) {
22820b57cec5SDimitry Andric       // The count is the same in the larger type except if the original
22830b57cec5SDimitry Andric       // value was zero.  This can be handled by setting the bit just off
22840b57cec5SDimitry Andric       // the top of the original type.
22850b57cec5SDimitry Andric       auto TopBit =
22860b57cec5SDimitry Andric           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
22870b57cec5SDimitry Andric       MIBSrc = MIRBuilder.buildOr(
22880b57cec5SDimitry Andric         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2289349cc55cSDimitry Andric       // Now we know the operand is non-zero, use the more relaxed opcode.
2290349cc55cSDimitry Andric       NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
22910b57cec5SDimitry Andric     }
22920b57cec5SDimitry Andric 
22930b57cec5SDimitry Andric     // Perform the operation at the larger size.
2294349cc55cSDimitry Andric     auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
22950b57cec5SDimitry Andric     // This is already the correct result for CTPOP and CTTZs
22960b57cec5SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
22970b57cec5SDimitry Andric         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
22980b57cec5SDimitry Andric       // The correct result is NewOp - (Difference in widety and current ty).
22990b57cec5SDimitry Andric       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
23005ffd83dbSDimitry Andric       MIBNewOp = MIRBuilder.buildSub(
23015ffd83dbSDimitry Andric           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
23020b57cec5SDimitry Andric     }
23030b57cec5SDimitry Andric 
23040b57cec5SDimitry Andric     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
23050b57cec5SDimitry Andric     MI.eraseFromParent();
23060b57cec5SDimitry Andric     return Legalized;
23070b57cec5SDimitry Andric   }
23080b57cec5SDimitry Andric   case TargetOpcode::G_BSWAP: {
23090b57cec5SDimitry Andric     Observer.changingInstr(MI);
23100b57cec5SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
23110b57cec5SDimitry Andric 
23120b57cec5SDimitry Andric     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
23130b57cec5SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
23140b57cec5SDimitry Andric     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
23150b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
23160b57cec5SDimitry Andric 
23170b57cec5SDimitry Andric     MI.getOperand(0).setReg(DstExt);
23180b57cec5SDimitry Andric 
23190b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
23200b57cec5SDimitry Andric 
23210b57cec5SDimitry Andric     LLT Ty = MRI.getType(DstReg);
23220b57cec5SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
23230b57cec5SDimitry Andric     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
23245ffd83dbSDimitry Andric     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
23250b57cec5SDimitry Andric 
23260b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, ShrReg);
23270b57cec5SDimitry Andric     Observer.changedInstr(MI);
23280b57cec5SDimitry Andric     return Legalized;
23290b57cec5SDimitry Andric   }
23308bcb0991SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
23318bcb0991SDimitry Andric     Observer.changingInstr(MI);
23328bcb0991SDimitry Andric 
23338bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
23348bcb0991SDimitry Andric     LLT Ty = MRI.getType(DstReg);
23358bcb0991SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
23368bcb0991SDimitry Andric 
23378bcb0991SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
23388bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
23398bcb0991SDimitry Andric     MI.getOperand(0).setReg(DstExt);
23408bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
23418bcb0991SDimitry Andric 
23428bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
23438bcb0991SDimitry Andric     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
23448bcb0991SDimitry Andric     MIRBuilder.buildTrunc(DstReg, Shift);
23458bcb0991SDimitry Andric     Observer.changedInstr(MI);
23468bcb0991SDimitry Andric     return Legalized;
23478bcb0991SDimitry Andric   }
23485ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
23495ffd83dbSDimitry Andric     Observer.changingInstr(MI);
23505ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
23515ffd83dbSDimitry Andric     widenScalarDst(MI, WideTy);
23525ffd83dbSDimitry Andric     Observer.changedInstr(MI);
23535ffd83dbSDimitry Andric     return Legalized;
23545ffd83dbSDimitry Andric 
2355fe6060f1SDimitry Andric   case TargetOpcode::G_ABS:
2356fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2357fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2358fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2359fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2360fe6060f1SDimitry Andric     return Legalized;
2361fe6060f1SDimitry Andric 
23620b57cec5SDimitry Andric   case TargetOpcode::G_ADD:
23630b57cec5SDimitry Andric   case TargetOpcode::G_AND:
23640b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
23650b57cec5SDimitry Andric   case TargetOpcode::G_OR:
23660b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
23670b57cec5SDimitry Andric   case TargetOpcode::G_SUB:
23680b57cec5SDimitry Andric     // Perform operation at larger width (any extension is fine here, high bits
23690b57cec5SDimitry Andric     // don't affect the result) and then truncate the result back to the
23700b57cec5SDimitry Andric     // original type.
23710b57cec5SDimitry Andric     Observer.changingInstr(MI);
23720b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
23730b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
23740b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
23750b57cec5SDimitry Andric     Observer.changedInstr(MI);
23760b57cec5SDimitry Andric     return Legalized;
23770b57cec5SDimitry Andric 
2378fe6060f1SDimitry Andric   case TargetOpcode::G_SBFX:
2379fe6060f1SDimitry Andric   case TargetOpcode::G_UBFX:
2380fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2381fe6060f1SDimitry Andric 
2382fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2383fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2384fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy);
2385fe6060f1SDimitry Andric     } else {
2386fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2387fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2388fe6060f1SDimitry Andric     }
2389fe6060f1SDimitry Andric 
2390fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2391fe6060f1SDimitry Andric     return Legalized;
2392fe6060f1SDimitry Andric 
23930b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
23940b57cec5SDimitry Andric     Observer.changingInstr(MI);
23950b57cec5SDimitry Andric 
23960b57cec5SDimitry Andric     if (TypeIdx == 0) {
23970b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
23980b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
23990b57cec5SDimitry Andric     } else {
24000b57cec5SDimitry Andric       assert(TypeIdx == 1);
24010b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
24020b57cec5SDimitry Andric       // unsigned integer:
24030b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
24040b57cec5SDimitry Andric     }
24050b57cec5SDimitry Andric 
24060b57cec5SDimitry Andric     Observer.changedInstr(MI);
24070b57cec5SDimitry Andric     return Legalized;
24080b57cec5SDimitry Andric 
2409*5f757f3fSDimitry Andric   case TargetOpcode::G_ROTR:
2410*5f757f3fSDimitry Andric   case TargetOpcode::G_ROTL:
2411*5f757f3fSDimitry Andric     if (TypeIdx != 1)
2412*5f757f3fSDimitry Andric       return UnableToLegalize;
2413*5f757f3fSDimitry Andric 
2414*5f757f3fSDimitry Andric     Observer.changingInstr(MI);
2415*5f757f3fSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2416*5f757f3fSDimitry Andric     Observer.changedInstr(MI);
2417*5f757f3fSDimitry Andric     return Legalized;
2418*5f757f3fSDimitry Andric 
24190b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
24200b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
24210b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
24220b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
24230b57cec5SDimitry Andric     Observer.changingInstr(MI);
24240b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
24250b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
24260b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
24270b57cec5SDimitry Andric     Observer.changedInstr(MI);
24280b57cec5SDimitry Andric     return Legalized;
24290b57cec5SDimitry Andric 
2430fe6060f1SDimitry Andric   case TargetOpcode::G_SDIVREM:
2431fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2432fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2433fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2434fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2435fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2436fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2437fe6060f1SDimitry Andric     return Legalized;
2438fe6060f1SDimitry Andric 
24390b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
24400b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
24410b57cec5SDimitry Andric     Observer.changingInstr(MI);
24420b57cec5SDimitry Andric 
24430b57cec5SDimitry Andric     if (TypeIdx == 0) {
24440b57cec5SDimitry Andric       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
24450b57cec5SDimitry Andric         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
24460b57cec5SDimitry Andric 
24470b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, CvtOp);
24480b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
24490b57cec5SDimitry Andric     } else {
24500b57cec5SDimitry Andric       assert(TypeIdx == 1);
24510b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
24520b57cec5SDimitry Andric       // unsigned integer:
24530b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
24540b57cec5SDimitry Andric     }
24550b57cec5SDimitry Andric 
24560b57cec5SDimitry Andric     Observer.changedInstr(MI);
24570b57cec5SDimitry Andric     return Legalized;
24580b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
24590b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
24600b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
24610b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
24620b57cec5SDimitry Andric     Observer.changingInstr(MI);
24630b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
24640b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
24650b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
24660b57cec5SDimitry Andric     Observer.changedInstr(MI);
24670b57cec5SDimitry Andric     return Legalized;
24680b57cec5SDimitry Andric 
2469fe6060f1SDimitry Andric   case TargetOpcode::G_UDIVREM:
2470fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2471fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2472fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2473fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2474fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2475fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2476fe6060f1SDimitry Andric     return Legalized;
2477fe6060f1SDimitry Andric 
24780b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
24790b57cec5SDimitry Andric     Observer.changingInstr(MI);
24800b57cec5SDimitry Andric     if (TypeIdx == 0) {
24810b57cec5SDimitry Andric       // Perform operation at larger width (any extension is fine here, high
24820b57cec5SDimitry Andric       // bits don't affect the result) and then truncate the result back to the
24830b57cec5SDimitry Andric       // original type.
24840b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
24850b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
24860b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
24870b57cec5SDimitry Andric     } else {
24880b57cec5SDimitry Andric       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
24890b57cec5SDimitry Andric       // Explicit extension is required here since high bits affect the result.
24900b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
24910b57cec5SDimitry Andric     }
24920b57cec5SDimitry Andric     Observer.changedInstr(MI);
24930b57cec5SDimitry Andric     return Legalized;
24940b57cec5SDimitry Andric 
24950b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
24960b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
2497*5f757f3fSDimitry Andric   case TargetOpcode::G_IS_FPCLASS:
24980b57cec5SDimitry Andric     Observer.changingInstr(MI);
24998bcb0991SDimitry Andric 
25008bcb0991SDimitry Andric     if (TypeIdx == 0)
25010b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
25028bcb0991SDimitry Andric     else
25038bcb0991SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
25048bcb0991SDimitry Andric 
25050b57cec5SDimitry Andric     Observer.changedInstr(MI);
25060b57cec5SDimitry Andric     return Legalized;
25070b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
25080b57cec5SDimitry Andric     Observer.changingInstr(MI);
2509e8d8bef9SDimitry Andric 
2510e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2511e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2512e8d8bef9SDimitry Andric     else
25130b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2514e8d8bef9SDimitry Andric 
25150b57cec5SDimitry Andric     Observer.changedInstr(MI);
25160b57cec5SDimitry Andric     return Legalized;
25170b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
25180b57cec5SDimitry Andric     Observer.changingInstr(MI);
2519e8d8bef9SDimitry Andric 
2520e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2521e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2522e8d8bef9SDimitry Andric     else
25230b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2524e8d8bef9SDimitry Andric 
25250b57cec5SDimitry Andric     Observer.changedInstr(MI);
25260b57cec5SDimitry Andric     return Legalized;
25270b57cec5SDimitry Andric   case TargetOpcode::G_LOAD:
25280b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
25290b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
25300b57cec5SDimitry Andric     Observer.changingInstr(MI);
25310b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25320b57cec5SDimitry Andric     Observer.changedInstr(MI);
25330b57cec5SDimitry Andric     return Legalized;
25340b57cec5SDimitry Andric 
25350b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
25360b57cec5SDimitry Andric     if (TypeIdx != 0)
25370b57cec5SDimitry Andric       return UnableToLegalize;
25380b57cec5SDimitry Andric 
25390b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2540e8d8bef9SDimitry Andric     if (!Ty.isScalar())
25410b57cec5SDimitry Andric       return UnableToLegalize;
25420b57cec5SDimitry Andric 
25430b57cec5SDimitry Andric     Observer.changingInstr(MI);
25440b57cec5SDimitry Andric 
25450b57cec5SDimitry Andric     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
25460b57cec5SDimitry Andric       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
25470b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, ExtType);
25480b57cec5SDimitry Andric 
25490b57cec5SDimitry Andric     Observer.changedInstr(MI);
25500b57cec5SDimitry Andric     return Legalized;
25510b57cec5SDimitry Andric   }
25520b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
25530b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
25540b57cec5SDimitry Andric     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2555480093f4SDimitry Andric     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2556480093f4SDimitry Andric         MRI.getType(MI.getOperand(0).getReg()));
2557480093f4SDimitry Andric     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2558480093f4SDimitry Andric             ExtOpc == TargetOpcode::G_ANYEXT) &&
2559480093f4SDimitry Andric            "Illegal Extend");
2560480093f4SDimitry Andric     const APInt &SrcVal = SrcMO.getCImm()->getValue();
2561480093f4SDimitry Andric     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2562480093f4SDimitry Andric                            ? SrcVal.sext(WideTy.getSizeInBits())
2563480093f4SDimitry Andric                            : SrcVal.zext(WideTy.getSizeInBits());
25640b57cec5SDimitry Andric     Observer.changingInstr(MI);
25650b57cec5SDimitry Andric     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
25660b57cec5SDimitry Andric 
25670b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25680b57cec5SDimitry Andric     Observer.changedInstr(MI);
25690b57cec5SDimitry Andric     return Legalized;
25700b57cec5SDimitry Andric   }
25710b57cec5SDimitry Andric   case TargetOpcode::G_FCONSTANT: {
2572fcaf7f86SDimitry Andric     // To avoid changing the bits of the constant due to extension to a larger
2573fcaf7f86SDimitry Andric     // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
25740b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
2575fcaf7f86SDimitry Andric     APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2576fcaf7f86SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
2577fcaf7f86SDimitry Andric     auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2578fcaf7f86SDimitry Andric     widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2579fcaf7f86SDimitry Andric     MI.eraseFromParent();
25800b57cec5SDimitry Andric     return Legalized;
25810b57cec5SDimitry Andric   }
25820b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
25830b57cec5SDimitry Andric     Observer.changingInstr(MI);
25840b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25850b57cec5SDimitry Andric     Observer.changedInstr(MI);
25860b57cec5SDimitry Andric     return Legalized;
25870b57cec5SDimitry Andric   }
25880b57cec5SDimitry Andric   case TargetOpcode::G_BRCOND:
25890b57cec5SDimitry Andric     Observer.changingInstr(MI);
25900b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
25910b57cec5SDimitry Andric     Observer.changedInstr(MI);
25920b57cec5SDimitry Andric     return Legalized;
25930b57cec5SDimitry Andric 
25940b57cec5SDimitry Andric   case TargetOpcode::G_FCMP:
25950b57cec5SDimitry Andric     Observer.changingInstr(MI);
25960b57cec5SDimitry Andric     if (TypeIdx == 0)
25970b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
25980b57cec5SDimitry Andric     else {
25990b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
26000b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
26010b57cec5SDimitry Andric     }
26020b57cec5SDimitry Andric     Observer.changedInstr(MI);
26030b57cec5SDimitry Andric     return Legalized;
26040b57cec5SDimitry Andric 
26050b57cec5SDimitry Andric   case TargetOpcode::G_ICMP:
26060b57cec5SDimitry Andric     Observer.changingInstr(MI);
26070b57cec5SDimitry Andric     if (TypeIdx == 0)
26080b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26090b57cec5SDimitry Andric     else {
26100b57cec5SDimitry Andric       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
26110b57cec5SDimitry Andric                                MI.getOperand(1).getPredicate()))
26120b57cec5SDimitry Andric                                ? TargetOpcode::G_SEXT
26130b57cec5SDimitry Andric                                : TargetOpcode::G_ZEXT;
26140b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
26150b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
26160b57cec5SDimitry Andric     }
26170b57cec5SDimitry Andric     Observer.changedInstr(MI);
26180b57cec5SDimitry Andric     return Legalized;
26190b57cec5SDimitry Andric 
2620480093f4SDimitry Andric   case TargetOpcode::G_PTR_ADD:
2621480093f4SDimitry Andric     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
26220b57cec5SDimitry Andric     Observer.changingInstr(MI);
26230b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
26240b57cec5SDimitry Andric     Observer.changedInstr(MI);
26250b57cec5SDimitry Andric     return Legalized;
26260b57cec5SDimitry Andric 
26270b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
26280b57cec5SDimitry Andric     assert(TypeIdx == 0 && "Expecting only Idx 0");
26290b57cec5SDimitry Andric 
26300b57cec5SDimitry Andric     Observer.changingInstr(MI);
26310b57cec5SDimitry Andric     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
26320b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2633bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
26340b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
26350b57cec5SDimitry Andric     }
26360b57cec5SDimitry Andric 
26370b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
26380b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
26390b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
26400b57cec5SDimitry Andric     Observer.changedInstr(MI);
26410b57cec5SDimitry Andric     return Legalized;
26420b57cec5SDimitry Andric   }
26430b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
26440b57cec5SDimitry Andric     if (TypeIdx == 0) {
26450b57cec5SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
26460b57cec5SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
26470b57cec5SDimitry Andric       Observer.changingInstr(MI);
26480b57cec5SDimitry Andric 
2649fe6060f1SDimitry Andric       widenScalarSrc(
2650fe6060f1SDimitry Andric           MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2651349cc55cSDimitry Andric           TargetOpcode::G_ANYEXT);
26520b57cec5SDimitry Andric 
26530b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
26540b57cec5SDimitry Andric       Observer.changedInstr(MI);
26550b57cec5SDimitry Andric       return Legalized;
26560b57cec5SDimitry Andric     }
26570b57cec5SDimitry Andric 
26580b57cec5SDimitry Andric     if (TypeIdx != 2)
26590b57cec5SDimitry Andric       return UnableToLegalize;
26600b57cec5SDimitry Andric     Observer.changingInstr(MI);
2661480093f4SDimitry Andric     // TODO: Probably should be zext
26620b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
26630b57cec5SDimitry Andric     Observer.changedInstr(MI);
26640b57cec5SDimitry Andric     return Legalized;
26650b57cec5SDimitry Andric   }
2666480093f4SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
2667*5f757f3fSDimitry Andric     if (TypeIdx == 0) {
2668*5f757f3fSDimitry Andric       Observer.changingInstr(MI);
2669*5f757f3fSDimitry Andric       const LLT WideEltTy = WideTy.getElementType();
2670*5f757f3fSDimitry Andric 
2671*5f757f3fSDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2672*5f757f3fSDimitry Andric       widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2673*5f757f3fSDimitry Andric       widenScalarDst(MI, WideTy, 0);
2674*5f757f3fSDimitry Andric       Observer.changedInstr(MI);
2675*5f757f3fSDimitry Andric       return Legalized;
2676*5f757f3fSDimitry Andric     }
2677*5f757f3fSDimitry Andric 
2678480093f4SDimitry Andric     if (TypeIdx == 1) {
2679480093f4SDimitry Andric       Observer.changingInstr(MI);
2680480093f4SDimitry Andric 
2681480093f4SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
2682480093f4SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
2683fe6060f1SDimitry Andric       LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2684480093f4SDimitry Andric 
2685480093f4SDimitry Andric       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2686480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2687480093f4SDimitry Andric       widenScalarDst(MI, WideVecTy, 0);
2688480093f4SDimitry Andric       Observer.changedInstr(MI);
2689480093f4SDimitry Andric       return Legalized;
2690480093f4SDimitry Andric     }
2691480093f4SDimitry Andric 
2692480093f4SDimitry Andric     if (TypeIdx == 2) {
2693480093f4SDimitry Andric       Observer.changingInstr(MI);
2694480093f4SDimitry Andric       // TODO: Probably should be zext
2695480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2696480093f4SDimitry Andric       Observer.changedInstr(MI);
26975ffd83dbSDimitry Andric       return Legalized;
2698480093f4SDimitry Andric     }
2699480093f4SDimitry Andric 
27005ffd83dbSDimitry Andric     return UnableToLegalize;
2701480093f4SDimitry Andric   }
27020b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
27030b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
27040b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
27050b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
27068bcb0991SDimitry Andric   case TargetOpcode::G_FMAD:
27070b57cec5SDimitry Andric   case TargetOpcode::G_FNEG:
27080b57cec5SDimitry Andric   case TargetOpcode::G_FABS:
27090b57cec5SDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
27100b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM:
27110b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM:
27120b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
27130b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
27140b57cec5SDimitry Andric   case TargetOpcode::G_FMINIMUM:
27150b57cec5SDimitry Andric   case TargetOpcode::G_FMAXIMUM:
27160b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
27170b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
27180b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
27190b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
27200b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
27210b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
27220b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
27230b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
27240b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
27250b57cec5SDimitry Andric   case TargetOpcode::G_FRINT:
27260b57cec5SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
27270b57cec5SDimitry Andric   case TargetOpcode::G_FSQRT:
27280b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
27290b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
2730*5f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
27310b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
27320b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
27330b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
2734e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
27350b57cec5SDimitry Andric     assert(TypeIdx == 0);
27360b57cec5SDimitry Andric     Observer.changingInstr(MI);
27370b57cec5SDimitry Andric 
27380b57cec5SDimitry Andric     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
27390b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
27400b57cec5SDimitry Andric 
27410b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
27420b57cec5SDimitry Andric     Observer.changedInstr(MI);
27430b57cec5SDimitry Andric     return Legalized;
274406c3fb27SDimitry Andric   case TargetOpcode::G_FPOWI:
274506c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
274606c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP: {
274706c3fb27SDimitry Andric     if (TypeIdx == 0) {
274806c3fb27SDimitry Andric       if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2749e8d8bef9SDimitry Andric         return UnableToLegalize;
275006c3fb27SDimitry Andric 
2751e8d8bef9SDimitry Andric       Observer.changingInstr(MI);
2752e8d8bef9SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2753e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2754e8d8bef9SDimitry Andric       Observer.changedInstr(MI);
2755e8d8bef9SDimitry Andric       return Legalized;
2756e8d8bef9SDimitry Andric     }
275706c3fb27SDimitry Andric 
275806c3fb27SDimitry Andric     if (TypeIdx == 1) {
275906c3fb27SDimitry Andric       // For some reason SelectionDAG tries to promote to a libcall without
276006c3fb27SDimitry Andric       // actually changing the integer type for promotion.
276106c3fb27SDimitry Andric       Observer.changingInstr(MI);
276206c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
276306c3fb27SDimitry Andric       Observer.changedInstr(MI);
276406c3fb27SDimitry Andric       return Legalized;
276506c3fb27SDimitry Andric     }
276606c3fb27SDimitry Andric 
276706c3fb27SDimitry Andric     return UnableToLegalize;
276806c3fb27SDimitry Andric   }
276906c3fb27SDimitry Andric   case TargetOpcode::G_FFREXP: {
277006c3fb27SDimitry Andric     Observer.changingInstr(MI);
277106c3fb27SDimitry Andric 
277206c3fb27SDimitry Andric     if (TypeIdx == 0) {
277306c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
277406c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
277506c3fb27SDimitry Andric     } else {
277606c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 1);
277706c3fb27SDimitry Andric     }
277806c3fb27SDimitry Andric 
277906c3fb27SDimitry Andric     Observer.changedInstr(MI);
278006c3fb27SDimitry Andric     return Legalized;
278106c3fb27SDimitry Andric   }
27820b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
27830b57cec5SDimitry Andric     if (TypeIdx != 1)
27840b57cec5SDimitry Andric       return UnableToLegalize;
27850b57cec5SDimitry Andric 
27860b57cec5SDimitry Andric     Observer.changingInstr(MI);
27870b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
27880b57cec5SDimitry Andric     Observer.changedInstr(MI);
27890b57cec5SDimitry Andric     return Legalized;
27900b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
27910b57cec5SDimitry Andric     if (TypeIdx != 0)
27920b57cec5SDimitry Andric       return UnableToLegalize;
27930b57cec5SDimitry Andric 
27940b57cec5SDimitry Andric     Observer.changingInstr(MI);
27950b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0);
27960b57cec5SDimitry Andric     Observer.changedInstr(MI);
27970b57cec5SDimitry Andric     return Legalized;
27980b57cec5SDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
27990b57cec5SDimitry Andric     Observer.changingInstr(MI);
28000b57cec5SDimitry Andric 
28010b57cec5SDimitry Andric     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
28020b57cec5SDimitry Andric     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
28030b57cec5SDimitry Andric       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
28040b57cec5SDimitry Andric 
28050b57cec5SDimitry Andric     // Avoid changing the result vector type if the source element type was
28060b57cec5SDimitry Andric     // requested.
28070b57cec5SDimitry Andric     if (TypeIdx == 1) {
2808e8d8bef9SDimitry Andric       MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
28090b57cec5SDimitry Andric     } else {
28100b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
28110b57cec5SDimitry Andric     }
28120b57cec5SDimitry Andric 
28130b57cec5SDimitry Andric     Observer.changedInstr(MI);
28140b57cec5SDimitry Andric     return Legalized;
28150b57cec5SDimitry Andric   }
28168bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG:
28178bcb0991SDimitry Andric     if (TypeIdx != 0)
28188bcb0991SDimitry Andric       return UnableToLegalize;
28198bcb0991SDimitry Andric 
28208bcb0991SDimitry Andric     Observer.changingInstr(MI);
28218bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
28228bcb0991SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
28238bcb0991SDimitry Andric     Observer.changedInstr(MI);
28248bcb0991SDimitry Andric     return Legalized;
28255ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
28265ffd83dbSDimitry Andric     if (TypeIdx != 1)
28275ffd83dbSDimitry Andric       return UnableToLegalize;
28285ffd83dbSDimitry Andric     Observer.changingInstr(MI);
28295ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
28305ffd83dbSDimitry Andric     Observer.changedInstr(MI);
28315ffd83dbSDimitry Andric     return Legalized;
28325ffd83dbSDimitry Andric   }
2833*5f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FADD:
2834*5f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMIN:
2835*5f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAX:
2836*5f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMINIMUM:
2837*5f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAXIMUM:
2838*5f757f3fSDimitry Andric     if (TypeIdx != 0)
2839*5f757f3fSDimitry Andric       return UnableToLegalize;
2840*5f757f3fSDimitry Andric     Observer.changingInstr(MI);
2841*5f757f3fSDimitry Andric     Register VecReg = MI.getOperand(1).getReg();
2842*5f757f3fSDimitry Andric     LLT VecTy = MRI.getType(VecReg);
2843*5f757f3fSDimitry Andric     LLT WideVecTy = VecTy.isVector()
2844*5f757f3fSDimitry Andric                         ? LLT::vector(VecTy.getElementCount(), WideTy)
2845*5f757f3fSDimitry Andric                         : WideTy;
2846*5f757f3fSDimitry Andric     widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
2847*5f757f3fSDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2848*5f757f3fSDimitry Andric     Observer.changedInstr(MI);
2849*5f757f3fSDimitry Andric     return Legalized;
28505ffd83dbSDimitry Andric   }
28515ffd83dbSDimitry Andric }
28525ffd83dbSDimitry Andric 
28535ffd83dbSDimitry Andric static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
28545ffd83dbSDimitry Andric                              MachineIRBuilder &B, Register Src, LLT Ty) {
28555ffd83dbSDimitry Andric   auto Unmerge = B.buildUnmerge(Ty, Src);
28565ffd83dbSDimitry Andric   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
28575ffd83dbSDimitry Andric     Pieces.push_back(Unmerge.getReg(I));
28585ffd83dbSDimitry Andric }
28595ffd83dbSDimitry Andric 
28605ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
286106c3fb27SDimitry Andric LegalizerHelper::lowerFConstant(MachineInstr &MI) {
28625ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
28635ffd83dbSDimitry Andric 
286406c3fb27SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
286506c3fb27SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
286606c3fb27SDimitry Andric 
286706c3fb27SDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
286806c3fb27SDimitry Andric   LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
286906c3fb27SDimitry Andric   Align Alignment = Align(DL.getABITypeAlign(
287006c3fb27SDimitry Andric       getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
287106c3fb27SDimitry Andric 
287206c3fb27SDimitry Andric   auto Addr = MIRBuilder.buildConstantPool(
287306c3fb27SDimitry Andric       AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
287406c3fb27SDimitry Andric                      MI.getOperand(1).getFPImm(), Alignment));
287506c3fb27SDimitry Andric 
287606c3fb27SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
287706c3fb27SDimitry Andric       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
287806c3fb27SDimitry Andric       MRI.getType(Dst), Alignment);
287906c3fb27SDimitry Andric 
288006c3fb27SDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
288106c3fb27SDimitry Andric   MI.eraseFromParent();
288206c3fb27SDimitry Andric 
288306c3fb27SDimitry Andric   return Legalized;
288406c3fb27SDimitry Andric }
288506c3fb27SDimitry Andric 
288606c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
288706c3fb27SDimitry Andric LegalizerHelper::lowerBitcast(MachineInstr &MI) {
288806c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
28895ffd83dbSDimitry Andric   if (SrcTy.isVector()) {
28905ffd83dbSDimitry Andric     LLT SrcEltTy = SrcTy.getElementType();
28915ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
28925ffd83dbSDimitry Andric 
28935ffd83dbSDimitry Andric     if (DstTy.isVector()) {
28945ffd83dbSDimitry Andric       int NumDstElt = DstTy.getNumElements();
28955ffd83dbSDimitry Andric       int NumSrcElt = SrcTy.getNumElements();
28965ffd83dbSDimitry Andric 
28975ffd83dbSDimitry Andric       LLT DstEltTy = DstTy.getElementType();
28985ffd83dbSDimitry Andric       LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
28995ffd83dbSDimitry Andric       LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
29005ffd83dbSDimitry Andric 
29015ffd83dbSDimitry Andric       // If there's an element size mismatch, insert intermediate casts to match
29025ffd83dbSDimitry Andric       // the result element type.
29035ffd83dbSDimitry Andric       if (NumSrcElt < NumDstElt) { // Source element type is larger.
29045ffd83dbSDimitry Andric         // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
29055ffd83dbSDimitry Andric         //
29065ffd83dbSDimitry Andric         // =>
29075ffd83dbSDimitry Andric         //
29085ffd83dbSDimitry Andric         // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
29095ffd83dbSDimitry Andric         // %3:_(<2 x s8>) = G_BITCAST %2
29105ffd83dbSDimitry Andric         // %4:_(<2 x s8>) = G_BITCAST %3
29115ffd83dbSDimitry Andric         // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
2912fe6060f1SDimitry Andric         DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
29135ffd83dbSDimitry Andric         SrcPartTy = SrcEltTy;
29145ffd83dbSDimitry Andric       } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
29155ffd83dbSDimitry Andric         //
29165ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
29175ffd83dbSDimitry Andric         //
29185ffd83dbSDimitry Andric         // =>
29195ffd83dbSDimitry Andric         //
29205ffd83dbSDimitry Andric         // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
29215ffd83dbSDimitry Andric         // %3:_(s16) = G_BITCAST %2
29225ffd83dbSDimitry Andric         // %4:_(s16) = G_BITCAST %3
29235ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
2924fe6060f1SDimitry Andric         SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
29255ffd83dbSDimitry Andric         DstCastTy = DstEltTy;
29265ffd83dbSDimitry Andric       }
29275ffd83dbSDimitry Andric 
29285ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
29295ffd83dbSDimitry Andric       for (Register &SrcReg : SrcRegs)
29305ffd83dbSDimitry Andric         SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
29315ffd83dbSDimitry Andric     } else
29325ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
29335ffd83dbSDimitry Andric 
2934bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
29355ffd83dbSDimitry Andric     MI.eraseFromParent();
29365ffd83dbSDimitry Andric     return Legalized;
29375ffd83dbSDimitry Andric   }
29385ffd83dbSDimitry Andric 
29395ffd83dbSDimitry Andric   if (DstTy.isVector()) {
29405ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
29415ffd83dbSDimitry Andric     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2942bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
29435ffd83dbSDimitry Andric     MI.eraseFromParent();
29445ffd83dbSDimitry Andric     return Legalized;
29455ffd83dbSDimitry Andric   }
29465ffd83dbSDimitry Andric 
29475ffd83dbSDimitry Andric   return UnableToLegalize;
29485ffd83dbSDimitry Andric }
29495ffd83dbSDimitry Andric 
2950e8d8bef9SDimitry Andric /// Figure out the bit offset into a register when coercing a vector index for
2951e8d8bef9SDimitry Andric /// the wide element type. This is only for the case when promoting vector to
2952e8d8bef9SDimitry Andric /// one with larger elements.
2953e8d8bef9SDimitry Andric //
2954e8d8bef9SDimitry Andric ///
2955e8d8bef9SDimitry Andric /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2956e8d8bef9SDimitry Andric /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2957e8d8bef9SDimitry Andric static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2958e8d8bef9SDimitry Andric                                                    Register Idx,
2959e8d8bef9SDimitry Andric                                                    unsigned NewEltSize,
2960e8d8bef9SDimitry Andric                                                    unsigned OldEltSize) {
2961e8d8bef9SDimitry Andric   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2962e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(Idx);
2963e8d8bef9SDimitry Andric 
2964e8d8bef9SDimitry Andric   // Now figure out the amount we need to shift to get the target bits.
2965e8d8bef9SDimitry Andric   auto OffsetMask = B.buildConstant(
2966349cc55cSDimitry Andric       IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2967e8d8bef9SDimitry Andric   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2968e8d8bef9SDimitry Andric   return B.buildShl(IdxTy, OffsetIdx,
2969e8d8bef9SDimitry Andric                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2970e8d8bef9SDimitry Andric }
2971e8d8bef9SDimitry Andric 
2972e8d8bef9SDimitry Andric /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2973e8d8bef9SDimitry Andric /// is casting to a vector with a smaller element size, perform multiple element
2974e8d8bef9SDimitry Andric /// extracts and merge the results. If this is coercing to a vector with larger
2975e8d8bef9SDimitry Andric /// elements, index the bitcasted vector and extract the target element with bit
2976e8d8bef9SDimitry Andric /// operations. This is intended to force the indexing in the native register
2977e8d8bef9SDimitry Andric /// size for architectures that can dynamically index the register file.
29785ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
2979e8d8bef9SDimitry Andric LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2980e8d8bef9SDimitry Andric                                          LLT CastTy) {
2981e8d8bef9SDimitry Andric   if (TypeIdx != 1)
2982e8d8bef9SDimitry Andric     return UnableToLegalize;
2983e8d8bef9SDimitry Andric 
298406c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
2985e8d8bef9SDimitry Andric 
2986e8d8bef9SDimitry Andric   LLT SrcEltTy = SrcVecTy.getElementType();
2987e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2988e8d8bef9SDimitry Andric   unsigned OldNumElts = SrcVecTy.getNumElements();
2989e8d8bef9SDimitry Andric 
2990e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2991e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2992e8d8bef9SDimitry Andric 
2993e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
2994e8d8bef9SDimitry Andric   const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2995e8d8bef9SDimitry Andric   if (NewNumElts > OldNumElts) {
2996e8d8bef9SDimitry Andric     // Decreasing the vector element size
2997e8d8bef9SDimitry Andric     //
2998e8d8bef9SDimitry Andric     // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2999e8d8bef9SDimitry Andric     //  =>
3000e8d8bef9SDimitry Andric     //  v4i32:castx = bitcast x:v2i64
3001e8d8bef9SDimitry Andric     //
3002e8d8bef9SDimitry Andric     // i64 = bitcast
3003e8d8bef9SDimitry Andric     //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3004e8d8bef9SDimitry Andric     //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
3005e8d8bef9SDimitry Andric     //
3006e8d8bef9SDimitry Andric     if (NewNumElts % OldNumElts != 0)
3007e8d8bef9SDimitry Andric       return UnableToLegalize;
3008e8d8bef9SDimitry Andric 
3009e8d8bef9SDimitry Andric     // Type of the intermediate result vector.
3010e8d8bef9SDimitry Andric     const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3011fe6060f1SDimitry Andric     LLT MidTy =
3012fe6060f1SDimitry Andric         LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3013e8d8bef9SDimitry Andric 
3014e8d8bef9SDimitry Andric     auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3015e8d8bef9SDimitry Andric 
3016e8d8bef9SDimitry Andric     SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3017e8d8bef9SDimitry Andric     auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3018e8d8bef9SDimitry Andric 
3019e8d8bef9SDimitry Andric     for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3020e8d8bef9SDimitry Andric       auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3021e8d8bef9SDimitry Andric       auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3022e8d8bef9SDimitry Andric       auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3023e8d8bef9SDimitry Andric       NewOps[I] = Elt.getReg(0);
3024e8d8bef9SDimitry Andric     }
3025e8d8bef9SDimitry Andric 
3026e8d8bef9SDimitry Andric     auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3027e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, NewVec);
3028e8d8bef9SDimitry Andric     MI.eraseFromParent();
3029e8d8bef9SDimitry Andric     return Legalized;
3030e8d8bef9SDimitry Andric   }
3031e8d8bef9SDimitry Andric 
3032e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3033e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
3034e8d8bef9SDimitry Andric       return UnableToLegalize;
3035e8d8bef9SDimitry Andric 
3036e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3037e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3038e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3039e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
3040e8d8bef9SDimitry Andric       return UnableToLegalize;
3041e8d8bef9SDimitry Andric 
3042e8d8bef9SDimitry Andric     // Increasing the vector element size.
3043e8d8bef9SDimitry Andric     // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3044e8d8bef9SDimitry Andric     //
3045e8d8bef9SDimitry Andric     //   =>
3046e8d8bef9SDimitry Andric     //
3047e8d8bef9SDimitry Andric     // %cast = G_BITCAST %vec
3048e8d8bef9SDimitry Andric     // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3049e8d8bef9SDimitry Andric     // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3050e8d8bef9SDimitry Andric     // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3051e8d8bef9SDimitry Andric     // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3052e8d8bef9SDimitry Andric     // %elt_bits = G_LSHR %wide_elt, %offset_bits
3053e8d8bef9SDimitry Andric     // %elt = G_TRUNC %elt_bits
3054e8d8bef9SDimitry Andric 
3055e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3056e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3057e8d8bef9SDimitry Andric 
3058e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3059e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3060e8d8bef9SDimitry Andric 
3061e8d8bef9SDimitry Andric     Register WideElt = CastVec;
3062e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3063e8d8bef9SDimitry Andric       WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3064e8d8bef9SDimitry Andric                                                      ScaledIdx).getReg(0);
3065e8d8bef9SDimitry Andric     }
3066e8d8bef9SDimitry Andric 
3067e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3068e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3069e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3070e8d8bef9SDimitry Andric 
3071e8d8bef9SDimitry Andric     // Shift the wide element to get the target element.
3072e8d8bef9SDimitry Andric     auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3073e8d8bef9SDimitry Andric     MIRBuilder.buildTrunc(Dst, ExtractedBits);
3074e8d8bef9SDimitry Andric     MI.eraseFromParent();
3075e8d8bef9SDimitry Andric     return Legalized;
3076e8d8bef9SDimitry Andric   }
3077e8d8bef9SDimitry Andric 
3078e8d8bef9SDimitry Andric   return UnableToLegalize;
3079e8d8bef9SDimitry Andric }
3080e8d8bef9SDimitry Andric 
3081e8d8bef9SDimitry Andric /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3082e8d8bef9SDimitry Andric /// TargetReg, while preserving other bits in \p TargetReg.
3083e8d8bef9SDimitry Andric ///
3084e8d8bef9SDimitry Andric /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3085e8d8bef9SDimitry Andric static Register buildBitFieldInsert(MachineIRBuilder &B,
3086e8d8bef9SDimitry Andric                                     Register TargetReg, Register InsertReg,
3087e8d8bef9SDimitry Andric                                     Register OffsetBits) {
3088e8d8bef9SDimitry Andric   LLT TargetTy = B.getMRI()->getType(TargetReg);
3089e8d8bef9SDimitry Andric   LLT InsertTy = B.getMRI()->getType(InsertReg);
3090e8d8bef9SDimitry Andric   auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3091e8d8bef9SDimitry Andric   auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3092e8d8bef9SDimitry Andric 
3093e8d8bef9SDimitry Andric   // Produce a bitmask of the value to insert
3094e8d8bef9SDimitry Andric   auto EltMask = B.buildConstant(
3095e8d8bef9SDimitry Andric     TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3096e8d8bef9SDimitry Andric                                    InsertTy.getSizeInBits()));
3097e8d8bef9SDimitry Andric   // Shift it into position
3098e8d8bef9SDimitry Andric   auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3099e8d8bef9SDimitry Andric   auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3100e8d8bef9SDimitry Andric 
3101e8d8bef9SDimitry Andric   // Clear out the bits in the wide element
3102e8d8bef9SDimitry Andric   auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3103e8d8bef9SDimitry Andric 
3104e8d8bef9SDimitry Andric   // The value to insert has all zeros already, so stick it into the masked
3105e8d8bef9SDimitry Andric   // wide element.
3106e8d8bef9SDimitry Andric   return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3107e8d8bef9SDimitry Andric }
3108e8d8bef9SDimitry Andric 
3109e8d8bef9SDimitry Andric /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3110e8d8bef9SDimitry Andric /// is increasing the element size, perform the indexing in the target element
3111e8d8bef9SDimitry Andric /// type, and use bit operations to insert at the element position. This is
3112e8d8bef9SDimitry Andric /// intended for architectures that can dynamically index the register file and
3113e8d8bef9SDimitry Andric /// want to force indexing in the native register size.
3114e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3115e8d8bef9SDimitry Andric LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3116e8d8bef9SDimitry Andric                                         LLT CastTy) {
31175ffd83dbSDimitry Andric   if (TypeIdx != 0)
31185ffd83dbSDimitry Andric     return UnableToLegalize;
31195ffd83dbSDimitry Andric 
312006c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
312106c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
312206c3fb27SDimitry Andric   LLT VecTy = DstTy;
3123e8d8bef9SDimitry Andric 
3124e8d8bef9SDimitry Andric   LLT VecEltTy = VecTy.getElementType();
3125e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3126e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
3127e8d8bef9SDimitry Andric   const unsigned OldEltSize = VecEltTy.getSizeInBits();
3128e8d8bef9SDimitry Andric 
3129e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3130e8d8bef9SDimitry Andric   unsigned OldNumElts = VecTy.getNumElements();
3131e8d8bef9SDimitry Andric 
3132e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3133e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3134e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
31355ffd83dbSDimitry Andric       return UnableToLegalize;
31365ffd83dbSDimitry Andric 
3137e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3138e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3139e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3140e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
31415ffd83dbSDimitry Andric       return UnableToLegalize;
31425ffd83dbSDimitry Andric 
3143e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3144e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3145e8d8bef9SDimitry Andric 
3146e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3147e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3148e8d8bef9SDimitry Andric 
3149e8d8bef9SDimitry Andric     Register ExtractedElt = CastVec;
3150e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3151e8d8bef9SDimitry Andric       ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3152e8d8bef9SDimitry Andric                                                           ScaledIdx).getReg(0);
31535ffd83dbSDimitry Andric     }
31545ffd83dbSDimitry Andric 
3155e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3156e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3157e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3158e8d8bef9SDimitry Andric 
3159e8d8bef9SDimitry Andric     Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3160e8d8bef9SDimitry Andric                                                Val, OffsetBits);
3161e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3162e8d8bef9SDimitry Andric       InsertedElt = MIRBuilder.buildInsertVectorElement(
3163e8d8bef9SDimitry Andric         CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3164e8d8bef9SDimitry Andric     }
3165e8d8bef9SDimitry Andric 
3166e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, InsertedElt);
3167e8d8bef9SDimitry Andric     MI.eraseFromParent();
31685ffd83dbSDimitry Andric     return Legalized;
31695ffd83dbSDimitry Andric   }
3170e8d8bef9SDimitry Andric 
31715ffd83dbSDimitry Andric   return UnableToLegalize;
31720b57cec5SDimitry Andric }
31730b57cec5SDimitry Andric 
3174fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
31750b57cec5SDimitry Andric   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3176fe6060f1SDimitry Andric   Register DstReg = LoadMI.getDstReg();
3177fe6060f1SDimitry Andric   Register PtrReg = LoadMI.getPointerReg();
31780b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
3179fe6060f1SDimitry Andric   MachineMemOperand &MMO = LoadMI.getMMO();
3180fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3181fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
31820b57cec5SDimitry Andric 
3183fe6060f1SDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3184fe6060f1SDimitry Andric   unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3185fe6060f1SDimitry Andric 
3186fe6060f1SDimitry Andric   if (MemSizeInBits != MemStoreSizeInBits) {
3187349cc55cSDimitry Andric     if (MemTy.isVector())
3188349cc55cSDimitry Andric       return UnableToLegalize;
3189349cc55cSDimitry Andric 
3190fe6060f1SDimitry Andric     // Promote to a byte-sized load if not loading an integral number of
3191fe6060f1SDimitry Andric     // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3192fe6060f1SDimitry Andric     LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3193fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3194fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3195fe6060f1SDimitry Andric 
3196fe6060f1SDimitry Andric     Register LoadReg = DstReg;
3197fe6060f1SDimitry Andric     LLT LoadTy = DstTy;
3198fe6060f1SDimitry Andric 
3199fe6060f1SDimitry Andric     // If this wasn't already an extending load, we need to widen the result
3200fe6060f1SDimitry Andric     // register to avoid creating a load with a narrower result than the source.
3201fe6060f1SDimitry Andric     if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3202fe6060f1SDimitry Andric       LoadTy = WideMemTy;
3203fe6060f1SDimitry Andric       LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3204fe6060f1SDimitry Andric     }
3205fe6060f1SDimitry Andric 
3206fe6060f1SDimitry Andric     if (isa<GSExtLoad>(LoadMI)) {
3207fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3208fe6060f1SDimitry Andric       MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
320981ad6265SDimitry Andric     } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3210fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3211fe6060f1SDimitry Andric       // The extra bits are guaranteed to be zero, since we stored them that
3212fe6060f1SDimitry Andric       // way.  A zext load from Wide thus automatically gives zext from MemVT.
3213fe6060f1SDimitry Andric       MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3214fe6060f1SDimitry Andric     } else {
3215fe6060f1SDimitry Andric       MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3216fe6060f1SDimitry Andric     }
3217fe6060f1SDimitry Andric 
3218fe6060f1SDimitry Andric     if (DstTy != LoadTy)
3219fe6060f1SDimitry Andric       MIRBuilder.buildTrunc(DstReg, LoadReg);
3220fe6060f1SDimitry Andric 
3221fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
3222fe6060f1SDimitry Andric     return Legalized;
3223fe6060f1SDimitry Andric   }
3224fe6060f1SDimitry Andric 
3225fe6060f1SDimitry Andric   // Big endian lowering not implemented.
3226fe6060f1SDimitry Andric   if (MIRBuilder.getDataLayout().isBigEndian())
3227fe6060f1SDimitry Andric     return UnableToLegalize;
3228fe6060f1SDimitry Andric 
3229349cc55cSDimitry Andric   // This load needs splitting into power of 2 sized loads.
3230349cc55cSDimitry Andric   //
32318bcb0991SDimitry Andric   // Our strategy here is to generate anyextending loads for the smaller
32328bcb0991SDimitry Andric   // types up to next power-2 result type, and then combine the two larger
32338bcb0991SDimitry Andric   // result values together, before truncating back down to the non-pow-2
32348bcb0991SDimitry Andric   // type.
32358bcb0991SDimitry Andric   // E.g. v1 = i24 load =>
32365ffd83dbSDimitry Andric   // v2 = i32 zextload (2 byte)
32378bcb0991SDimitry Andric   // v3 = i32 load (1 byte)
32388bcb0991SDimitry Andric   // v4 = i32 shl v3, 16
32398bcb0991SDimitry Andric   // v5 = i32 or v4, v2
32408bcb0991SDimitry Andric   // v1 = i24 trunc v5
32418bcb0991SDimitry Andric   // By doing this we generate the correct truncate which should get
32428bcb0991SDimitry Andric   // combined away as an artifact with a matching extend.
3243349cc55cSDimitry Andric 
3244349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3245349cc55cSDimitry Andric 
3246349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
3247349cc55cSDimitry Andric     // This load needs splitting into power of 2 sized loads.
324806c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3249349cc55cSDimitry Andric     SmallSplitSize = MemSizeInBits - LargeSplitSize;
3250349cc55cSDimitry Andric   } else {
3251349cc55cSDimitry Andric     // This is already a power of 2, but we still need to split this in half.
3252349cc55cSDimitry Andric     //
3253349cc55cSDimitry Andric     // Assume we're being asked to decompose an unaligned load.
3254349cc55cSDimitry Andric     // TODO: If this requires multiple splits, handle them all at once.
3255349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3256349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3257349cc55cSDimitry Andric       return UnableToLegalize;
3258349cc55cSDimitry Andric 
3259349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3260349cc55cSDimitry Andric   }
3261349cc55cSDimitry Andric 
3262349cc55cSDimitry Andric   if (MemTy.isVector()) {
3263349cc55cSDimitry Andric     // TODO: Handle vector extloads
3264349cc55cSDimitry Andric     if (MemTy != DstTy)
3265349cc55cSDimitry Andric       return UnableToLegalize;
3266349cc55cSDimitry Andric 
3267349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3268349cc55cSDimitry Andric     // in half.
3269349cc55cSDimitry Andric     return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3270349cc55cSDimitry Andric   }
32718bcb0991SDimitry Andric 
32728bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
32738bcb0991SDimitry Andric       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3274fe6060f1SDimitry Andric   MachineMemOperand *SmallMMO =
3275fe6060f1SDimitry Andric       MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
32768bcb0991SDimitry Andric 
32778bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
3278fe6060f1SDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
32798bcb0991SDimitry Andric   LLT AnyExtTy = LLT::scalar(AnyExtSize);
3280fe6060f1SDimitry Andric   auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3281fe6060f1SDimitry Andric                                              PtrReg, *LargeMMO);
32828bcb0991SDimitry Andric 
3283fe6060f1SDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3284fe6060f1SDimitry Andric                                             LargeSplitSize / 8);
3285480093f4SDimitry Andric   Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3286fe6060f1SDimitry Andric   auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3287fe6060f1SDimitry Andric   auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3288fe6060f1SDimitry Andric                                              SmallPtr, *SmallMMO);
32898bcb0991SDimitry Andric 
32908bcb0991SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
32918bcb0991SDimitry Andric   auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3292fe6060f1SDimitry Andric 
3293fe6060f1SDimitry Andric   if (AnyExtTy == DstTy)
3294fe6060f1SDimitry Andric     MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3295349cc55cSDimitry Andric   else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
32968bcb0991SDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3297fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, {Or});
3298349cc55cSDimitry Andric   } else {
3299349cc55cSDimitry Andric     assert(DstTy.isPointer() && "expected pointer");
3300349cc55cSDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3301349cc55cSDimitry Andric 
3302349cc55cSDimitry Andric     // FIXME: We currently consider this to be illegal for non-integral address
3303349cc55cSDimitry Andric     // spaces, but we need still need a way to reinterpret the bits.
3304349cc55cSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
3305fe6060f1SDimitry Andric   }
3306fe6060f1SDimitry Andric 
3307fe6060f1SDimitry Andric   LoadMI.eraseFromParent();
33088bcb0991SDimitry Andric   return Legalized;
33098bcb0991SDimitry Andric }
3310e8d8bef9SDimitry Andric 
3311fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
33128bcb0991SDimitry Andric   // Lower a non-power of 2 store into multiple pow-2 stores.
33138bcb0991SDimitry Andric   // E.g. split an i24 store into an i16 store + i8 store.
33148bcb0991SDimitry Andric   // We do this by first extending the stored value to the next largest power
33158bcb0991SDimitry Andric   // of 2 type, and then using truncating stores to store the components.
33168bcb0991SDimitry Andric   // By doing this, likewise with G_LOAD, generate an extend that can be
33178bcb0991SDimitry Andric   // artifact-combined away instead of leaving behind extracts.
3318fe6060f1SDimitry Andric   Register SrcReg = StoreMI.getValueReg();
3319fe6060f1SDimitry Andric   Register PtrReg = StoreMI.getPointerReg();
33208bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
3321fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
3322fe6060f1SDimitry Andric   MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3323fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3324fe6060f1SDimitry Andric 
3325fe6060f1SDimitry Andric   unsigned StoreWidth = MemTy.getSizeInBits();
3326fe6060f1SDimitry Andric   unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3327fe6060f1SDimitry Andric 
3328fe6060f1SDimitry Andric   if (StoreWidth != StoreSizeInBits) {
3329349cc55cSDimitry Andric     if (SrcTy.isVector())
3330349cc55cSDimitry Andric       return UnableToLegalize;
3331349cc55cSDimitry Andric 
3332fe6060f1SDimitry Andric     // Promote to a byte-sized store with upper bits zero if not
3333fe6060f1SDimitry Andric     // storing an integral number of bytes.  For example, promote
3334fe6060f1SDimitry Andric     // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3335fe6060f1SDimitry Andric     LLT WideTy = LLT::scalar(StoreSizeInBits);
3336fe6060f1SDimitry Andric 
3337fe6060f1SDimitry Andric     if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3338fe6060f1SDimitry Andric       // Avoid creating a store with a narrower source than result.
3339fe6060f1SDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3340fe6060f1SDimitry Andric       SrcTy = WideTy;
3341fe6060f1SDimitry Andric     }
3342fe6060f1SDimitry Andric 
3343fe6060f1SDimitry Andric     auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3344fe6060f1SDimitry Andric 
3345fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3346fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3347fe6060f1SDimitry Andric     MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3348fe6060f1SDimitry Andric     StoreMI.eraseFromParent();
3349fe6060f1SDimitry Andric     return Legalized;
3350fe6060f1SDimitry Andric   }
3351fe6060f1SDimitry Andric 
3352349cc55cSDimitry Andric   if (MemTy.isVector()) {
3353349cc55cSDimitry Andric     // TODO: Handle vector trunc stores
3354349cc55cSDimitry Andric     if (MemTy != SrcTy)
3355349cc55cSDimitry Andric       return UnableToLegalize;
3356349cc55cSDimitry Andric 
3357349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3358349cc55cSDimitry Andric     // in half.
3359349cc55cSDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3360349cc55cSDimitry Andric   }
3361349cc55cSDimitry Andric 
3362349cc55cSDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3363349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3364349cc55cSDimitry Andric 
3365349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
336606c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3367349cc55cSDimitry Andric     SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3368349cc55cSDimitry Andric   } else {
3369349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3370349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
33718bcb0991SDimitry Andric       return UnableToLegalize; // Don't know what we're being asked to do.
33728bcb0991SDimitry Andric 
3373349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3374349cc55cSDimitry Andric   }
3375349cc55cSDimitry Andric 
3376fe6060f1SDimitry Andric   // Extend to the next pow-2. If this store was itself the result of lowering,
3377fe6060f1SDimitry Andric   // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3378349cc55cSDimitry Andric   // that's wider than the stored size.
3379349cc55cSDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3380349cc55cSDimitry Andric   const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3381349cc55cSDimitry Andric 
3382349cc55cSDimitry Andric   if (SrcTy.isPointer()) {
3383349cc55cSDimitry Andric     const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3384349cc55cSDimitry Andric     SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3385349cc55cSDimitry Andric   }
3386349cc55cSDimitry Andric 
3387fe6060f1SDimitry Andric   auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
33888bcb0991SDimitry Andric 
33898bcb0991SDimitry Andric   // Obtain the smaller value by shifting away the larger value.
3390fe6060f1SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3391fe6060f1SDimitry Andric   auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
33928bcb0991SDimitry Andric 
3393480093f4SDimitry Andric   // Generate the PtrAdd and truncating stores.
33948bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
33955ffd83dbSDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(
33965ffd83dbSDimitry Andric     LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3397480093f4SDimitry Andric   auto SmallPtr =
3398349cc55cSDimitry Andric     MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
33998bcb0991SDimitry Andric 
34008bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
34018bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
34028bcb0991SDimitry Andric   MachineMemOperand *SmallMMO =
34038bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3404fe6060f1SDimitry Andric   MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3405fe6060f1SDimitry Andric   MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3406fe6060f1SDimitry Andric   StoreMI.eraseFromParent();
34078bcb0991SDimitry Andric   return Legalized;
34088bcb0991SDimitry Andric }
3409e8d8bef9SDimitry Andric 
3410e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3411e8d8bef9SDimitry Andric LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3412e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
3413e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD: {
3414e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3415e8d8bef9SDimitry Andric       return UnableToLegalize;
3416fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3417fe6060f1SDimitry Andric 
3418fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of an extending load.
3419fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3420fe6060f1SDimitry Andric       return UnableToLegalize;
3421e8d8bef9SDimitry Andric 
3422e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3423e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3424fe6060f1SDimitry Andric     MMO.setType(CastTy);
3425e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3426e8d8bef9SDimitry Andric     return Legalized;
3427e8d8bef9SDimitry Andric   }
3428e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE: {
3429e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3430e8d8bef9SDimitry Andric       return UnableToLegalize;
3431e8d8bef9SDimitry Andric 
3432fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3433fe6060f1SDimitry Andric 
3434fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of a truncating store.
3435fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3436fe6060f1SDimitry Andric       return UnableToLegalize;
3437fe6060f1SDimitry Andric 
3438e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3439e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 0);
3440fe6060f1SDimitry Andric     MMO.setType(CastTy);
3441e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3442e8d8bef9SDimitry Andric     return Legalized;
3443e8d8bef9SDimitry Andric   }
3444e8d8bef9SDimitry Andric   case TargetOpcode::G_SELECT: {
3445e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3446e8d8bef9SDimitry Andric       return UnableToLegalize;
3447e8d8bef9SDimitry Andric 
3448e8d8bef9SDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3449e8d8bef9SDimitry Andric       LLVM_DEBUG(
3450e8d8bef9SDimitry Andric           dbgs() << "bitcast action not implemented for vector select\n");
3451e8d8bef9SDimitry Andric       return UnableToLegalize;
3452e8d8bef9SDimitry Andric     }
3453e8d8bef9SDimitry Andric 
3454e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3455e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3456e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 3);
3457e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3458e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3459e8d8bef9SDimitry Andric     return Legalized;
3460e8d8bef9SDimitry Andric   }
3461e8d8bef9SDimitry Andric   case TargetOpcode::G_AND:
3462e8d8bef9SDimitry Andric   case TargetOpcode::G_OR:
3463e8d8bef9SDimitry Andric   case TargetOpcode::G_XOR: {
3464e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3465e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 1);
3466e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3467e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3468e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3469e8d8bef9SDimitry Andric     return Legalized;
3470e8d8bef9SDimitry Andric   }
3471e8d8bef9SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3472e8d8bef9SDimitry Andric     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3473e8d8bef9SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
3474e8d8bef9SDimitry Andric     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3475e8d8bef9SDimitry Andric   default:
3476e8d8bef9SDimitry Andric     return UnableToLegalize;
3477e8d8bef9SDimitry Andric   }
3478e8d8bef9SDimitry Andric }
3479e8d8bef9SDimitry Andric 
3480e8d8bef9SDimitry Andric // Legalize an instruction by changing the opcode in place.
3481e8d8bef9SDimitry Andric void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3482e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3483e8d8bef9SDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3484e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3485e8d8bef9SDimitry Andric }
3486e8d8bef9SDimitry Andric 
3487e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3488e8d8bef9SDimitry Andric LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3489e8d8bef9SDimitry Andric   using namespace TargetOpcode;
3490e8d8bef9SDimitry Andric 
3491e8d8bef9SDimitry Andric   switch(MI.getOpcode()) {
3492e8d8bef9SDimitry Andric   default:
3493e8d8bef9SDimitry Andric     return UnableToLegalize;
349406c3fb27SDimitry Andric   case TargetOpcode::G_FCONSTANT:
349506c3fb27SDimitry Andric     return lowerFConstant(MI);
3496e8d8bef9SDimitry Andric   case TargetOpcode::G_BITCAST:
3497e8d8bef9SDimitry Andric     return lowerBitcast(MI);
3498e8d8bef9SDimitry Andric   case TargetOpcode::G_SREM:
3499e8d8bef9SDimitry Andric   case TargetOpcode::G_UREM: {
3500e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3501e8d8bef9SDimitry Andric     auto Quot =
3502e8d8bef9SDimitry Andric         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3503e8d8bef9SDimitry Andric                               {MI.getOperand(1), MI.getOperand(2)});
3504e8d8bef9SDimitry Andric 
3505e8d8bef9SDimitry Andric     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3506e8d8bef9SDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3507e8d8bef9SDimitry Andric     MI.eraseFromParent();
3508e8d8bef9SDimitry Andric     return Legalized;
3509e8d8bef9SDimitry Andric   }
3510e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
3511e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
3512e8d8bef9SDimitry Andric     return lowerSADDO_SSUBO(MI);
3513e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULH:
3514e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULH:
3515e8d8bef9SDimitry Andric     return lowerSMULH_UMULH(MI);
3516e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULO:
3517e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULO: {
3518e8d8bef9SDimitry Andric     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3519e8d8bef9SDimitry Andric     // result.
352006c3fb27SDimitry Andric     auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3521e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3522e8d8bef9SDimitry Andric 
3523e8d8bef9SDimitry Andric     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3524e8d8bef9SDimitry Andric                           ? TargetOpcode::G_SMULH
3525e8d8bef9SDimitry Andric                           : TargetOpcode::G_UMULH;
3526e8d8bef9SDimitry Andric 
3527e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3528e8d8bef9SDimitry Andric     const auto &TII = MIRBuilder.getTII();
3529e8d8bef9SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_MUL));
353081ad6265SDimitry Andric     MI.removeOperand(1);
3531e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3532e8d8bef9SDimitry Andric 
3533e8d8bef9SDimitry Andric     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3534e8d8bef9SDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
3535e8d8bef9SDimitry Andric 
3536e8d8bef9SDimitry Andric     // Move insert point forward so we can use the Res register if needed.
3537e8d8bef9SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3538e8d8bef9SDimitry Andric 
3539e8d8bef9SDimitry Andric     // For *signed* multiply, overflow is detected by checking:
3540e8d8bef9SDimitry Andric     // (hi != (lo >> bitwidth-1))
3541e8d8bef9SDimitry Andric     if (Opcode == TargetOpcode::G_SMULH) {
3542e8d8bef9SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3543e8d8bef9SDimitry Andric       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3544e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3545e8d8bef9SDimitry Andric     } else {
3546e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3547e8d8bef9SDimitry Andric     }
3548e8d8bef9SDimitry Andric     return Legalized;
3549e8d8bef9SDimitry Andric   }
3550e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEG: {
355106c3fb27SDimitry Andric     auto [Res, SubByReg] = MI.getFirst2Regs();
3552e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3553e8d8bef9SDimitry Andric 
3554e8d8bef9SDimitry Andric     // TODO: Handle vector types once we are able to
3555e8d8bef9SDimitry Andric     // represent them.
3556e8d8bef9SDimitry Andric     if (Ty.isVector())
3557e8d8bef9SDimitry Andric       return UnableToLegalize;
3558e8d8bef9SDimitry Andric     auto SignMask =
3559e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3560e8d8bef9SDimitry Andric     MIRBuilder.buildXor(Res, SubByReg, SignMask);
3561e8d8bef9SDimitry Andric     MI.eraseFromParent();
3562e8d8bef9SDimitry Andric     return Legalized;
3563e8d8bef9SDimitry Andric   }
3564bdd1243dSDimitry Andric   case TargetOpcode::G_FSUB:
3565bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB: {
356606c3fb27SDimitry Andric     auto [Res, LHS, RHS] = MI.getFirst3Regs();
3567e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3568e8d8bef9SDimitry Andric 
3569e8d8bef9SDimitry Andric     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3570bdd1243dSDimitry Andric     auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3571bdd1243dSDimitry Andric 
3572bdd1243dSDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3573bdd1243dSDimitry Andric       MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3574bdd1243dSDimitry Andric     else
3575e8d8bef9SDimitry Andric       MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3576bdd1243dSDimitry Andric 
3577e8d8bef9SDimitry Andric     MI.eraseFromParent();
3578e8d8bef9SDimitry Andric     return Legalized;
3579e8d8bef9SDimitry Andric   }
3580e8d8bef9SDimitry Andric   case TargetOpcode::G_FMAD:
3581e8d8bef9SDimitry Andric     return lowerFMad(MI);
3582e8d8bef9SDimitry Andric   case TargetOpcode::G_FFLOOR:
3583e8d8bef9SDimitry Andric     return lowerFFloor(MI);
3584e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
3585e8d8bef9SDimitry Andric     return lowerIntrinsicRound(MI);
3586*5f757f3fSDimitry Andric   case TargetOpcode::G_FRINT: {
3587e8d8bef9SDimitry Andric     // Since round even is the assumed rounding mode for unconstrained FP
3588e8d8bef9SDimitry Andric     // operations, rint and roundeven are the same operation.
3589*5f757f3fSDimitry Andric     changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3590e8d8bef9SDimitry Andric     return Legalized;
3591e8d8bef9SDimitry Andric   }
3592e8d8bef9SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
359306c3fb27SDimitry Andric     auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3594e8d8bef9SDimitry Andric     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3595e8d8bef9SDimitry Andric                                   **MI.memoperands_begin());
3596e8d8bef9SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3597e8d8bef9SDimitry Andric     MI.eraseFromParent();
3598e8d8bef9SDimitry Andric     return Legalized;
3599e8d8bef9SDimitry Andric   }
3600e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD:
3601e8d8bef9SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
3602e8d8bef9SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
3603fe6060f1SDimitry Andric     return lowerLoad(cast<GAnyLoad>(MI));
3604e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE:
3605fe6060f1SDimitry Andric     return lowerStore(cast<GStore>(MI));
36060b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
36070b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
36080b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
36090b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
36100b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
3611e8d8bef9SDimitry Andric     return lowerBitCount(MI);
36120b57cec5SDimitry Andric   case G_UADDO: {
361306c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
36140b57cec5SDimitry Andric 
36150b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, LHS, RHS);
36160b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
36170b57cec5SDimitry Andric 
36180b57cec5SDimitry Andric     MI.eraseFromParent();
36190b57cec5SDimitry Andric     return Legalized;
36200b57cec5SDimitry Andric   }
36210b57cec5SDimitry Andric   case G_UADDE: {
362206c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3623*5f757f3fSDimitry Andric     const LLT CondTy = MRI.getType(CarryOut);
3624*5f757f3fSDimitry Andric     const LLT Ty = MRI.getType(Res);
36250b57cec5SDimitry Andric 
3626*5f757f3fSDimitry Andric     // Initial add of the two operands.
36275ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3628*5f757f3fSDimitry Andric 
3629*5f757f3fSDimitry Andric     // Initial check for carry.
3630*5f757f3fSDimitry Andric     auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3631*5f757f3fSDimitry Andric 
3632*5f757f3fSDimitry Andric     // Add the sum and the carry.
36335ffd83dbSDimitry Andric     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
36340b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3635*5f757f3fSDimitry Andric 
3636*5f757f3fSDimitry Andric     // Second check for carry. We can only carry if the initial sum is all 1s
3637*5f757f3fSDimitry Andric     // and the carry is set, resulting in a new sum of 0.
3638*5f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
3639*5f757f3fSDimitry Andric     auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
3640*5f757f3fSDimitry Andric     auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3641*5f757f3fSDimitry Andric     MIRBuilder.buildOr(CarryOut, Carry, Carry2);
36420b57cec5SDimitry Andric 
36430b57cec5SDimitry Andric     MI.eraseFromParent();
36440b57cec5SDimitry Andric     return Legalized;
36450b57cec5SDimitry Andric   }
36460b57cec5SDimitry Andric   case G_USUBO: {
364706c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, LHS, RHS);
36500b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
36510b57cec5SDimitry Andric 
36520b57cec5SDimitry Andric     MI.eraseFromParent();
36530b57cec5SDimitry Andric     return Legalized;
36540b57cec5SDimitry Andric   }
36550b57cec5SDimitry Andric   case G_USUBE: {
365606c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
36575ffd83dbSDimitry Andric     const LLT CondTy = MRI.getType(BorrowOut);
36585ffd83dbSDimitry Andric     const LLT Ty = MRI.getType(Res);
36590b57cec5SDimitry Andric 
3660*5f757f3fSDimitry Andric     // Initial subtract of the two operands.
36615ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3662*5f757f3fSDimitry Andric 
3663*5f757f3fSDimitry Andric     // Initial check for borrow.
3664*5f757f3fSDimitry Andric     auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3665*5f757f3fSDimitry Andric 
3666*5f757f3fSDimitry Andric     // Subtract the borrow from the first subtract.
36675ffd83dbSDimitry Andric     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
36680b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
36695ffd83dbSDimitry Andric 
3670*5f757f3fSDimitry Andric     // Second check for borrow. We can only borrow if the initial difference is
3671*5f757f3fSDimitry Andric     // 0 and the borrow is set, resulting in a new difference of all 1s.
3672*5f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
3673*5f757f3fSDimitry Andric     auto TmpResEqZero =
3674*5f757f3fSDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3675*5f757f3fSDimitry Andric     auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3676*5f757f3fSDimitry Andric     MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
36770b57cec5SDimitry Andric 
36780b57cec5SDimitry Andric     MI.eraseFromParent();
36790b57cec5SDimitry Andric     return Legalized;
36800b57cec5SDimitry Andric   }
36810b57cec5SDimitry Andric   case G_UITOFP:
3682e8d8bef9SDimitry Andric     return lowerUITOFP(MI);
36830b57cec5SDimitry Andric   case G_SITOFP:
3684e8d8bef9SDimitry Andric     return lowerSITOFP(MI);
36858bcb0991SDimitry Andric   case G_FPTOUI:
3686e8d8bef9SDimitry Andric     return lowerFPTOUI(MI);
36875ffd83dbSDimitry Andric   case G_FPTOSI:
36885ffd83dbSDimitry Andric     return lowerFPTOSI(MI);
36895ffd83dbSDimitry Andric   case G_FPTRUNC:
3690e8d8bef9SDimitry Andric     return lowerFPTRUNC(MI);
3691e8d8bef9SDimitry Andric   case G_FPOWI:
3692e8d8bef9SDimitry Andric     return lowerFPOWI(MI);
36930b57cec5SDimitry Andric   case G_SMIN:
36940b57cec5SDimitry Andric   case G_SMAX:
36950b57cec5SDimitry Andric   case G_UMIN:
36960b57cec5SDimitry Andric   case G_UMAX:
3697e8d8bef9SDimitry Andric     return lowerMinMax(MI);
36980b57cec5SDimitry Andric   case G_FCOPYSIGN:
3699e8d8bef9SDimitry Andric     return lowerFCopySign(MI);
37000b57cec5SDimitry Andric   case G_FMINNUM:
37010b57cec5SDimitry Andric   case G_FMAXNUM:
37020b57cec5SDimitry Andric     return lowerFMinNumMaxNum(MI);
37035ffd83dbSDimitry Andric   case G_MERGE_VALUES:
37045ffd83dbSDimitry Andric     return lowerMergeValues(MI);
37058bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
37068bcb0991SDimitry Andric     return lowerUnmergeValues(MI);
37078bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
37088bcb0991SDimitry Andric     assert(MI.getOperand(2).isImm() && "Expected immediate");
37098bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
37108bcb0991SDimitry Andric 
371106c3fb27SDimitry Andric     auto [DstReg, SrcReg] = MI.getFirst2Regs();
37128bcb0991SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
37138bcb0991SDimitry Andric     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
37148bcb0991SDimitry Andric 
37158bcb0991SDimitry Andric     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
37165ffd83dbSDimitry Andric     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
37175ffd83dbSDimitry Andric     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
37188bcb0991SDimitry Andric     MI.eraseFromParent();
37198bcb0991SDimitry Andric     return Legalized;
37208bcb0991SDimitry Andric   }
3721e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
3722e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
3723e8d8bef9SDimitry Andric     return lowerExtractInsertVectorElt(MI);
37248bcb0991SDimitry Andric   case G_SHUFFLE_VECTOR:
37258bcb0991SDimitry Andric     return lowerShuffleVector(MI);
37268bcb0991SDimitry Andric   case G_DYN_STACKALLOC:
37278bcb0991SDimitry Andric     return lowerDynStackAlloc(MI);
3728*5f757f3fSDimitry Andric   case G_STACKSAVE:
3729*5f757f3fSDimitry Andric     return lowerStackSave(MI);
3730*5f757f3fSDimitry Andric   case G_STACKRESTORE:
3731*5f757f3fSDimitry Andric     return lowerStackRestore(MI);
37328bcb0991SDimitry Andric   case G_EXTRACT:
37338bcb0991SDimitry Andric     return lowerExtract(MI);
37348bcb0991SDimitry Andric   case G_INSERT:
37358bcb0991SDimitry Andric     return lowerInsert(MI);
3736480093f4SDimitry Andric   case G_BSWAP:
3737480093f4SDimitry Andric     return lowerBswap(MI);
3738480093f4SDimitry Andric   case G_BITREVERSE:
3739480093f4SDimitry Andric     return lowerBitreverse(MI);
3740480093f4SDimitry Andric   case G_READ_REGISTER:
37415ffd83dbSDimitry Andric   case G_WRITE_REGISTER:
37425ffd83dbSDimitry Andric     return lowerReadWriteRegister(MI);
3743e8d8bef9SDimitry Andric   case G_UADDSAT:
3744e8d8bef9SDimitry Andric   case G_USUBSAT: {
3745e8d8bef9SDimitry Andric     // Try to make a reasonable guess about which lowering strategy to use. The
3746e8d8bef9SDimitry Andric     // target can override this with custom lowering and calling the
3747e8d8bef9SDimitry Andric     // implementation functions.
3748e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3749e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3750e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3751e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
37520b57cec5SDimitry Andric   }
3753e8d8bef9SDimitry Andric   case G_SADDSAT:
3754e8d8bef9SDimitry Andric   case G_SSUBSAT: {
3755e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3756e8d8bef9SDimitry Andric 
3757e8d8bef9SDimitry Andric     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3758e8d8bef9SDimitry Andric     // since it's a shorter expansion. However, we would need to figure out the
3759e8d8bef9SDimitry Andric     // preferred boolean type for the carry out for the query.
3760e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3761e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3762e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
3763e8d8bef9SDimitry Andric   }
3764e8d8bef9SDimitry Andric   case G_SSHLSAT:
3765e8d8bef9SDimitry Andric   case G_USHLSAT:
3766e8d8bef9SDimitry Andric     return lowerShlSat(MI);
3767fe6060f1SDimitry Andric   case G_ABS:
3768fe6060f1SDimitry Andric     return lowerAbsToAddXor(MI);
3769e8d8bef9SDimitry Andric   case G_SELECT:
3770e8d8bef9SDimitry Andric     return lowerSelect(MI);
3771bdd1243dSDimitry Andric   case G_IS_FPCLASS:
3772bdd1243dSDimitry Andric     return lowerISFPCLASS(MI);
3773fe6060f1SDimitry Andric   case G_SDIVREM:
3774fe6060f1SDimitry Andric   case G_UDIVREM:
3775fe6060f1SDimitry Andric     return lowerDIVREM(MI);
3776fe6060f1SDimitry Andric   case G_FSHL:
3777fe6060f1SDimitry Andric   case G_FSHR:
3778fe6060f1SDimitry Andric     return lowerFunnelShift(MI);
3779fe6060f1SDimitry Andric   case G_ROTL:
3780fe6060f1SDimitry Andric   case G_ROTR:
3781fe6060f1SDimitry Andric     return lowerRotate(MI);
3782349cc55cSDimitry Andric   case G_MEMSET:
3783349cc55cSDimitry Andric   case G_MEMCPY:
3784349cc55cSDimitry Andric   case G_MEMMOVE:
3785349cc55cSDimitry Andric     return lowerMemCpyFamily(MI);
3786349cc55cSDimitry Andric   case G_MEMCPY_INLINE:
3787349cc55cSDimitry Andric     return lowerMemcpyInline(MI);
3788*5f757f3fSDimitry Andric   case G_ZEXT:
3789*5f757f3fSDimitry Andric   case G_SEXT:
3790*5f757f3fSDimitry Andric   case G_ANYEXT:
3791*5f757f3fSDimitry Andric     return lowerEXT(MI);
3792*5f757f3fSDimitry Andric   case G_TRUNC:
3793*5f757f3fSDimitry Andric     return lowerTRUNC(MI);
3794349cc55cSDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
3795349cc55cSDimitry Andric     return lowerVectorReduction(MI);
3796*5f757f3fSDimitry Andric   case G_VAARG:
3797*5f757f3fSDimitry Andric     return lowerVAArg(MI);
3798e8d8bef9SDimitry Andric   }
3799e8d8bef9SDimitry Andric }
3800e8d8bef9SDimitry Andric 
3801e8d8bef9SDimitry Andric Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3802e8d8bef9SDimitry Andric                                                   Align MinAlign) const {
3803e8d8bef9SDimitry Andric   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3804e8d8bef9SDimitry Andric   // datalayout for the preferred alignment. Also there should be a target hook
3805e8d8bef9SDimitry Andric   // for this to allow targets to reduce the alignment and ignore the
3806e8d8bef9SDimitry Andric   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3807e8d8bef9SDimitry Andric   // the type.
3808e8d8bef9SDimitry Andric   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3809e8d8bef9SDimitry Andric }
3810e8d8bef9SDimitry Andric 
3811e8d8bef9SDimitry Andric MachineInstrBuilder
3812e8d8bef9SDimitry Andric LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3813e8d8bef9SDimitry Andric                                       MachinePointerInfo &PtrInfo) {
3814e8d8bef9SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
3815e8d8bef9SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
3816e8d8bef9SDimitry Andric   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3817e8d8bef9SDimitry Andric 
3818e8d8bef9SDimitry Andric   unsigned AddrSpace = DL.getAllocaAddrSpace();
3819e8d8bef9SDimitry Andric   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3820e8d8bef9SDimitry Andric 
3821e8d8bef9SDimitry Andric   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3822e8d8bef9SDimitry Andric   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3823e8d8bef9SDimitry Andric }
3824e8d8bef9SDimitry Andric 
3825e8d8bef9SDimitry Andric static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3826e8d8bef9SDimitry Andric                                         LLT VecTy) {
3827e8d8bef9SDimitry Andric   int64_t IdxVal;
3828e8d8bef9SDimitry Andric   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3829e8d8bef9SDimitry Andric     return IdxReg;
3830e8d8bef9SDimitry Andric 
3831e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(IdxReg);
3832e8d8bef9SDimitry Andric   unsigned NElts = VecTy.getNumElements();
3833e8d8bef9SDimitry Andric   if (isPowerOf2_32(NElts)) {
3834e8d8bef9SDimitry Andric     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3835e8d8bef9SDimitry Andric     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3836e8d8bef9SDimitry Andric   }
3837e8d8bef9SDimitry Andric 
3838e8d8bef9SDimitry Andric   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3839e8d8bef9SDimitry Andric       .getReg(0);
3840e8d8bef9SDimitry Andric }
3841e8d8bef9SDimitry Andric 
3842e8d8bef9SDimitry Andric Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3843e8d8bef9SDimitry Andric                                                   Register Index) {
3844e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
3845e8d8bef9SDimitry Andric 
3846e8d8bef9SDimitry Andric   // Calculate the element offset and add it to the pointer.
3847e8d8bef9SDimitry Andric   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3848e8d8bef9SDimitry Andric   assert(EltSize * 8 == EltTy.getSizeInBits() &&
3849e8d8bef9SDimitry Andric          "Converting bits to bytes lost precision");
3850e8d8bef9SDimitry Andric 
3851e8d8bef9SDimitry Andric   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3852e8d8bef9SDimitry Andric 
3853e8d8bef9SDimitry Andric   LLT IdxTy = MRI.getType(Index);
3854e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3855e8d8bef9SDimitry Andric                                  MIRBuilder.buildConstant(IdxTy, EltSize));
3856e8d8bef9SDimitry Andric 
3857e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(VecPtr);
3858e8d8bef9SDimitry Andric   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
38590b57cec5SDimitry Andric }
38600b57cec5SDimitry Andric 
38610eae32dcSDimitry Andric #ifndef NDEBUG
38620eae32dcSDimitry Andric /// Check that all vector operands have same number of elements. Other operands
38630eae32dcSDimitry Andric /// should be listed in NonVecOp.
38640eae32dcSDimitry Andric static bool hasSameNumEltsOnAllVectorOperands(
38650eae32dcSDimitry Andric     GenericMachineInstr &MI, MachineRegisterInfo &MRI,
38660eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
38670eae32dcSDimitry Andric   if (MI.getNumMemOperands() != 0)
38680eae32dcSDimitry Andric     return false;
38690b57cec5SDimitry Andric 
38700eae32dcSDimitry Andric   LLT VecTy = MRI.getType(MI.getReg(0));
38710eae32dcSDimitry Andric   if (!VecTy.isVector())
38720eae32dcSDimitry Andric     return false;
38730eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
38740b57cec5SDimitry Andric 
38750eae32dcSDimitry Andric   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
38760eae32dcSDimitry Andric     MachineOperand &Op = MI.getOperand(OpIdx);
38770eae32dcSDimitry Andric     if (!Op.isReg()) {
38780eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
38790eae32dcSDimitry Andric         return false;
38800eae32dcSDimitry Andric       continue;
38810eae32dcSDimitry Andric     }
38820b57cec5SDimitry Andric 
38830eae32dcSDimitry Andric     LLT Ty = MRI.getType(Op.getReg());
38840eae32dcSDimitry Andric     if (!Ty.isVector()) {
38850eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
38860eae32dcSDimitry Andric         return false;
38870eae32dcSDimitry Andric       continue;
38880eae32dcSDimitry Andric     }
38890eae32dcSDimitry Andric 
38900eae32dcSDimitry Andric     if (Ty.getNumElements() != NumElts)
38910eae32dcSDimitry Andric       return false;
38920eae32dcSDimitry Andric   }
38930eae32dcSDimitry Andric 
38940eae32dcSDimitry Andric   return true;
38950eae32dcSDimitry Andric }
38960eae32dcSDimitry Andric #endif
38970eae32dcSDimitry Andric 
38980eae32dcSDimitry Andric /// Fill \p DstOps with DstOps that have same number of elements combined as
38990eae32dcSDimitry Andric /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
39000eae32dcSDimitry Andric /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
39010eae32dcSDimitry Andric /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
39020eae32dcSDimitry Andric static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
39030eae32dcSDimitry Andric                        unsigned NumElts) {
39040eae32dcSDimitry Andric   LLT LeftoverTy;
39050eae32dcSDimitry Andric   assert(Ty.isVector() && "Expected vector type");
39060eae32dcSDimitry Andric   LLT EltTy = Ty.getElementType();
39070eae32dcSDimitry Andric   LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
39080eae32dcSDimitry Andric   int NumParts, NumLeftover;
39090eae32dcSDimitry Andric   std::tie(NumParts, NumLeftover) =
39100eae32dcSDimitry Andric       getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
39110eae32dcSDimitry Andric 
39120eae32dcSDimitry Andric   assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
39130eae32dcSDimitry Andric   for (int i = 0; i < NumParts; ++i) {
39140eae32dcSDimitry Andric     DstOps.push_back(NarrowTy);
39150eae32dcSDimitry Andric   }
39160eae32dcSDimitry Andric 
39170eae32dcSDimitry Andric   if (LeftoverTy.isValid()) {
39180eae32dcSDimitry Andric     assert(NumLeftover == 1 && "expected exactly one leftover");
39190eae32dcSDimitry Andric     DstOps.push_back(LeftoverTy);
39200eae32dcSDimitry Andric   }
39210eae32dcSDimitry Andric }
39220eae32dcSDimitry Andric 
39230eae32dcSDimitry Andric /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
39240eae32dcSDimitry Andric /// made from \p Op depending on operand type.
39250eae32dcSDimitry Andric static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
39260eae32dcSDimitry Andric                            MachineOperand &Op) {
39270eae32dcSDimitry Andric   for (unsigned i = 0; i < N; ++i) {
39280eae32dcSDimitry Andric     if (Op.isReg())
39290eae32dcSDimitry Andric       Ops.push_back(Op.getReg());
39300eae32dcSDimitry Andric     else if (Op.isImm())
39310eae32dcSDimitry Andric       Ops.push_back(Op.getImm());
39320eae32dcSDimitry Andric     else if (Op.isPredicate())
39330eae32dcSDimitry Andric       Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
39340eae32dcSDimitry Andric     else
39350eae32dcSDimitry Andric       llvm_unreachable("Unsupported type");
39360eae32dcSDimitry Andric   }
39370b57cec5SDimitry Andric }
39380b57cec5SDimitry Andric 
39390b57cec5SDimitry Andric // Handle splitting vector operations which need to have the same number of
39400b57cec5SDimitry Andric // elements in each type index, but each type index may have a different element
39410b57cec5SDimitry Andric // type.
39420b57cec5SDimitry Andric //
39430b57cec5SDimitry Andric // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
39440b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
39450b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
39460b57cec5SDimitry Andric //
39470b57cec5SDimitry Andric // Also handles some irregular breakdown cases, e.g.
39480b57cec5SDimitry Andric // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
39490b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
39500b57cec5SDimitry Andric //             s64 = G_SHL s64, s32
39510b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
39520b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorMultiEltType(
39530eae32dcSDimitry Andric     GenericMachineInstr &MI, unsigned NumElts,
39540eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
39550eae32dcSDimitry Andric   assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
39560eae32dcSDimitry Andric          "Non-compatible opcode or not specified non-vector operands");
39570eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
39580b57cec5SDimitry Andric 
39590eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
39600eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
39610b57cec5SDimitry Andric 
39620eae32dcSDimitry Andric   // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
39630eae32dcSDimitry Andric   // Build instructions with DstOps to use instruction found by CSE directly.
39640eae32dcSDimitry Andric   // CSE copies found instruction into given vreg when building with vreg dest.
39650eae32dcSDimitry Andric   SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
39660eae32dcSDimitry Andric   // Output registers will be taken from created instructions.
39670eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
39680eae32dcSDimitry Andric   for (unsigned i = 0; i < NumDefs; ++i) {
39690eae32dcSDimitry Andric     makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
39700b57cec5SDimitry Andric   }
39710b57cec5SDimitry Andric 
39720eae32dcSDimitry Andric   // Split vector input operands into sub-vectors with NumElts elts + Leftover.
39730eae32dcSDimitry Andric   // Operands listed in NonVecOpIndices will be used as is without splitting;
39740eae32dcSDimitry Andric   // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
39750eae32dcSDimitry Andric   // scalar condition (op 1), immediate in sext_inreg (op 2).
39760eae32dcSDimitry Andric   SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
39770eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
39780eae32dcSDimitry Andric        ++UseIdx, ++UseNo) {
39790eae32dcSDimitry Andric     if (is_contained(NonVecOpIndices, UseIdx)) {
39800eae32dcSDimitry Andric       broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
39810eae32dcSDimitry Andric                      MI.getOperand(UseIdx));
39820b57cec5SDimitry Andric     } else {
39830eae32dcSDimitry Andric       SmallVector<Register, 8> SplitPieces;
39840eae32dcSDimitry Andric       extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
39850eae32dcSDimitry Andric       for (auto Reg : SplitPieces)
39860eae32dcSDimitry Andric         InputOpsPieces[UseNo].push_back(Reg);
39870eae32dcSDimitry Andric     }
39880b57cec5SDimitry Andric   }
39890b57cec5SDimitry Andric 
39900eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
39910eae32dcSDimitry Andric 
39920eae32dcSDimitry Andric   // Take i-th piece of each input operand split and build sub-vector/scalar
39930eae32dcSDimitry Andric   // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
39940eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
39950eae32dcSDimitry Andric     SmallVector<DstOp, 2> Defs;
39960eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
39970eae32dcSDimitry Andric       Defs.push_back(OutputOpsPieces[DstNo][i]);
39980eae32dcSDimitry Andric 
39990eae32dcSDimitry Andric     SmallVector<SrcOp, 3> Uses;
40000eae32dcSDimitry Andric     for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
40010eae32dcSDimitry Andric       Uses.push_back(InputOpsPieces[InputNo][i]);
40020eae32dcSDimitry Andric 
40030eae32dcSDimitry Andric     auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
40040eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
40050eae32dcSDimitry Andric       OutputRegs[DstNo].push_back(I.getReg(DstNo));
40060b57cec5SDimitry Andric   }
40070b57cec5SDimitry Andric 
40080eae32dcSDimitry Andric   // Merge small outputs into MI's output for each def operand.
40090eae32dcSDimitry Andric   if (NumLeftovers) {
40100eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
40110eae32dcSDimitry Andric       mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
40120eae32dcSDimitry Andric   } else {
40130eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
4014bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
40150eae32dcSDimitry Andric   }
40160b57cec5SDimitry Andric 
40170b57cec5SDimitry Andric   MI.eraseFromParent();
40180b57cec5SDimitry Andric   return Legalized;
40190b57cec5SDimitry Andric }
40200b57cec5SDimitry Andric 
40210b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
40220eae32dcSDimitry Andric LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
40230eae32dcSDimitry Andric                                         unsigned NumElts) {
40240eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
40250b57cec5SDimitry Andric 
40260eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
40270eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
40280b57cec5SDimitry Andric 
40290eae32dcSDimitry Andric   SmallVector<DstOp, 8> OutputOpsPieces;
40300eae32dcSDimitry Andric   SmallVector<Register, 8> OutputRegs;
40310eae32dcSDimitry Andric   makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
40320b57cec5SDimitry Andric 
40330eae32dcSDimitry Andric   // Instructions that perform register split will be inserted in basic block
40340eae32dcSDimitry Andric   // where register is defined (basic block is in the next operand).
40350eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
40360eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
40370eae32dcSDimitry Andric        UseIdx += 2, ++UseNo) {
40380eae32dcSDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4039bdd1243dSDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
40400eae32dcSDimitry Andric     extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
40410b57cec5SDimitry Andric   }
40420eae32dcSDimitry Andric 
40430eae32dcSDimitry Andric   // Build PHIs with fewer elements.
40440eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
40450eae32dcSDimitry Andric   MIRBuilder.setInsertPt(*MI.getParent(), MI);
40460eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
40470eae32dcSDimitry Andric     auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
40480eae32dcSDimitry Andric     Phi.addDef(
40490eae32dcSDimitry Andric         MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
40500eae32dcSDimitry Andric     OutputRegs.push_back(Phi.getReg(0));
40510eae32dcSDimitry Andric 
40520eae32dcSDimitry Andric     for (unsigned j = 0; j < NumInputs / 2; ++j) {
40530eae32dcSDimitry Andric       Phi.addUse(InputOpsPieces[j][i]);
40540eae32dcSDimitry Andric       Phi.add(MI.getOperand(1 + j * 2 + 1));
40550eae32dcSDimitry Andric     }
40560eae32dcSDimitry Andric   }
40570eae32dcSDimitry Andric 
40580eae32dcSDimitry Andric   // Merge small outputs into MI's def.
40590eae32dcSDimitry Andric   if (NumLeftovers) {
40600eae32dcSDimitry Andric     mergeMixedSubvectors(MI.getReg(0), OutputRegs);
40610eae32dcSDimitry Andric   } else {
4062bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
40630b57cec5SDimitry Andric   }
40640b57cec5SDimitry Andric 
40650b57cec5SDimitry Andric   MI.eraseFromParent();
40660b57cec5SDimitry Andric   return Legalized;
40670b57cec5SDimitry Andric }
40680b57cec5SDimitry Andric 
40690b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
40708bcb0991SDimitry Andric LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
40718bcb0991SDimitry Andric                                                   unsigned TypeIdx,
40728bcb0991SDimitry Andric                                                   LLT NarrowTy) {
40738bcb0991SDimitry Andric   const int NumDst = MI.getNumOperands() - 1;
40748bcb0991SDimitry Andric   const Register SrcReg = MI.getOperand(NumDst).getReg();
40750eae32dcSDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
40768bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
40778bcb0991SDimitry Andric 
40780eae32dcSDimitry Andric   if (TypeIdx != 1 || NarrowTy == DstTy)
40798bcb0991SDimitry Andric     return UnableToLegalize;
40808bcb0991SDimitry Andric 
40810eae32dcSDimitry Andric   // Requires compatible types. Otherwise SrcReg should have been defined by
40820eae32dcSDimitry Andric   // merge-like instruction that would get artifact combined. Most likely
40830eae32dcSDimitry Andric   // instruction that defines SrcReg has to perform more/fewer elements
40840eae32dcSDimitry Andric   // legalization compatible with NarrowTy.
40850eae32dcSDimitry Andric   assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
40860eae32dcSDimitry Andric   assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
40878bcb0991SDimitry Andric 
40880eae32dcSDimitry Andric   if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
40890eae32dcSDimitry Andric       (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
40900eae32dcSDimitry Andric     return UnableToLegalize;
40910eae32dcSDimitry Andric 
40920eae32dcSDimitry Andric   // This is most likely DstTy (smaller then register size) packed in SrcTy
40930eae32dcSDimitry Andric   // (larger then register size) and since unmerge was not combined it will be
40940eae32dcSDimitry Andric   // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
40950eae32dcSDimitry Andric   // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
40960eae32dcSDimitry Andric 
40970eae32dcSDimitry Andric   // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
40980eae32dcSDimitry Andric   //
40990eae32dcSDimitry Andric   // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
41000eae32dcSDimitry Andric   // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
41010eae32dcSDimitry Andric   // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
41020eae32dcSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
41038bcb0991SDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
41048bcb0991SDimitry Andric   const int PartsPerUnmerge = NumDst / NumUnmerge;
41058bcb0991SDimitry Andric 
41068bcb0991SDimitry Andric   for (int I = 0; I != NumUnmerge; ++I) {
41078bcb0991SDimitry Andric     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
41088bcb0991SDimitry Andric 
41098bcb0991SDimitry Andric     for (int J = 0; J != PartsPerUnmerge; ++J)
41108bcb0991SDimitry Andric       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
41118bcb0991SDimitry Andric     MIB.addUse(Unmerge.getReg(I));
41128bcb0991SDimitry Andric   }
41138bcb0991SDimitry Andric 
41148bcb0991SDimitry Andric   MI.eraseFromParent();
41158bcb0991SDimitry Andric   return Legalized;
41168bcb0991SDimitry Andric }
41178bcb0991SDimitry Andric 
4118fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
4119e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4120e8d8bef9SDimitry Andric                                           LLT NarrowTy) {
412106c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
41220eae32dcSDimitry Andric   // Requires compatible types. Otherwise user of DstReg did not perform unmerge
41230eae32dcSDimitry Andric   // that should have been artifact combined. Most likely instruction that uses
41240eae32dcSDimitry Andric   // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
41250eae32dcSDimitry Andric   assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
41260eae32dcSDimitry Andric   assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
41270eae32dcSDimitry Andric   if (NarrowTy == SrcTy)
41280eae32dcSDimitry Andric     return UnableToLegalize;
41298bcb0991SDimitry Andric 
41300eae32dcSDimitry Andric   // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
41310eae32dcSDimitry Andric   // is for old mir tests. Since the changes to more/fewer elements it should no
41320eae32dcSDimitry Andric   // longer be possible to generate MIR like this when starting from llvm-ir
41330eae32dcSDimitry Andric   // because LCMTy approach was replaced with merge/unmerge to vector elements.
41340eae32dcSDimitry Andric   if (TypeIdx == 1) {
41350eae32dcSDimitry Andric     assert(SrcTy.isVector() && "Expected vector types");
41360eae32dcSDimitry Andric     assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
41370eae32dcSDimitry Andric     if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
41380eae32dcSDimitry Andric         (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
41390eae32dcSDimitry Andric       return UnableToLegalize;
41400eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
41410eae32dcSDimitry Andric     //
41420eae32dcSDimitry Andric     // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
41430eae32dcSDimitry Andric     // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
41440eae32dcSDimitry Andric     // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
41450eae32dcSDimitry Andric     // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
41460eae32dcSDimitry Andric     // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
41470eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4148e8d8bef9SDimitry Andric 
41490eae32dcSDimitry Andric     SmallVector<Register, 8> Elts;
41500eae32dcSDimitry Andric     LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
41510eae32dcSDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
41520eae32dcSDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
41530eae32dcSDimitry Andric       for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
41540eae32dcSDimitry Andric         Elts.push_back(Unmerge.getReg(j));
41550eae32dcSDimitry Andric     }
4156e8d8bef9SDimitry Andric 
41570eae32dcSDimitry Andric     SmallVector<Register, 8> NarrowTyElts;
41580eae32dcSDimitry Andric     unsigned NumNarrowTyElts = NarrowTy.getNumElements();
41590eae32dcSDimitry Andric     unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
41600eae32dcSDimitry Andric     for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
41610eae32dcSDimitry Andric          ++i, Offset += NumNarrowTyElts) {
41620eae32dcSDimitry Andric       ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4163bdd1243dSDimitry Andric       NarrowTyElts.push_back(
4164bdd1243dSDimitry Andric           MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
41650eae32dcSDimitry Andric     }
4166e8d8bef9SDimitry Andric 
4167bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
41680eae32dcSDimitry Andric     MI.eraseFromParent();
41690eae32dcSDimitry Andric     return Legalized;
41700eae32dcSDimitry Andric   }
41710eae32dcSDimitry Andric 
41720eae32dcSDimitry Andric   assert(TypeIdx == 0 && "Bad type index");
41730eae32dcSDimitry Andric   if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
41740eae32dcSDimitry Andric       (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
41750eae32dcSDimitry Andric     return UnableToLegalize;
41760eae32dcSDimitry Andric 
41770eae32dcSDimitry Andric   // This is most likely SrcTy (smaller then register size) packed in DstTy
41780eae32dcSDimitry Andric   // (larger then register size) and since merge was not combined it will be
41790eae32dcSDimitry Andric   // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
41800eae32dcSDimitry Andric   // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
41810eae32dcSDimitry Andric 
41820eae32dcSDimitry Andric   // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
41830eae32dcSDimitry Andric   //
41840eae32dcSDimitry Andric   // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
41850eae32dcSDimitry Andric   // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
41860eae32dcSDimitry Andric   // %0:_(DstTy)  = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
41870eae32dcSDimitry Andric   SmallVector<Register, 8> NarrowTyElts;
41880eae32dcSDimitry Andric   unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
41890eae32dcSDimitry Andric   unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
41900eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
41910eae32dcSDimitry Andric   for (unsigned i = 0; i < NumParts; ++i) {
41920eae32dcSDimitry Andric     SmallVector<Register, 8> Sources;
41930eae32dcSDimitry Andric     for (unsigned j = 0; j < NumElts; ++j)
41940eae32dcSDimitry Andric       Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4195bdd1243dSDimitry Andric     NarrowTyElts.push_back(
4196bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
41970eae32dcSDimitry Andric   }
41980eae32dcSDimitry Andric 
4199bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4200e8d8bef9SDimitry Andric   MI.eraseFromParent();
4201e8d8bef9SDimitry Andric   return Legalized;
42028bcb0991SDimitry Andric }
42038bcb0991SDimitry Andric 
4204e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
4205e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4206e8d8bef9SDimitry Andric                                                            unsigned TypeIdx,
4207e8d8bef9SDimitry Andric                                                            LLT NarrowVecTy) {
420806c3fb27SDimitry Andric   auto [DstReg, SrcVec] = MI.getFirst2Regs();
4209e8d8bef9SDimitry Andric   Register InsertVal;
4210e8d8bef9SDimitry Andric   bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4211e8d8bef9SDimitry Andric 
4212e8d8bef9SDimitry Andric   assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4213e8d8bef9SDimitry Andric   if (IsInsert)
4214e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
4215e8d8bef9SDimitry Andric 
4216e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4217e8d8bef9SDimitry Andric 
4218e8d8bef9SDimitry Andric   // TODO: Handle total scalarization case.
4219e8d8bef9SDimitry Andric   if (!NarrowVecTy.isVector())
4220e8d8bef9SDimitry Andric     return UnableToLegalize;
4221e8d8bef9SDimitry Andric 
4222e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
4223e8d8bef9SDimitry Andric 
4224e8d8bef9SDimitry Andric   // If the index is a constant, we can really break this down as you would
4225e8d8bef9SDimitry Andric   // expect, and index into the target size pieces.
4226e8d8bef9SDimitry Andric   int64_t IdxVal;
4227349cc55cSDimitry Andric   auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4228fe6060f1SDimitry Andric   if (MaybeCst) {
4229fe6060f1SDimitry Andric     IdxVal = MaybeCst->Value.getSExtValue();
4230e8d8bef9SDimitry Andric     // Avoid out of bounds indexing the pieces.
4231e8d8bef9SDimitry Andric     if (IdxVal >= VecTy.getNumElements()) {
4232e8d8bef9SDimitry Andric       MIRBuilder.buildUndef(DstReg);
4233e8d8bef9SDimitry Andric       MI.eraseFromParent();
4234e8d8bef9SDimitry Andric       return Legalized;
42358bcb0991SDimitry Andric     }
42368bcb0991SDimitry Andric 
4237e8d8bef9SDimitry Andric     SmallVector<Register, 8> VecParts;
4238e8d8bef9SDimitry Andric     LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4239e8d8bef9SDimitry Andric 
4240e8d8bef9SDimitry Andric     // Build a sequence of NarrowTy pieces in VecParts for this operand.
4241e8d8bef9SDimitry Andric     LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4242e8d8bef9SDimitry Andric                                     TargetOpcode::G_ANYEXT);
4243e8d8bef9SDimitry Andric 
4244e8d8bef9SDimitry Andric     unsigned NewNumElts = NarrowVecTy.getNumElements();
4245e8d8bef9SDimitry Andric 
4246e8d8bef9SDimitry Andric     LLT IdxTy = MRI.getType(Idx);
4247e8d8bef9SDimitry Andric     int64_t PartIdx = IdxVal / NewNumElts;
4248e8d8bef9SDimitry Andric     auto NewIdx =
4249e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4250e8d8bef9SDimitry Andric 
4251e8d8bef9SDimitry Andric     if (IsInsert) {
4252e8d8bef9SDimitry Andric       LLT PartTy = MRI.getType(VecParts[PartIdx]);
4253e8d8bef9SDimitry Andric 
4254e8d8bef9SDimitry Andric       // Use the adjusted index to insert into one of the subvectors.
4255e8d8bef9SDimitry Andric       auto InsertPart = MIRBuilder.buildInsertVectorElement(
4256e8d8bef9SDimitry Andric           PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4257e8d8bef9SDimitry Andric       VecParts[PartIdx] = InsertPart.getReg(0);
4258e8d8bef9SDimitry Andric 
4259e8d8bef9SDimitry Andric       // Recombine the inserted subvector with the others to reform the result
4260e8d8bef9SDimitry Andric       // vector.
4261e8d8bef9SDimitry Andric       buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4262e8d8bef9SDimitry Andric     } else {
4263e8d8bef9SDimitry Andric       MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
42648bcb0991SDimitry Andric     }
42658bcb0991SDimitry Andric 
42668bcb0991SDimitry Andric     MI.eraseFromParent();
42678bcb0991SDimitry Andric     return Legalized;
42688bcb0991SDimitry Andric   }
42698bcb0991SDimitry Andric 
4270e8d8bef9SDimitry Andric   // With a variable index, we can't perform the operation in a smaller type, so
4271e8d8bef9SDimitry Andric   // we're forced to expand this.
4272e8d8bef9SDimitry Andric   //
4273e8d8bef9SDimitry Andric   // TODO: We could emit a chain of compare/select to figure out which piece to
4274e8d8bef9SDimitry Andric   // index.
4275e8d8bef9SDimitry Andric   return lowerExtractInsertVectorElt(MI);
4276e8d8bef9SDimitry Andric }
4277e8d8bef9SDimitry Andric 
42788bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
4279fe6060f1SDimitry Andric LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
42800b57cec5SDimitry Andric                                       LLT NarrowTy) {
42810b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
42820b57cec5SDimitry Andric   if (TypeIdx != 0)
42830b57cec5SDimitry Andric     return UnableToLegalize;
42840b57cec5SDimitry Andric 
42850b57cec5SDimitry Andric   // This implementation doesn't work for atomics. Give up instead of doing
42860b57cec5SDimitry Andric   // something invalid.
4287fe6060f1SDimitry Andric   if (LdStMI.isAtomic())
42880b57cec5SDimitry Andric     return UnableToLegalize;
42890b57cec5SDimitry Andric 
4290fe6060f1SDimitry Andric   bool IsLoad = isa<GLoad>(LdStMI);
4291fe6060f1SDimitry Andric   Register ValReg = LdStMI.getReg(0);
4292fe6060f1SDimitry Andric   Register AddrReg = LdStMI.getPointerReg();
42930b57cec5SDimitry Andric   LLT ValTy = MRI.getType(ValReg);
42940b57cec5SDimitry Andric 
42955ffd83dbSDimitry Andric   // FIXME: Do we need a distinct NarrowMemory legalize action?
4296fe6060f1SDimitry Andric   if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
42975ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
42985ffd83dbSDimitry Andric     return UnableToLegalize;
42995ffd83dbSDimitry Andric   }
43005ffd83dbSDimitry Andric 
43010b57cec5SDimitry Andric   int NumParts = -1;
43020b57cec5SDimitry Andric   int NumLeftover = -1;
43030b57cec5SDimitry Andric   LLT LeftoverTy;
43040b57cec5SDimitry Andric   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
43050b57cec5SDimitry Andric   if (IsLoad) {
43060b57cec5SDimitry Andric     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
43070b57cec5SDimitry Andric   } else {
43080b57cec5SDimitry Andric     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
43090b57cec5SDimitry Andric                      NarrowLeftoverRegs)) {
43100b57cec5SDimitry Andric       NumParts = NarrowRegs.size();
43110b57cec5SDimitry Andric       NumLeftover = NarrowLeftoverRegs.size();
43120b57cec5SDimitry Andric     }
43130b57cec5SDimitry Andric   }
43140b57cec5SDimitry Andric 
43150b57cec5SDimitry Andric   if (NumParts == -1)
43160b57cec5SDimitry Andric     return UnableToLegalize;
43170b57cec5SDimitry Andric 
4318e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(AddrReg);
4319e8d8bef9SDimitry Andric   const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
43200b57cec5SDimitry Andric 
43210b57cec5SDimitry Andric   unsigned TotalSize = ValTy.getSizeInBits();
43220b57cec5SDimitry Andric 
43230b57cec5SDimitry Andric   // Split the load/store into PartTy sized pieces starting at Offset. If this
43240b57cec5SDimitry Andric   // is a load, return the new registers in ValRegs. For a store, each elements
43250b57cec5SDimitry Andric   // of ValRegs should be PartTy. Returns the next offset that needs to be
43260b57cec5SDimitry Andric   // handled.
432781ad6265SDimitry Andric   bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4328fe6060f1SDimitry Andric   auto MMO = LdStMI.getMMO();
43290b57cec5SDimitry Andric   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
433081ad6265SDimitry Andric                              unsigned NumParts, unsigned Offset) -> unsigned {
43310b57cec5SDimitry Andric     MachineFunction &MF = MIRBuilder.getMF();
43320b57cec5SDimitry Andric     unsigned PartSize = PartTy.getSizeInBits();
43330b57cec5SDimitry Andric     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
433481ad6265SDimitry Andric          ++Idx) {
43350b57cec5SDimitry Andric       unsigned ByteOffset = Offset / 8;
43360b57cec5SDimitry Andric       Register NewAddrReg;
43370b57cec5SDimitry Andric 
4338480093f4SDimitry Andric       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
43390b57cec5SDimitry Andric 
43400b57cec5SDimitry Andric       MachineMemOperand *NewMMO =
4341fe6060f1SDimitry Andric           MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
43420b57cec5SDimitry Andric 
43430b57cec5SDimitry Andric       if (IsLoad) {
43440b57cec5SDimitry Andric         Register Dst = MRI.createGenericVirtualRegister(PartTy);
43450b57cec5SDimitry Andric         ValRegs.push_back(Dst);
43460b57cec5SDimitry Andric         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
43470b57cec5SDimitry Andric       } else {
43480b57cec5SDimitry Andric         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
43490b57cec5SDimitry Andric       }
435081ad6265SDimitry Andric       Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
43510b57cec5SDimitry Andric     }
43520b57cec5SDimitry Andric 
43530b57cec5SDimitry Andric     return Offset;
43540b57cec5SDimitry Andric   };
43550b57cec5SDimitry Andric 
435681ad6265SDimitry Andric   unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
435781ad6265SDimitry Andric   unsigned HandledOffset =
435881ad6265SDimitry Andric       splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
43590b57cec5SDimitry Andric 
43600b57cec5SDimitry Andric   // Handle the rest of the register if this isn't an even type breakdown.
43610b57cec5SDimitry Andric   if (LeftoverTy.isValid())
436281ad6265SDimitry Andric     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
43630b57cec5SDimitry Andric 
43640b57cec5SDimitry Andric   if (IsLoad) {
43650b57cec5SDimitry Andric     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
43660b57cec5SDimitry Andric                 LeftoverTy, NarrowLeftoverRegs);
43670b57cec5SDimitry Andric   }
43680b57cec5SDimitry Andric 
4369fe6060f1SDimitry Andric   LdStMI.eraseFromParent();
43700b57cec5SDimitry Andric   return Legalized;
43710b57cec5SDimitry Andric }
43720b57cec5SDimitry Andric 
43730b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
43740b57cec5SDimitry Andric LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
43750b57cec5SDimitry Andric                                      LLT NarrowTy) {
43760b57cec5SDimitry Andric   using namespace TargetOpcode;
43770eae32dcSDimitry Andric   GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
43780eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
43790b57cec5SDimitry Andric 
43800b57cec5SDimitry Andric   switch (MI.getOpcode()) {
43810b57cec5SDimitry Andric   case G_IMPLICIT_DEF:
43825ffd83dbSDimitry Andric   case G_TRUNC:
43830b57cec5SDimitry Andric   case G_AND:
43840b57cec5SDimitry Andric   case G_OR:
43850b57cec5SDimitry Andric   case G_XOR:
43860b57cec5SDimitry Andric   case G_ADD:
43870b57cec5SDimitry Andric   case G_SUB:
43880b57cec5SDimitry Andric   case G_MUL:
4389e8d8bef9SDimitry Andric   case G_PTR_ADD:
43900b57cec5SDimitry Andric   case G_SMULH:
43910b57cec5SDimitry Andric   case G_UMULH:
43920b57cec5SDimitry Andric   case G_FADD:
43930b57cec5SDimitry Andric   case G_FMUL:
43940b57cec5SDimitry Andric   case G_FSUB:
43950b57cec5SDimitry Andric   case G_FNEG:
43960b57cec5SDimitry Andric   case G_FABS:
43970b57cec5SDimitry Andric   case G_FCANONICALIZE:
43980b57cec5SDimitry Andric   case G_FDIV:
43990b57cec5SDimitry Andric   case G_FREM:
44000b57cec5SDimitry Andric   case G_FMA:
44018bcb0991SDimitry Andric   case G_FMAD:
44020b57cec5SDimitry Andric   case G_FPOW:
44030b57cec5SDimitry Andric   case G_FEXP:
44040b57cec5SDimitry Andric   case G_FEXP2:
4405*5f757f3fSDimitry Andric   case G_FEXP10:
44060b57cec5SDimitry Andric   case G_FLOG:
44070b57cec5SDimitry Andric   case G_FLOG2:
44080b57cec5SDimitry Andric   case G_FLOG10:
440906c3fb27SDimitry Andric   case G_FLDEXP:
44100b57cec5SDimitry Andric   case G_FNEARBYINT:
44110b57cec5SDimitry Andric   case G_FCEIL:
44120b57cec5SDimitry Andric   case G_FFLOOR:
44130b57cec5SDimitry Andric   case G_FRINT:
44140b57cec5SDimitry Andric   case G_INTRINSIC_ROUND:
4415e8d8bef9SDimitry Andric   case G_INTRINSIC_ROUNDEVEN:
44160b57cec5SDimitry Andric   case G_INTRINSIC_TRUNC:
44170b57cec5SDimitry Andric   case G_FCOS:
44180b57cec5SDimitry Andric   case G_FSIN:
44190b57cec5SDimitry Andric   case G_FSQRT:
44200b57cec5SDimitry Andric   case G_BSWAP:
44218bcb0991SDimitry Andric   case G_BITREVERSE:
44220b57cec5SDimitry Andric   case G_SDIV:
4423480093f4SDimitry Andric   case G_UDIV:
4424480093f4SDimitry Andric   case G_SREM:
4425480093f4SDimitry Andric   case G_UREM:
4426fe6060f1SDimitry Andric   case G_SDIVREM:
4427fe6060f1SDimitry Andric   case G_UDIVREM:
44280b57cec5SDimitry Andric   case G_SMIN:
44290b57cec5SDimitry Andric   case G_SMAX:
44300b57cec5SDimitry Andric   case G_UMIN:
44310b57cec5SDimitry Andric   case G_UMAX:
4432fe6060f1SDimitry Andric   case G_ABS:
44330b57cec5SDimitry Andric   case G_FMINNUM:
44340b57cec5SDimitry Andric   case G_FMAXNUM:
44350b57cec5SDimitry Andric   case G_FMINNUM_IEEE:
44360b57cec5SDimitry Andric   case G_FMAXNUM_IEEE:
44370b57cec5SDimitry Andric   case G_FMINIMUM:
44380b57cec5SDimitry Andric   case G_FMAXIMUM:
44395ffd83dbSDimitry Andric   case G_FSHL:
44405ffd83dbSDimitry Andric   case G_FSHR:
4441349cc55cSDimitry Andric   case G_ROTL:
4442349cc55cSDimitry Andric   case G_ROTR:
44435ffd83dbSDimitry Andric   case G_FREEZE:
44445ffd83dbSDimitry Andric   case G_SADDSAT:
44455ffd83dbSDimitry Andric   case G_SSUBSAT:
44465ffd83dbSDimitry Andric   case G_UADDSAT:
44475ffd83dbSDimitry Andric   case G_USUBSAT:
4448fe6060f1SDimitry Andric   case G_UMULO:
4449fe6060f1SDimitry Andric   case G_SMULO:
44500b57cec5SDimitry Andric   case G_SHL:
44510b57cec5SDimitry Andric   case G_LSHR:
44520b57cec5SDimitry Andric   case G_ASHR:
4453e8d8bef9SDimitry Andric   case G_SSHLSAT:
4454e8d8bef9SDimitry Andric   case G_USHLSAT:
44550b57cec5SDimitry Andric   case G_CTLZ:
44560b57cec5SDimitry Andric   case G_CTLZ_ZERO_UNDEF:
44570b57cec5SDimitry Andric   case G_CTTZ:
44580b57cec5SDimitry Andric   case G_CTTZ_ZERO_UNDEF:
44590b57cec5SDimitry Andric   case G_CTPOP:
44600b57cec5SDimitry Andric   case G_FCOPYSIGN:
44610b57cec5SDimitry Andric   case G_ZEXT:
44620b57cec5SDimitry Andric   case G_SEXT:
44630b57cec5SDimitry Andric   case G_ANYEXT:
44640b57cec5SDimitry Andric   case G_FPEXT:
44650b57cec5SDimitry Andric   case G_FPTRUNC:
44660b57cec5SDimitry Andric   case G_SITOFP:
44670b57cec5SDimitry Andric   case G_UITOFP:
44680b57cec5SDimitry Andric   case G_FPTOSI:
44690b57cec5SDimitry Andric   case G_FPTOUI:
44700b57cec5SDimitry Andric   case G_INTTOPTR:
44710b57cec5SDimitry Andric   case G_PTRTOINT:
44720b57cec5SDimitry Andric   case G_ADDRSPACE_CAST:
447381ad6265SDimitry Andric   case G_UADDO:
447481ad6265SDimitry Andric   case G_USUBO:
447581ad6265SDimitry Andric   case G_UADDE:
447681ad6265SDimitry Andric   case G_USUBE:
447781ad6265SDimitry Andric   case G_SADDO:
447881ad6265SDimitry Andric   case G_SSUBO:
447981ad6265SDimitry Andric   case G_SADDE:
448081ad6265SDimitry Andric   case G_SSUBE:
4481bdd1243dSDimitry Andric   case G_STRICT_FADD:
4482bdd1243dSDimitry Andric   case G_STRICT_FSUB:
4483bdd1243dSDimitry Andric   case G_STRICT_FMUL:
4484bdd1243dSDimitry Andric   case G_STRICT_FMA:
448506c3fb27SDimitry Andric   case G_STRICT_FLDEXP:
448606c3fb27SDimitry Andric   case G_FFREXP:
44870eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts);
44880b57cec5SDimitry Andric   case G_ICMP:
44890b57cec5SDimitry Andric   case G_FCMP:
44900eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
4491bdd1243dSDimitry Andric   case G_IS_FPCLASS:
4492bdd1243dSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
44930b57cec5SDimitry Andric   case G_SELECT:
44940eae32dcSDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
44950eae32dcSDimitry Andric       return fewerElementsVectorMultiEltType(GMI, NumElts);
44960eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
44970b57cec5SDimitry Andric   case G_PHI:
44980eae32dcSDimitry Andric     return fewerElementsVectorPhi(GMI, NumElts);
44998bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
45008bcb0991SDimitry Andric     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
45018bcb0991SDimitry Andric   case G_BUILD_VECTOR:
4502e8d8bef9SDimitry Andric     assert(TypeIdx == 0 && "not a vector type index");
4503e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4504e8d8bef9SDimitry Andric   case G_CONCAT_VECTORS:
4505e8d8bef9SDimitry Andric     if (TypeIdx != 1) // TODO: This probably does work as expected already.
4506e8d8bef9SDimitry Andric       return UnableToLegalize;
4507e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4508e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
4509e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
4510e8d8bef9SDimitry Andric     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
45110b57cec5SDimitry Andric   case G_LOAD:
45120b57cec5SDimitry Andric   case G_STORE:
4513fe6060f1SDimitry Andric     return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
45145ffd83dbSDimitry Andric   case G_SEXT_INREG:
45150eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4516fe6060f1SDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
4517fe6060f1SDimitry Andric     return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4518fe6060f1SDimitry Andric   case G_SHUFFLE_VECTOR:
4519fe6060f1SDimitry Andric     return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
45200b57cec5SDimitry Andric   default:
45210b57cec5SDimitry Andric     return UnableToLegalize;
45220b57cec5SDimitry Andric   }
45230b57cec5SDimitry Andric }
45240b57cec5SDimitry Andric 
4525fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4526fe6060f1SDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4527fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4528fe6060f1SDimitry Andric   if (TypeIdx != 0)
4529fe6060f1SDimitry Andric     return UnableToLegalize;
4530fe6060f1SDimitry Andric 
453106c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
453206c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
4533fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4534fe6060f1SDimitry Andric   // The shuffle should be canonicalized by now.
4535fe6060f1SDimitry Andric   if (DstTy != Src1Ty)
4536fe6060f1SDimitry Andric     return UnableToLegalize;
4537fe6060f1SDimitry Andric   if (DstTy != Src2Ty)
4538fe6060f1SDimitry Andric     return UnableToLegalize;
4539fe6060f1SDimitry Andric 
4540fe6060f1SDimitry Andric   if (!isPowerOf2_32(DstTy.getNumElements()))
4541fe6060f1SDimitry Andric     return UnableToLegalize;
4542fe6060f1SDimitry Andric 
4543fe6060f1SDimitry Andric   // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4544fe6060f1SDimitry Andric   // Further legalization attempts will be needed to do split further.
4545fe6060f1SDimitry Andric   NarrowTy =
4546fe6060f1SDimitry Andric       DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4547fe6060f1SDimitry Andric   unsigned NewElts = NarrowTy.getNumElements();
4548fe6060f1SDimitry Andric 
4549fe6060f1SDimitry Andric   SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4550fe6060f1SDimitry Andric   extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4551fe6060f1SDimitry Andric   extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4552fe6060f1SDimitry Andric   Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4553fe6060f1SDimitry Andric                         SplitSrc2Regs[1]};
4554fe6060f1SDimitry Andric 
4555fe6060f1SDimitry Andric   Register Hi, Lo;
4556fe6060f1SDimitry Andric 
4557fe6060f1SDimitry Andric   // If Lo or Hi uses elements from at most two of the four input vectors, then
4558fe6060f1SDimitry Andric   // express it as a vector shuffle of those two inputs.  Otherwise extract the
4559fe6060f1SDimitry Andric   // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4560fe6060f1SDimitry Andric   SmallVector<int, 16> Ops;
4561fe6060f1SDimitry Andric   for (unsigned High = 0; High < 2; ++High) {
4562fe6060f1SDimitry Andric     Register &Output = High ? Hi : Lo;
4563fe6060f1SDimitry Andric 
4564fe6060f1SDimitry Andric     // Build a shuffle mask for the output, discovering on the fly which
4565fe6060f1SDimitry Andric     // input vectors to use as shuffle operands (recorded in InputUsed).
4566fe6060f1SDimitry Andric     // If building a suitable shuffle vector proves too hard, then bail
4567fe6060f1SDimitry Andric     // out with useBuildVector set.
4568fe6060f1SDimitry Andric     unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4569fe6060f1SDimitry Andric     unsigned FirstMaskIdx = High * NewElts;
4570fe6060f1SDimitry Andric     bool UseBuildVector = false;
4571fe6060f1SDimitry Andric     for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4572fe6060f1SDimitry Andric       // The mask element.  This indexes into the input.
4573fe6060f1SDimitry Andric       int Idx = Mask[FirstMaskIdx + MaskOffset];
4574fe6060f1SDimitry Andric 
4575fe6060f1SDimitry Andric       // The input vector this mask element indexes into.
4576fe6060f1SDimitry Andric       unsigned Input = (unsigned)Idx / NewElts;
4577fe6060f1SDimitry Andric 
4578bdd1243dSDimitry Andric       if (Input >= std::size(Inputs)) {
4579fe6060f1SDimitry Andric         // The mask element does not index into any input vector.
4580fe6060f1SDimitry Andric         Ops.push_back(-1);
4581fe6060f1SDimitry Andric         continue;
4582fe6060f1SDimitry Andric       }
4583fe6060f1SDimitry Andric 
4584fe6060f1SDimitry Andric       // Turn the index into an offset from the start of the input vector.
4585fe6060f1SDimitry Andric       Idx -= Input * NewElts;
4586fe6060f1SDimitry Andric 
4587fe6060f1SDimitry Andric       // Find or create a shuffle vector operand to hold this input.
4588fe6060f1SDimitry Andric       unsigned OpNo;
4589bdd1243dSDimitry Andric       for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4590fe6060f1SDimitry Andric         if (InputUsed[OpNo] == Input) {
4591fe6060f1SDimitry Andric           // This input vector is already an operand.
4592fe6060f1SDimitry Andric           break;
4593fe6060f1SDimitry Andric         } else if (InputUsed[OpNo] == -1U) {
4594fe6060f1SDimitry Andric           // Create a new operand for this input vector.
4595fe6060f1SDimitry Andric           InputUsed[OpNo] = Input;
4596fe6060f1SDimitry Andric           break;
4597fe6060f1SDimitry Andric         }
4598fe6060f1SDimitry Andric       }
4599fe6060f1SDimitry Andric 
4600bdd1243dSDimitry Andric       if (OpNo >= std::size(InputUsed)) {
4601fe6060f1SDimitry Andric         // More than two input vectors used!  Give up on trying to create a
4602fe6060f1SDimitry Andric         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
4603fe6060f1SDimitry Andric         UseBuildVector = true;
4604fe6060f1SDimitry Andric         break;
4605fe6060f1SDimitry Andric       }
4606fe6060f1SDimitry Andric 
4607fe6060f1SDimitry Andric       // Add the mask index for the new shuffle vector.
4608fe6060f1SDimitry Andric       Ops.push_back(Idx + OpNo * NewElts);
4609fe6060f1SDimitry Andric     }
4610fe6060f1SDimitry Andric 
4611fe6060f1SDimitry Andric     if (UseBuildVector) {
4612fe6060f1SDimitry Andric       LLT EltTy = NarrowTy.getElementType();
4613fe6060f1SDimitry Andric       SmallVector<Register, 16> SVOps;
4614fe6060f1SDimitry Andric 
4615fe6060f1SDimitry Andric       // Extract the input elements by hand.
4616fe6060f1SDimitry Andric       for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4617fe6060f1SDimitry Andric         // The mask element.  This indexes into the input.
4618fe6060f1SDimitry Andric         int Idx = Mask[FirstMaskIdx + MaskOffset];
4619fe6060f1SDimitry Andric 
4620fe6060f1SDimitry Andric         // The input vector this mask element indexes into.
4621fe6060f1SDimitry Andric         unsigned Input = (unsigned)Idx / NewElts;
4622fe6060f1SDimitry Andric 
4623bdd1243dSDimitry Andric         if (Input >= std::size(Inputs)) {
4624fe6060f1SDimitry Andric           // The mask element is "undef" or indexes off the end of the input.
4625fe6060f1SDimitry Andric           SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4626fe6060f1SDimitry Andric           continue;
4627fe6060f1SDimitry Andric         }
4628fe6060f1SDimitry Andric 
4629fe6060f1SDimitry Andric         // Turn the index into an offset from the start of the input vector.
4630fe6060f1SDimitry Andric         Idx -= Input * NewElts;
4631fe6060f1SDimitry Andric 
4632fe6060f1SDimitry Andric         // Extract the vector element by hand.
4633fe6060f1SDimitry Andric         SVOps.push_back(MIRBuilder
4634fe6060f1SDimitry Andric                             .buildExtractVectorElement(
4635fe6060f1SDimitry Andric                                 EltTy, Inputs[Input],
4636fe6060f1SDimitry Andric                                 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4637fe6060f1SDimitry Andric                             .getReg(0));
4638fe6060f1SDimitry Andric       }
4639fe6060f1SDimitry Andric 
4640fe6060f1SDimitry Andric       // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4641fe6060f1SDimitry Andric       Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4642fe6060f1SDimitry Andric     } else if (InputUsed[0] == -1U) {
4643fe6060f1SDimitry Andric       // No input vectors were used! The result is undefined.
4644fe6060f1SDimitry Andric       Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4645fe6060f1SDimitry Andric     } else {
4646fe6060f1SDimitry Andric       Register Op0 = Inputs[InputUsed[0]];
4647fe6060f1SDimitry Andric       // If only one input was used, use an undefined vector for the other.
4648fe6060f1SDimitry Andric       Register Op1 = InputUsed[1] == -1U
4649fe6060f1SDimitry Andric                          ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4650fe6060f1SDimitry Andric                          : Inputs[InputUsed[1]];
4651fe6060f1SDimitry Andric       // At least one input vector was used. Create a new shuffle vector.
4652fe6060f1SDimitry Andric       Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4653fe6060f1SDimitry Andric     }
4654fe6060f1SDimitry Andric 
4655fe6060f1SDimitry Andric     Ops.clear();
4656fe6060f1SDimitry Andric   }
4657fe6060f1SDimitry Andric 
4658fe6060f1SDimitry Andric   MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4659fe6060f1SDimitry Andric   MI.eraseFromParent();
4660fe6060f1SDimitry Andric   return Legalized;
4661fe6060f1SDimitry Andric }
4662fe6060f1SDimitry Andric 
4663349cc55cSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4664349cc55cSDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4665*5f757f3fSDimitry Andric   auto &RdxMI = cast<GVecReduce>(MI);
4666349cc55cSDimitry Andric 
4667349cc55cSDimitry Andric   if (TypeIdx != 1)
4668349cc55cSDimitry Andric     return UnableToLegalize;
4669349cc55cSDimitry Andric 
4670349cc55cSDimitry Andric   // The semantics of the normal non-sequential reductions allow us to freely
4671349cc55cSDimitry Andric   // re-associate the operation.
4672*5f757f3fSDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4673349cc55cSDimitry Andric 
4674349cc55cSDimitry Andric   if (NarrowTy.isVector() &&
4675349cc55cSDimitry Andric       (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4676349cc55cSDimitry Andric     return UnableToLegalize;
4677349cc55cSDimitry Andric 
4678*5f757f3fSDimitry Andric   unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4679349cc55cSDimitry Andric   SmallVector<Register> SplitSrcs;
4680349cc55cSDimitry Andric   // If NarrowTy is a scalar then we're being asked to scalarize.
4681349cc55cSDimitry Andric   const unsigned NumParts =
4682349cc55cSDimitry Andric       NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4683349cc55cSDimitry Andric                           : SrcTy.getNumElements();
4684349cc55cSDimitry Andric 
4685349cc55cSDimitry Andric   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4686349cc55cSDimitry Andric   if (NarrowTy.isScalar()) {
4687349cc55cSDimitry Andric     if (DstTy != NarrowTy)
4688349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle implicit extensions.
4689349cc55cSDimitry Andric 
4690349cc55cSDimitry Andric     if (isPowerOf2_32(NumParts)) {
4691349cc55cSDimitry Andric       // Generate a tree of scalar operations to reduce the critical path.
4692349cc55cSDimitry Andric       SmallVector<Register> PartialResults;
4693349cc55cSDimitry Andric       unsigned NumPartsLeft = NumParts;
4694349cc55cSDimitry Andric       while (NumPartsLeft > 1) {
4695349cc55cSDimitry Andric         for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4696349cc55cSDimitry Andric           PartialResults.emplace_back(
4697349cc55cSDimitry Andric               MIRBuilder
4698349cc55cSDimitry Andric                   .buildInstr(ScalarOpc, {NarrowTy},
4699349cc55cSDimitry Andric                               {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4700349cc55cSDimitry Andric                   .getReg(0));
4701349cc55cSDimitry Andric         }
4702349cc55cSDimitry Andric         SplitSrcs = PartialResults;
4703349cc55cSDimitry Andric         PartialResults.clear();
4704349cc55cSDimitry Andric         NumPartsLeft = SplitSrcs.size();
4705349cc55cSDimitry Andric       }
4706349cc55cSDimitry Andric       assert(SplitSrcs.size() == 1);
4707349cc55cSDimitry Andric       MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4708349cc55cSDimitry Andric       MI.eraseFromParent();
4709349cc55cSDimitry Andric       return Legalized;
4710349cc55cSDimitry Andric     }
4711349cc55cSDimitry Andric     // If we can't generate a tree, then just do sequential operations.
4712349cc55cSDimitry Andric     Register Acc = SplitSrcs[0];
4713349cc55cSDimitry Andric     for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4714349cc55cSDimitry Andric       Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4715349cc55cSDimitry Andric                 .getReg(0);
4716349cc55cSDimitry Andric     MIRBuilder.buildCopy(DstReg, Acc);
4717349cc55cSDimitry Andric     MI.eraseFromParent();
4718349cc55cSDimitry Andric     return Legalized;
4719349cc55cSDimitry Andric   }
4720349cc55cSDimitry Andric   SmallVector<Register> PartialReductions;
4721349cc55cSDimitry Andric   for (unsigned Part = 0; Part < NumParts; ++Part) {
4722349cc55cSDimitry Andric     PartialReductions.push_back(
4723*5f757f3fSDimitry Andric         MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
4724*5f757f3fSDimitry Andric             .getReg(0));
4725349cc55cSDimitry Andric   }
4726349cc55cSDimitry Andric 
4727fe6060f1SDimitry Andric   // If the types involved are powers of 2, we can generate intermediate vector
4728fe6060f1SDimitry Andric   // ops, before generating a final reduction operation.
4729fe6060f1SDimitry Andric   if (isPowerOf2_32(SrcTy.getNumElements()) &&
4730fe6060f1SDimitry Andric       isPowerOf2_32(NarrowTy.getNumElements())) {
4731fe6060f1SDimitry Andric     return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4732fe6060f1SDimitry Andric   }
4733fe6060f1SDimitry Andric 
4734fe6060f1SDimitry Andric   Register Acc = PartialReductions[0];
4735fe6060f1SDimitry Andric   for (unsigned Part = 1; Part < NumParts; ++Part) {
4736fe6060f1SDimitry Andric     if (Part == NumParts - 1) {
4737fe6060f1SDimitry Andric       MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4738fe6060f1SDimitry Andric                             {Acc, PartialReductions[Part]});
4739fe6060f1SDimitry Andric     } else {
4740fe6060f1SDimitry Andric       Acc = MIRBuilder
4741fe6060f1SDimitry Andric                 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4742fe6060f1SDimitry Andric                 .getReg(0);
4743fe6060f1SDimitry Andric     }
4744fe6060f1SDimitry Andric   }
4745fe6060f1SDimitry Andric   MI.eraseFromParent();
4746fe6060f1SDimitry Andric   return Legalized;
4747fe6060f1SDimitry Andric }
4748fe6060f1SDimitry Andric 
4749fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
4750fe6060f1SDimitry Andric LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4751fe6060f1SDimitry Andric                                         LLT SrcTy, LLT NarrowTy,
4752fe6060f1SDimitry Andric                                         unsigned ScalarOpc) {
4753fe6060f1SDimitry Andric   SmallVector<Register> SplitSrcs;
4754fe6060f1SDimitry Andric   // Split the sources into NarrowTy size pieces.
4755fe6060f1SDimitry Andric   extractParts(SrcReg, NarrowTy,
4756fe6060f1SDimitry Andric                SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4757fe6060f1SDimitry Andric   // We're going to do a tree reduction using vector operations until we have
4758fe6060f1SDimitry Andric   // one NarrowTy size value left.
4759fe6060f1SDimitry Andric   while (SplitSrcs.size() > 1) {
4760fe6060f1SDimitry Andric     SmallVector<Register> PartialRdxs;
4761fe6060f1SDimitry Andric     for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4762fe6060f1SDimitry Andric       Register LHS = SplitSrcs[Idx];
4763fe6060f1SDimitry Andric       Register RHS = SplitSrcs[Idx + 1];
4764fe6060f1SDimitry Andric       // Create the intermediate vector op.
4765fe6060f1SDimitry Andric       Register Res =
4766fe6060f1SDimitry Andric           MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4767fe6060f1SDimitry Andric       PartialRdxs.push_back(Res);
4768fe6060f1SDimitry Andric     }
4769fe6060f1SDimitry Andric     SplitSrcs = std::move(PartialRdxs);
4770fe6060f1SDimitry Andric   }
4771fe6060f1SDimitry Andric   // Finally generate the requested NarrowTy based reduction.
4772fe6060f1SDimitry Andric   Observer.changingInstr(MI);
4773fe6060f1SDimitry Andric   MI.getOperand(1).setReg(SplitSrcs[0]);
4774fe6060f1SDimitry Andric   Observer.changedInstr(MI);
4775fe6060f1SDimitry Andric   return Legalized;
4776fe6060f1SDimitry Andric }
4777fe6060f1SDimitry Andric 
47780b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
47790b57cec5SDimitry Andric LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
47800b57cec5SDimitry Andric                                              const LLT HalfTy, const LLT AmtTy) {
47810b57cec5SDimitry Andric 
47820b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
47830b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
47845ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
47850b57cec5SDimitry Andric 
4786349cc55cSDimitry Andric   if (Amt.isZero()) {
4787bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
47880b57cec5SDimitry Andric     MI.eraseFromParent();
47890b57cec5SDimitry Andric     return Legalized;
47900b57cec5SDimitry Andric   }
47910b57cec5SDimitry Andric 
47920b57cec5SDimitry Andric   LLT NVT = HalfTy;
47930b57cec5SDimitry Andric   unsigned NVTBits = HalfTy.getSizeInBits();
47940b57cec5SDimitry Andric   unsigned VTBits = 2 * NVTBits;
47950b57cec5SDimitry Andric 
47960b57cec5SDimitry Andric   SrcOp Lo(Register(0)), Hi(Register(0));
47970b57cec5SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_SHL) {
47980b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
47990b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
48000b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
48010b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
48020b57cec5SDimitry Andric       Hi = MIRBuilder.buildShl(NVT, InL,
48030b57cec5SDimitry Andric                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
48040b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
48050b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
48060b57cec5SDimitry Andric       Hi = InL;
48070b57cec5SDimitry Andric     } else {
48080b57cec5SDimitry Andric       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
48090b57cec5SDimitry Andric       auto OrLHS =
48100b57cec5SDimitry Andric           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
48110b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildLShr(
48120b57cec5SDimitry Andric           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
48130b57cec5SDimitry Andric       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
48140b57cec5SDimitry Andric     }
48150b57cec5SDimitry Andric   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
48160b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
48170b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
48180b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
48190b57cec5SDimitry Andric       Lo = MIRBuilder.buildLShr(NVT, InH,
48200b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
48210b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
48220b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
48230b57cec5SDimitry Andric       Lo = InH;
48240b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
48250b57cec5SDimitry Andric     } else {
48260b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
48270b57cec5SDimitry Andric 
48280b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
48290b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
48300b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
48310b57cec5SDimitry Andric 
48320b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
48330b57cec5SDimitry Andric       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
48340b57cec5SDimitry Andric     }
48350b57cec5SDimitry Andric   } else {
48360b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
48370b57cec5SDimitry Andric       Hi = Lo = MIRBuilder.buildAShr(
48380b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
48390b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
48400b57cec5SDimitry Andric       Lo = MIRBuilder.buildAShr(NVT, InH,
48410b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
48420b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
48430b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
48440b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
48450b57cec5SDimitry Andric       Lo = InH;
48460b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
48470b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
48480b57cec5SDimitry Andric     } else {
48490b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
48500b57cec5SDimitry Andric 
48510b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
48520b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
48530b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
48540b57cec5SDimitry Andric 
48550b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
48560b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
48570b57cec5SDimitry Andric     }
48580b57cec5SDimitry Andric   }
48590b57cec5SDimitry Andric 
4860bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
48610b57cec5SDimitry Andric   MI.eraseFromParent();
48620b57cec5SDimitry Andric 
48630b57cec5SDimitry Andric   return Legalized;
48640b57cec5SDimitry Andric }
48650b57cec5SDimitry Andric 
48660b57cec5SDimitry Andric // TODO: Optimize if constant shift amount.
48670b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
48680b57cec5SDimitry Andric LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
48690b57cec5SDimitry Andric                                    LLT RequestedTy) {
48700b57cec5SDimitry Andric   if (TypeIdx == 1) {
48710b57cec5SDimitry Andric     Observer.changingInstr(MI);
48720b57cec5SDimitry Andric     narrowScalarSrc(MI, RequestedTy, 2);
48730b57cec5SDimitry Andric     Observer.changedInstr(MI);
48740b57cec5SDimitry Andric     return Legalized;
48750b57cec5SDimitry Andric   }
48760b57cec5SDimitry Andric 
48770b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
48780b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
48790b57cec5SDimitry Andric   if (DstTy.isVector())
48800b57cec5SDimitry Andric     return UnableToLegalize;
48810b57cec5SDimitry Andric 
48820b57cec5SDimitry Andric   Register Amt = MI.getOperand(2).getReg();
48830b57cec5SDimitry Andric   LLT ShiftAmtTy = MRI.getType(Amt);
48840b57cec5SDimitry Andric   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
48850b57cec5SDimitry Andric   if (DstEltSize % 2 != 0)
48860b57cec5SDimitry Andric     return UnableToLegalize;
48870b57cec5SDimitry Andric 
48880b57cec5SDimitry Andric   // Ignore the input type. We can only go to exactly half the size of the
48890b57cec5SDimitry Andric   // input. If that isn't small enough, the resulting pieces will be further
48900b57cec5SDimitry Andric   // legalized.
48910b57cec5SDimitry Andric   const unsigned NewBitSize = DstEltSize / 2;
48920b57cec5SDimitry Andric   const LLT HalfTy = LLT::scalar(NewBitSize);
48930b57cec5SDimitry Andric   const LLT CondTy = LLT::scalar(1);
48940b57cec5SDimitry Andric 
4895349cc55cSDimitry Andric   if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
4896349cc55cSDimitry Andric     return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
4897349cc55cSDimitry Andric                                        ShiftAmtTy);
48980b57cec5SDimitry Andric   }
48990b57cec5SDimitry Andric 
49000b57cec5SDimitry Andric   // TODO: Expand with known bits.
49010b57cec5SDimitry Andric 
49020b57cec5SDimitry Andric   // Handle the fully general expansion by an unknown amount.
49030b57cec5SDimitry Andric   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
49040b57cec5SDimitry Andric 
49050b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
49060b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
49075ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
49080b57cec5SDimitry Andric 
49090b57cec5SDimitry Andric   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
49100b57cec5SDimitry Andric   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
49110b57cec5SDimitry Andric 
49120b57cec5SDimitry Andric   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
49130b57cec5SDimitry Andric   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
49140b57cec5SDimitry Andric   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
49150b57cec5SDimitry Andric 
49160b57cec5SDimitry Andric   Register ResultRegs[2];
49170b57cec5SDimitry Andric   switch (MI.getOpcode()) {
49180b57cec5SDimitry Andric   case TargetOpcode::G_SHL: {
49190b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
49208bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
49210b57cec5SDimitry Andric 
49228bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
49238bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
49248bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
49250b57cec5SDimitry Andric 
49260b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
49270b57cec5SDimitry Andric     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
49280b57cec5SDimitry Andric     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
49290b57cec5SDimitry Andric 
49300b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
49310b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(
49320b57cec5SDimitry Andric         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
49330b57cec5SDimitry Andric 
49340b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
49350b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
49360b57cec5SDimitry Andric     break;
49370b57cec5SDimitry Andric   }
49388bcb0991SDimitry Andric   case TargetOpcode::G_LSHR:
49390b57cec5SDimitry Andric   case TargetOpcode::G_ASHR: {
49400b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
49418bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
49420b57cec5SDimitry Andric 
49438bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
49448bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
49458bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
49460b57cec5SDimitry Andric 
49470b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
49488bcb0991SDimitry Andric     MachineInstrBuilder HiL;
49498bcb0991SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
49508bcb0991SDimitry Andric       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
49518bcb0991SDimitry Andric     } else {
49528bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
49538bcb0991SDimitry Andric       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
49548bcb0991SDimitry Andric     }
49558bcb0991SDimitry Andric     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
49568bcb0991SDimitry Andric                                      {InH, AmtExcess});     // Lo from Hi part.
49570b57cec5SDimitry Andric 
49580b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(
49590b57cec5SDimitry Andric         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
49600b57cec5SDimitry Andric 
49610b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
49620b57cec5SDimitry Andric 
49630b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
49640b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
49650b57cec5SDimitry Andric     break;
49660b57cec5SDimitry Andric   }
49670b57cec5SDimitry Andric   default:
49680b57cec5SDimitry Andric     llvm_unreachable("not a shift");
49690b57cec5SDimitry Andric   }
49700b57cec5SDimitry Andric 
4971bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
49720b57cec5SDimitry Andric   MI.eraseFromParent();
49730b57cec5SDimitry Andric   return Legalized;
49740b57cec5SDimitry Andric }
49750b57cec5SDimitry Andric 
49760b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
49770b57cec5SDimitry Andric LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
49780b57cec5SDimitry Andric                                        LLT MoreTy) {
49790b57cec5SDimitry Andric   assert(TypeIdx == 0 && "Expecting only Idx 0");
49800b57cec5SDimitry Andric 
49810b57cec5SDimitry Andric   Observer.changingInstr(MI);
49820b57cec5SDimitry Andric   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
49830b57cec5SDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
49840b57cec5SDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
49850b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, I);
49860b57cec5SDimitry Andric   }
49870b57cec5SDimitry Andric 
49880b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
49890b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
49900b57cec5SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
49910b57cec5SDimitry Andric   Observer.changedInstr(MI);
49920b57cec5SDimitry Andric   return Legalized;
49930b57cec5SDimitry Andric }
49940b57cec5SDimitry Andric 
49950b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
49960b57cec5SDimitry Andric LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
49970b57cec5SDimitry Andric                                     LLT MoreTy) {
49980b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
49990b57cec5SDimitry Andric   switch (Opc) {
50008bcb0991SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF:
50018bcb0991SDimitry Andric   case TargetOpcode::G_LOAD: {
50028bcb0991SDimitry Andric     if (TypeIdx != 0)
50038bcb0991SDimitry Andric       return UnableToLegalize;
50040b57cec5SDimitry Andric     Observer.changingInstr(MI);
50050b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
50060b57cec5SDimitry Andric     Observer.changedInstr(MI);
50070b57cec5SDimitry Andric     return Legalized;
50080b57cec5SDimitry Andric   }
50098bcb0991SDimitry Andric   case TargetOpcode::G_STORE:
50108bcb0991SDimitry Andric     if (TypeIdx != 0)
50118bcb0991SDimitry Andric       return UnableToLegalize;
50128bcb0991SDimitry Andric     Observer.changingInstr(MI);
50138bcb0991SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 0);
50148bcb0991SDimitry Andric     Observer.changedInstr(MI);
50158bcb0991SDimitry Andric     return Legalized;
50160b57cec5SDimitry Andric   case TargetOpcode::G_AND:
50170b57cec5SDimitry Andric   case TargetOpcode::G_OR:
50180b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
50190eae32dcSDimitry Andric   case TargetOpcode::G_ADD:
50200eae32dcSDimitry Andric   case TargetOpcode::G_SUB:
50210eae32dcSDimitry Andric   case TargetOpcode::G_MUL:
50220eae32dcSDimitry Andric   case TargetOpcode::G_FADD:
5023*5f757f3fSDimitry Andric   case TargetOpcode::G_FSUB:
50240eae32dcSDimitry Andric   case TargetOpcode::G_FMUL:
5025*5f757f3fSDimitry Andric   case TargetOpcode::G_FDIV:
50260eae32dcSDimitry Andric   case TargetOpcode::G_UADDSAT:
50270eae32dcSDimitry Andric   case TargetOpcode::G_USUBSAT:
50280eae32dcSDimitry Andric   case TargetOpcode::G_SADDSAT:
50290eae32dcSDimitry Andric   case TargetOpcode::G_SSUBSAT:
50300b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
50310b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
50320b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
5033480093f4SDimitry Andric   case TargetOpcode::G_UMAX:
5034480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM:
5035480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM:
5036480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
5037480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
5038480093f4SDimitry Andric   case TargetOpcode::G_FMINIMUM:
5039bdd1243dSDimitry Andric   case TargetOpcode::G_FMAXIMUM:
5040bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FADD:
5041bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB:
5042bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FMUL: {
50430b57cec5SDimitry Andric     Observer.changingInstr(MI);
50440b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
50450b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
50460b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
50470b57cec5SDimitry Andric     Observer.changedInstr(MI);
50480b57cec5SDimitry Andric     return Legalized;
50490b57cec5SDimitry Andric   }
50500eae32dcSDimitry Andric   case TargetOpcode::G_FMA:
5051bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FMA:
50520eae32dcSDimitry Andric   case TargetOpcode::G_FSHR:
50530eae32dcSDimitry Andric   case TargetOpcode::G_FSHL: {
50540eae32dcSDimitry Andric     Observer.changingInstr(MI);
50550eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
50560eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
50570eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
50580eae32dcSDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
50590eae32dcSDimitry Andric     Observer.changedInstr(MI);
50600eae32dcSDimitry Andric     return Legalized;
50610eae32dcSDimitry Andric   }
506206c3fb27SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
50630b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
50640b57cec5SDimitry Andric     if (TypeIdx != 1)
50650b57cec5SDimitry Andric       return UnableToLegalize;
50660b57cec5SDimitry Andric     Observer.changingInstr(MI);
50670b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
50680b57cec5SDimitry Andric     Observer.changedInstr(MI);
50690b57cec5SDimitry Andric     return Legalized;
50700b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
507106c3fb27SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
50725ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
50730eae32dcSDimitry Andric   case TargetOpcode::G_FNEG:
50740eae32dcSDimitry Andric   case TargetOpcode::G_FABS:
5075*5f757f3fSDimitry Andric   case TargetOpcode::G_FSQRT:
5076*5f757f3fSDimitry Andric   case TargetOpcode::G_FCEIL:
5077*5f757f3fSDimitry Andric   case TargetOpcode::G_FFLOOR:
5078*5f757f3fSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
5079*5f757f3fSDimitry Andric   case TargetOpcode::G_FRINT:
5080*5f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
5081*5f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5082*5f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
50830eae32dcSDimitry Andric   case TargetOpcode::G_BSWAP:
50840eae32dcSDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
50850eae32dcSDimitry Andric   case TargetOpcode::G_SEXT_INREG:
50860b57cec5SDimitry Andric     if (TypeIdx != 0)
50870b57cec5SDimitry Andric       return UnableToLegalize;
50880b57cec5SDimitry Andric     Observer.changingInstr(MI);
50890b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
50900b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
50910b57cec5SDimitry Andric     Observer.changedInstr(MI);
50920b57cec5SDimitry Andric     return Legalized;
509381ad6265SDimitry Andric   case TargetOpcode::G_SELECT: {
509406c3fb27SDimitry Andric     auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
509581ad6265SDimitry Andric     if (TypeIdx == 1) {
509681ad6265SDimitry Andric       if (!CondTy.isScalar() ||
509781ad6265SDimitry Andric           DstTy.getElementCount() != MoreTy.getElementCount())
50980b57cec5SDimitry Andric         return UnableToLegalize;
509981ad6265SDimitry Andric 
510081ad6265SDimitry Andric       // This is turning a scalar select of vectors into a vector
510181ad6265SDimitry Andric       // select. Broadcast the select condition.
510281ad6265SDimitry Andric       auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
510381ad6265SDimitry Andric       Observer.changingInstr(MI);
510481ad6265SDimitry Andric       MI.getOperand(1).setReg(ShufSplat.getReg(0));
510581ad6265SDimitry Andric       Observer.changedInstr(MI);
510681ad6265SDimitry Andric       return Legalized;
510781ad6265SDimitry Andric     }
510881ad6265SDimitry Andric 
510981ad6265SDimitry Andric     if (CondTy.isVector())
51100b57cec5SDimitry Andric       return UnableToLegalize;
51110b57cec5SDimitry Andric 
51120b57cec5SDimitry Andric     Observer.changingInstr(MI);
51130b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
51140b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
51150b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
51160b57cec5SDimitry Andric     Observer.changedInstr(MI);
51170b57cec5SDimitry Andric     return Legalized;
511881ad6265SDimitry Andric   }
51190eae32dcSDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
51208bcb0991SDimitry Andric     return UnableToLegalize;
51210b57cec5SDimitry Andric   case TargetOpcode::G_PHI:
51220b57cec5SDimitry Andric     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5123fe6060f1SDimitry Andric   case TargetOpcode::G_SHUFFLE_VECTOR:
5124fe6060f1SDimitry Andric     return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
51250eae32dcSDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
51260eae32dcSDimitry Andric     SmallVector<SrcOp, 8> Elts;
51270eae32dcSDimitry Andric     for (auto Op : MI.uses()) {
51280eae32dcSDimitry Andric       Elts.push_back(Op.getReg());
51290eae32dcSDimitry Andric     }
51300eae32dcSDimitry Andric 
51310eae32dcSDimitry Andric     for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
51320eae32dcSDimitry Andric       Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
51330eae32dcSDimitry Andric     }
51340eae32dcSDimitry Andric 
51350eae32dcSDimitry Andric     MIRBuilder.buildDeleteTrailingVectorElements(
51360eae32dcSDimitry Andric         MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
51370eae32dcSDimitry Andric     MI.eraseFromParent();
51380eae32dcSDimitry Andric     return Legalized;
51390eae32dcSDimitry Andric   }
5140*5f757f3fSDimitry Andric   case TargetOpcode::G_TRUNC:
514106c3fb27SDimitry Andric   case TargetOpcode::G_FPTRUNC:
5142*5f757f3fSDimitry Andric   case TargetOpcode::G_FPEXT:
5143*5f757f3fSDimitry Andric   case TargetOpcode::G_FPTOSI:
5144*5f757f3fSDimitry Andric   case TargetOpcode::G_FPTOUI:
5145*5f757f3fSDimitry Andric   case TargetOpcode::G_SITOFP:
5146*5f757f3fSDimitry Andric   case TargetOpcode::G_UITOFP: {
514706c3fb27SDimitry Andric     if (TypeIdx != 0)
514806c3fb27SDimitry Andric       return UnableToLegalize;
514906c3fb27SDimitry Andric     Observer.changingInstr(MI);
515006c3fb27SDimitry Andric     LLT SrcTy = LLT::fixed_vector(
515106c3fb27SDimitry Andric         MoreTy.getNumElements(),
515206c3fb27SDimitry Andric         MRI.getType(MI.getOperand(1).getReg()).getElementType());
515306c3fb27SDimitry Andric     moreElementsVectorSrc(MI, SrcTy, 1);
515406c3fb27SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
515506c3fb27SDimitry Andric     Observer.changedInstr(MI);
515606c3fb27SDimitry Andric     return Legalized;
515706c3fb27SDimitry Andric   }
51580b57cec5SDimitry Andric   default:
51590b57cec5SDimitry Andric     return UnableToLegalize;
51600b57cec5SDimitry Andric   }
51610b57cec5SDimitry Andric }
51620b57cec5SDimitry Andric 
516306c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
516406c3fb27SDimitry Andric LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
516506c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5166bdd1243dSDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5167bdd1243dSDimitry Andric   unsigned MaskNumElts = Mask.size();
5168bdd1243dSDimitry Andric   unsigned SrcNumElts = SrcTy.getNumElements();
5169bdd1243dSDimitry Andric   LLT DestEltTy = DstTy.getElementType();
5170bdd1243dSDimitry Andric 
517106c3fb27SDimitry Andric   if (MaskNumElts == SrcNumElts)
517206c3fb27SDimitry Andric     return Legalized;
517306c3fb27SDimitry Andric 
517406c3fb27SDimitry Andric   if (MaskNumElts < SrcNumElts) {
517506c3fb27SDimitry Andric     // Extend mask to match new destination vector size with
517606c3fb27SDimitry Andric     // undef values.
517706c3fb27SDimitry Andric     SmallVector<int, 16> NewMask(Mask);
517806c3fb27SDimitry Andric     for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
517906c3fb27SDimitry Andric       NewMask.push_back(-1);
518006c3fb27SDimitry Andric 
518106c3fb27SDimitry Andric     moreElementsVectorDst(MI, SrcTy, 0);
518206c3fb27SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
518306c3fb27SDimitry Andric     MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
518406c3fb27SDimitry Andric                                   MI.getOperand(1).getReg(),
518506c3fb27SDimitry Andric                                   MI.getOperand(2).getReg(), NewMask);
518606c3fb27SDimitry Andric     MI.eraseFromParent();
518706c3fb27SDimitry Andric 
518806c3fb27SDimitry Andric     return Legalized;
5189bdd1243dSDimitry Andric   }
5190bdd1243dSDimitry Andric 
5191bdd1243dSDimitry Andric   unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5192bdd1243dSDimitry Andric   unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5193bdd1243dSDimitry Andric   LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5194bdd1243dSDimitry Andric 
5195bdd1243dSDimitry Andric   // Create new source vectors by concatenating the initial
5196bdd1243dSDimitry Andric   // source vectors with undefined vectors of the same size.
5197bdd1243dSDimitry Andric   auto Undef = MIRBuilder.buildUndef(SrcTy);
5198bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5199bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5200bdd1243dSDimitry Andric   MOps1[0] = MI.getOperand(1).getReg();
5201bdd1243dSDimitry Andric   MOps2[0] = MI.getOperand(2).getReg();
5202bdd1243dSDimitry Andric 
5203bdd1243dSDimitry Andric   auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5204bdd1243dSDimitry Andric   auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5205bdd1243dSDimitry Andric 
5206bdd1243dSDimitry Andric   // Readjust mask for new input vector length.
5207bdd1243dSDimitry Andric   SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5208bdd1243dSDimitry Andric   for (unsigned I = 0; I != MaskNumElts; ++I) {
5209bdd1243dSDimitry Andric     int Idx = Mask[I];
5210bdd1243dSDimitry Andric     if (Idx >= static_cast<int>(SrcNumElts))
5211bdd1243dSDimitry Andric       Idx += PaddedMaskNumElts - SrcNumElts;
5212bdd1243dSDimitry Andric     MappedOps[I] = Idx;
5213bdd1243dSDimitry Andric   }
5214bdd1243dSDimitry Andric 
5215bdd1243dSDimitry Andric   // If we got more elements than required, extract subvector.
5216bdd1243dSDimitry Andric   if (MaskNumElts != PaddedMaskNumElts) {
5217bdd1243dSDimitry Andric     auto Shuffle =
5218bdd1243dSDimitry Andric         MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5219bdd1243dSDimitry Andric 
5220bdd1243dSDimitry Andric     SmallVector<Register, 16> Elts(MaskNumElts);
5221bdd1243dSDimitry Andric     for (unsigned I = 0; I < MaskNumElts; ++I) {
5222bdd1243dSDimitry Andric       Elts[I] =
5223bdd1243dSDimitry Andric           MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5224bdd1243dSDimitry Andric               .getReg(0);
5225bdd1243dSDimitry Andric     }
5226bdd1243dSDimitry Andric     MIRBuilder.buildBuildVector(DstReg, Elts);
5227bdd1243dSDimitry Andric   } else {
5228bdd1243dSDimitry Andric     MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5229bdd1243dSDimitry Andric   }
5230bdd1243dSDimitry Andric 
5231bdd1243dSDimitry Andric   MI.eraseFromParent();
5232bdd1243dSDimitry Andric   return LegalizerHelper::LegalizeResult::Legalized;
5233bdd1243dSDimitry Andric }
5234bdd1243dSDimitry Andric 
5235fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
5236fe6060f1SDimitry Andric LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5237fe6060f1SDimitry Andric                                            unsigned int TypeIdx, LLT MoreTy) {
523806c3fb27SDimitry Andric   auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5239fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5240fe6060f1SDimitry Andric   unsigned NumElts = DstTy.getNumElements();
5241fe6060f1SDimitry Andric   unsigned WidenNumElts = MoreTy.getNumElements();
5242fe6060f1SDimitry Andric 
5243bdd1243dSDimitry Andric   if (DstTy.isVector() && Src1Ty.isVector() &&
524406c3fb27SDimitry Andric       DstTy.getNumElements() != Src1Ty.getNumElements()) {
524506c3fb27SDimitry Andric     return equalizeVectorShuffleLengths(MI);
5246bdd1243dSDimitry Andric   }
5247bdd1243dSDimitry Andric 
5248bdd1243dSDimitry Andric   if (TypeIdx != 0)
5249bdd1243dSDimitry Andric     return UnableToLegalize;
5250bdd1243dSDimitry Andric 
5251fe6060f1SDimitry Andric   // Expect a canonicalized shuffle.
5252fe6060f1SDimitry Andric   if (DstTy != Src1Ty || DstTy != Src2Ty)
5253fe6060f1SDimitry Andric     return UnableToLegalize;
5254fe6060f1SDimitry Andric 
5255fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 1);
5256fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 2);
5257fe6060f1SDimitry Andric 
5258fe6060f1SDimitry Andric   // Adjust mask based on new input vector length.
5259fe6060f1SDimitry Andric   SmallVector<int, 16> NewMask;
5260fe6060f1SDimitry Andric   for (unsigned I = 0; I != NumElts; ++I) {
5261fe6060f1SDimitry Andric     int Idx = Mask[I];
5262fe6060f1SDimitry Andric     if (Idx < static_cast<int>(NumElts))
5263fe6060f1SDimitry Andric       NewMask.push_back(Idx);
5264fe6060f1SDimitry Andric     else
5265fe6060f1SDimitry Andric       NewMask.push_back(Idx - NumElts + WidenNumElts);
5266fe6060f1SDimitry Andric   }
5267fe6060f1SDimitry Andric   for (unsigned I = NumElts; I != WidenNumElts; ++I)
5268fe6060f1SDimitry Andric     NewMask.push_back(-1);
5269fe6060f1SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
5270fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
5271fe6060f1SDimitry Andric   MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5272fe6060f1SDimitry Andric                                 MI.getOperand(1).getReg(),
5273fe6060f1SDimitry Andric                                 MI.getOperand(2).getReg(), NewMask);
5274fe6060f1SDimitry Andric   MI.eraseFromParent();
5275fe6060f1SDimitry Andric   return Legalized;
5276fe6060f1SDimitry Andric }
5277fe6060f1SDimitry Andric 
52780b57cec5SDimitry Andric void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
52790b57cec5SDimitry Andric                                         ArrayRef<Register> Src1Regs,
52800b57cec5SDimitry Andric                                         ArrayRef<Register> Src2Regs,
52810b57cec5SDimitry Andric                                         LLT NarrowTy) {
52820b57cec5SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
52830b57cec5SDimitry Andric   unsigned SrcParts = Src1Regs.size();
52840b57cec5SDimitry Andric   unsigned DstParts = DstRegs.size();
52850b57cec5SDimitry Andric 
52860b57cec5SDimitry Andric   unsigned DstIdx = 0; // Low bits of the result.
52870b57cec5SDimitry Andric   Register FactorSum =
52880b57cec5SDimitry Andric       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
52890b57cec5SDimitry Andric   DstRegs[DstIdx] = FactorSum;
52900b57cec5SDimitry Andric 
52910b57cec5SDimitry Andric   unsigned CarrySumPrevDstIdx;
52920b57cec5SDimitry Andric   SmallVector<Register, 4> Factors;
52930b57cec5SDimitry Andric 
52940b57cec5SDimitry Andric   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
52950b57cec5SDimitry Andric     // Collect low parts of muls for DstIdx.
52960b57cec5SDimitry Andric     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
52970b57cec5SDimitry Andric          i <= std::min(DstIdx, SrcParts - 1); ++i) {
52980b57cec5SDimitry Andric       MachineInstrBuilder Mul =
52990b57cec5SDimitry Andric           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
53000b57cec5SDimitry Andric       Factors.push_back(Mul.getReg(0));
53010b57cec5SDimitry Andric     }
53020b57cec5SDimitry Andric     // Collect high parts of muls from previous DstIdx.
53030b57cec5SDimitry Andric     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
53040b57cec5SDimitry Andric          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
53050b57cec5SDimitry Andric       MachineInstrBuilder Umulh =
53060b57cec5SDimitry Andric           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
53070b57cec5SDimitry Andric       Factors.push_back(Umulh.getReg(0));
53080b57cec5SDimitry Andric     }
5309480093f4SDimitry Andric     // Add CarrySum from additions calculated for previous DstIdx.
53100b57cec5SDimitry Andric     if (DstIdx != 1) {
53110b57cec5SDimitry Andric       Factors.push_back(CarrySumPrevDstIdx);
53120b57cec5SDimitry Andric     }
53130b57cec5SDimitry Andric 
53140b57cec5SDimitry Andric     Register CarrySum;
53150b57cec5SDimitry Andric     // Add all factors and accumulate all carries into CarrySum.
53160b57cec5SDimitry Andric     if (DstIdx != DstParts - 1) {
53170b57cec5SDimitry Andric       MachineInstrBuilder Uaddo =
53180b57cec5SDimitry Andric           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
53190b57cec5SDimitry Andric       FactorSum = Uaddo.getReg(0);
53200b57cec5SDimitry Andric       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
53210b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i) {
53220b57cec5SDimitry Andric         MachineInstrBuilder Uaddo =
53230b57cec5SDimitry Andric             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
53240b57cec5SDimitry Andric         FactorSum = Uaddo.getReg(0);
53250b57cec5SDimitry Andric         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
53260b57cec5SDimitry Andric         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
53270b57cec5SDimitry Andric       }
53280b57cec5SDimitry Andric     } else {
53290b57cec5SDimitry Andric       // Since value for the next index is not calculated, neither is CarrySum.
53300b57cec5SDimitry Andric       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
53310b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i)
53320b57cec5SDimitry Andric         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
53330b57cec5SDimitry Andric     }
53340b57cec5SDimitry Andric 
53350b57cec5SDimitry Andric     CarrySumPrevDstIdx = CarrySum;
53360b57cec5SDimitry Andric     DstRegs[DstIdx] = FactorSum;
53370b57cec5SDimitry Andric     Factors.clear();
53380b57cec5SDimitry Andric   }
53390b57cec5SDimitry Andric }
53400b57cec5SDimitry Andric 
53410b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
5342fe6060f1SDimitry Andric LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5343fe6060f1SDimitry Andric                                     LLT NarrowTy) {
5344fe6060f1SDimitry Andric   if (TypeIdx != 0)
5345fe6060f1SDimitry Andric     return UnableToLegalize;
5346fe6060f1SDimitry Andric 
5347fe6060f1SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5348fe6060f1SDimitry Andric   LLT DstType = MRI.getType(DstReg);
5349fe6060f1SDimitry Andric   // FIXME: add support for vector types
5350fe6060f1SDimitry Andric   if (DstType.isVector())
5351fe6060f1SDimitry Andric     return UnableToLegalize;
5352fe6060f1SDimitry Andric 
5353fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
5354fe6060f1SDimitry Andric   unsigned OpO, OpE, OpF;
5355fe6060f1SDimitry Andric   switch (Opcode) {
5356fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
5357fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
5358fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
5359fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
5360fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
5361fe6060f1SDimitry Andric     OpO = TargetOpcode::G_UADDO;
5362fe6060f1SDimitry Andric     OpE = TargetOpcode::G_UADDE;
5363fe6060f1SDimitry Andric     OpF = TargetOpcode::G_UADDE;
5364fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5365fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SADDE;
5366fe6060f1SDimitry Andric     break;
5367fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
5368fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
5369fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
5370fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
5371fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
5372fe6060f1SDimitry Andric     OpO = TargetOpcode::G_USUBO;
5373fe6060f1SDimitry Andric     OpE = TargetOpcode::G_USUBE;
5374fe6060f1SDimitry Andric     OpF = TargetOpcode::G_USUBE;
5375fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5376fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SSUBE;
5377fe6060f1SDimitry Andric     break;
5378fe6060f1SDimitry Andric   default:
5379fe6060f1SDimitry Andric     llvm_unreachable("Unexpected add/sub opcode!");
5380fe6060f1SDimitry Andric   }
5381fe6060f1SDimitry Andric 
5382fe6060f1SDimitry Andric   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5383fe6060f1SDimitry Andric   unsigned NumDefs = MI.getNumExplicitDefs();
5384fe6060f1SDimitry Andric   Register Src1 = MI.getOperand(NumDefs).getReg();
5385fe6060f1SDimitry Andric   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5386fe6060f1SDimitry Andric   Register CarryDst, CarryIn;
5387fe6060f1SDimitry Andric   if (NumDefs == 2)
5388fe6060f1SDimitry Andric     CarryDst = MI.getOperand(1).getReg();
5389fe6060f1SDimitry Andric   if (MI.getNumOperands() == NumDefs + 3)
5390fe6060f1SDimitry Andric     CarryIn = MI.getOperand(NumDefs + 2).getReg();
5391fe6060f1SDimitry Andric 
5392fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5393fe6060f1SDimitry Andric   LLT LeftoverTy, DummyTy;
5394fe6060f1SDimitry Andric   SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5395fe6060f1SDimitry Andric   extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
5396fe6060f1SDimitry Andric   extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
5397fe6060f1SDimitry Andric 
5398fe6060f1SDimitry Andric   int NarrowParts = Src1Regs.size();
5399fe6060f1SDimitry Andric   for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5400fe6060f1SDimitry Andric     Src1Regs.push_back(Src1Left[I]);
5401fe6060f1SDimitry Andric     Src2Regs.push_back(Src2Left[I]);
5402fe6060f1SDimitry Andric   }
5403fe6060f1SDimitry Andric   DstRegs.reserve(Src1Regs.size());
5404fe6060f1SDimitry Andric 
5405fe6060f1SDimitry Andric   for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5406fe6060f1SDimitry Andric     Register DstReg =
5407fe6060f1SDimitry Andric         MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5408fe6060f1SDimitry Andric     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5409fe6060f1SDimitry Andric     // Forward the final carry-out to the destination register
5410fe6060f1SDimitry Andric     if (i == e - 1 && CarryDst)
5411fe6060f1SDimitry Andric       CarryOut = CarryDst;
5412fe6060f1SDimitry Andric 
5413fe6060f1SDimitry Andric     if (!CarryIn) {
5414fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5415fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i]});
5416fe6060f1SDimitry Andric     } else if (i == e - 1) {
5417fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5418fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5419fe6060f1SDimitry Andric     } else {
5420fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5421fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5422fe6060f1SDimitry Andric     }
5423fe6060f1SDimitry Andric 
5424fe6060f1SDimitry Andric     DstRegs.push_back(DstReg);
5425fe6060f1SDimitry Andric     CarryIn = CarryOut;
5426fe6060f1SDimitry Andric   }
5427fe6060f1SDimitry Andric   insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5428bdd1243dSDimitry Andric               ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5429bdd1243dSDimitry Andric               ArrayRef(DstRegs).drop_front(NarrowParts));
5430fe6060f1SDimitry Andric 
5431fe6060f1SDimitry Andric   MI.eraseFromParent();
5432fe6060f1SDimitry Andric   return Legalized;
5433fe6060f1SDimitry Andric }
5434fe6060f1SDimitry Andric 
5435fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
54360b57cec5SDimitry Andric LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
543706c3fb27SDimitry Andric   auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
54380b57cec5SDimitry Andric 
54390b57cec5SDimitry Andric   LLT Ty = MRI.getType(DstReg);
54400b57cec5SDimitry Andric   if (Ty.isVector())
54410b57cec5SDimitry Andric     return UnableToLegalize;
54420b57cec5SDimitry Andric 
5443349cc55cSDimitry Andric   unsigned Size = Ty.getSizeInBits();
54440b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
5445349cc55cSDimitry Andric   if (Size % NarrowSize != 0)
54460b57cec5SDimitry Andric     return UnableToLegalize;
54470b57cec5SDimitry Andric 
5448349cc55cSDimitry Andric   unsigned NumParts = Size / NarrowSize;
54490b57cec5SDimitry Andric   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5450349cc55cSDimitry Andric   unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
54510b57cec5SDimitry Andric 
54525ffd83dbSDimitry Andric   SmallVector<Register, 2> Src1Parts, Src2Parts;
54535ffd83dbSDimitry Andric   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5454349cc55cSDimitry Andric   extractParts(Src1, NarrowTy, NumParts, Src1Parts);
5455349cc55cSDimitry Andric   extractParts(Src2, NarrowTy, NumParts, Src2Parts);
54560b57cec5SDimitry Andric   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
54570b57cec5SDimitry Andric 
54580b57cec5SDimitry Andric   // Take only high half of registers if this is high mul.
5459349cc55cSDimitry Andric   ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5460bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
54610b57cec5SDimitry Andric   MI.eraseFromParent();
54620b57cec5SDimitry Andric   return Legalized;
54630b57cec5SDimitry Andric }
54640b57cec5SDimitry Andric 
54650b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
546623408297SDimitry Andric LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
546723408297SDimitry Andric                                    LLT NarrowTy) {
546823408297SDimitry Andric   if (TypeIdx != 0)
546923408297SDimitry Andric     return UnableToLegalize;
547023408297SDimitry Andric 
547123408297SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
547223408297SDimitry Andric 
547323408297SDimitry Andric   Register Src = MI.getOperand(1).getReg();
547423408297SDimitry Andric   LLT SrcTy = MRI.getType(Src);
547523408297SDimitry Andric 
547623408297SDimitry Andric   // If all finite floats fit into the narrowed integer type, we can just swap
547723408297SDimitry Andric   // out the result type. This is practically only useful for conversions from
547823408297SDimitry Andric   // half to at least 16-bits, so just handle the one case.
547923408297SDimitry Andric   if (SrcTy.getScalarType() != LLT::scalar(16) ||
5480fe6060f1SDimitry Andric       NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
548123408297SDimitry Andric     return UnableToLegalize;
548223408297SDimitry Andric 
548323408297SDimitry Andric   Observer.changingInstr(MI);
548423408297SDimitry Andric   narrowScalarDst(MI, NarrowTy, 0,
548523408297SDimitry Andric                   IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
548623408297SDimitry Andric   Observer.changedInstr(MI);
548723408297SDimitry Andric   return Legalized;
548823408297SDimitry Andric }
548923408297SDimitry Andric 
549023408297SDimitry Andric LegalizerHelper::LegalizeResult
54910b57cec5SDimitry Andric LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
54920b57cec5SDimitry Andric                                      LLT NarrowTy) {
54930b57cec5SDimitry Andric   if (TypeIdx != 1)
54940b57cec5SDimitry Andric     return UnableToLegalize;
54950b57cec5SDimitry Andric 
54960b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
54970b57cec5SDimitry Andric 
54980b57cec5SDimitry Andric   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
54990b57cec5SDimitry Andric   // FIXME: add support for when SizeOp1 isn't an exact multiple of
55000b57cec5SDimitry Andric   // NarrowSize.
55010b57cec5SDimitry Andric   if (SizeOp1 % NarrowSize != 0)
55020b57cec5SDimitry Andric     return UnableToLegalize;
55030b57cec5SDimitry Andric   int NumParts = SizeOp1 / NarrowSize;
55040b57cec5SDimitry Andric 
55050b57cec5SDimitry Andric   SmallVector<Register, 2> SrcRegs, DstRegs;
55060b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
55070b57cec5SDimitry Andric   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
55080b57cec5SDimitry Andric 
55090b57cec5SDimitry Andric   Register OpReg = MI.getOperand(0).getReg();
55100b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(2).getImm();
55110b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
55120b57cec5SDimitry Andric   for (int i = 0; i < NumParts; ++i) {
55130b57cec5SDimitry Andric     unsigned SrcStart = i * NarrowSize;
55140b57cec5SDimitry Andric 
55150b57cec5SDimitry Andric     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
55160b57cec5SDimitry Andric       // No part of the extract uses this subregister, ignore it.
55170b57cec5SDimitry Andric       continue;
55180b57cec5SDimitry Andric     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
55190b57cec5SDimitry Andric       // The entire subregister is extracted, forward the value.
55200b57cec5SDimitry Andric       DstRegs.push_back(SrcRegs[i]);
55210b57cec5SDimitry Andric       continue;
55220b57cec5SDimitry Andric     }
55230b57cec5SDimitry Andric 
55240b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
55250b57cec5SDimitry Andric     // extended infinitely in both directions.
55260b57cec5SDimitry Andric     int64_t ExtractOffset;
55270b57cec5SDimitry Andric     uint64_t SegSize;
55280b57cec5SDimitry Andric     if (OpStart < SrcStart) {
55290b57cec5SDimitry Andric       ExtractOffset = 0;
55300b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
55310b57cec5SDimitry Andric     } else {
55320b57cec5SDimitry Andric       ExtractOffset = OpStart - SrcStart;
55330b57cec5SDimitry Andric       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
55340b57cec5SDimitry Andric     }
55350b57cec5SDimitry Andric 
55360b57cec5SDimitry Andric     Register SegReg = SrcRegs[i];
55370b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != NarrowSize) {
55380b57cec5SDimitry Andric       // A genuine extract is needed.
55390b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
55400b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
55410b57cec5SDimitry Andric     }
55420b57cec5SDimitry Andric 
55430b57cec5SDimitry Andric     DstRegs.push_back(SegReg);
55440b57cec5SDimitry Andric   }
55450b57cec5SDimitry Andric 
55460b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
55470b57cec5SDimitry Andric   if (MRI.getType(DstReg).isVector())
55480b57cec5SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, DstRegs);
55495ffd83dbSDimitry Andric   else if (DstRegs.size() > 1)
5550bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
55515ffd83dbSDimitry Andric   else
55525ffd83dbSDimitry Andric     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
55530b57cec5SDimitry Andric   MI.eraseFromParent();
55540b57cec5SDimitry Andric   return Legalized;
55550b57cec5SDimitry Andric }
55560b57cec5SDimitry Andric 
55570b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
55580b57cec5SDimitry Andric LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
55590b57cec5SDimitry Andric                                     LLT NarrowTy) {
55600b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
55610b57cec5SDimitry Andric   if (TypeIdx != 0)
55620b57cec5SDimitry Andric     return UnableToLegalize;
55630b57cec5SDimitry Andric 
5564fe6060f1SDimitry Andric   SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
55650b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
5566fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5567fe6060f1SDimitry Andric   LLT LeftoverTy;
5568fe6060f1SDimitry Andric   extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
5569fe6060f1SDimitry Andric                LeftoverRegs);
55700b57cec5SDimitry Andric 
5571fe6060f1SDimitry Andric   for (Register Reg : LeftoverRegs)
5572fe6060f1SDimitry Andric     SrcRegs.push_back(Reg);
5573fe6060f1SDimitry Andric 
5574fe6060f1SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
55750b57cec5SDimitry Andric   Register OpReg = MI.getOperand(2).getReg();
55760b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(3).getImm();
55770b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5578fe6060f1SDimitry Andric   for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5579fe6060f1SDimitry Andric     unsigned DstStart = I * NarrowSize;
55800b57cec5SDimitry Andric 
5581fe6060f1SDimitry Andric     if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
55820b57cec5SDimitry Andric       // The entire subregister is defined by this insert, forward the new
55830b57cec5SDimitry Andric       // value.
55840b57cec5SDimitry Andric       DstRegs.push_back(OpReg);
55850b57cec5SDimitry Andric       continue;
55860b57cec5SDimitry Andric     }
55870b57cec5SDimitry Andric 
5588fe6060f1SDimitry Andric     Register SrcReg = SrcRegs[I];
5589fe6060f1SDimitry Andric     if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5590fe6060f1SDimitry Andric       // The leftover reg is smaller than NarrowTy, so we need to extend it.
5591fe6060f1SDimitry Andric       SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5592fe6060f1SDimitry Andric       MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5593fe6060f1SDimitry Andric     }
5594fe6060f1SDimitry Andric 
5595fe6060f1SDimitry Andric     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5596fe6060f1SDimitry Andric       // No part of the insert affects this subregister, forward the original.
5597fe6060f1SDimitry Andric       DstRegs.push_back(SrcReg);
5598fe6060f1SDimitry Andric       continue;
5599fe6060f1SDimitry Andric     }
5600fe6060f1SDimitry Andric 
56010b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
56020b57cec5SDimitry Andric     // extended infinitely in both directions.
56030b57cec5SDimitry Andric     int64_t ExtractOffset, InsertOffset;
56040b57cec5SDimitry Andric     uint64_t SegSize;
56050b57cec5SDimitry Andric     if (OpStart < DstStart) {
56060b57cec5SDimitry Andric       InsertOffset = 0;
56070b57cec5SDimitry Andric       ExtractOffset = DstStart - OpStart;
56080b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
56090b57cec5SDimitry Andric     } else {
56100b57cec5SDimitry Andric       InsertOffset = OpStart - DstStart;
56110b57cec5SDimitry Andric       ExtractOffset = 0;
56120b57cec5SDimitry Andric       SegSize =
56130b57cec5SDimitry Andric         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
56140b57cec5SDimitry Andric     }
56150b57cec5SDimitry Andric 
56160b57cec5SDimitry Andric     Register SegReg = OpReg;
56170b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != OpSize) {
56180b57cec5SDimitry Andric       // A genuine extract is needed.
56190b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
56200b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
56210b57cec5SDimitry Andric     }
56220b57cec5SDimitry Andric 
56230b57cec5SDimitry Andric     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
5624fe6060f1SDimitry Andric     MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
56250b57cec5SDimitry Andric     DstRegs.push_back(DstReg);
56260b57cec5SDimitry Andric   }
56270b57cec5SDimitry Andric 
5628fe6060f1SDimitry Andric   uint64_t WideSize = DstRegs.size() * NarrowSize;
56290b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5630fe6060f1SDimitry Andric   if (WideSize > RegTy.getSizeInBits()) {
5631fe6060f1SDimitry Andric     Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
5632bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
5633fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, MergeReg);
5634fe6060f1SDimitry Andric   } else
5635bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5636fe6060f1SDimitry Andric 
56370b57cec5SDimitry Andric   MI.eraseFromParent();
56380b57cec5SDimitry Andric   return Legalized;
56390b57cec5SDimitry Andric }
56400b57cec5SDimitry Andric 
56410b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
56420b57cec5SDimitry Andric LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
56430b57cec5SDimitry Andric                                    LLT NarrowTy) {
56440b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
56450b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
56460b57cec5SDimitry Andric 
56470b57cec5SDimitry Andric   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
56480b57cec5SDimitry Andric 
56490b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
56500b57cec5SDimitry Andric   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
56510b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
56520b57cec5SDimitry Andric   LLT LeftoverTy;
56530b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
56540b57cec5SDimitry Andric                     Src0Regs, Src0LeftoverRegs))
56550b57cec5SDimitry Andric     return UnableToLegalize;
56560b57cec5SDimitry Andric 
56570b57cec5SDimitry Andric   LLT Unused;
56580b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
56590b57cec5SDimitry Andric                     Src1Regs, Src1LeftoverRegs))
56600b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
56610b57cec5SDimitry Andric 
56620b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
56630b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
56640b57cec5SDimitry Andric                                         {Src0Regs[I], Src1Regs[I]});
56655ffd83dbSDimitry Andric     DstRegs.push_back(Inst.getReg(0));
56660b57cec5SDimitry Andric   }
56670b57cec5SDimitry Andric 
56680b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
56690b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(
56700b57cec5SDimitry Andric       MI.getOpcode(),
56710b57cec5SDimitry Andric       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
56725ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Inst.getReg(0));
56730b57cec5SDimitry Andric   }
56740b57cec5SDimitry Andric 
56750b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
56760b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
56770b57cec5SDimitry Andric 
56780b57cec5SDimitry Andric   MI.eraseFromParent();
56790b57cec5SDimitry Andric   return Legalized;
56800b57cec5SDimitry Andric }
56810b57cec5SDimitry Andric 
56820b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
56835ffd83dbSDimitry Andric LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
56845ffd83dbSDimitry Andric                                  LLT NarrowTy) {
56855ffd83dbSDimitry Andric   if (TypeIdx != 0)
56865ffd83dbSDimitry Andric     return UnableToLegalize;
56875ffd83dbSDimitry Andric 
568806c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
56895ffd83dbSDimitry Andric 
56905ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
56915ffd83dbSDimitry Andric   if (DstTy.isVector())
56925ffd83dbSDimitry Andric     return UnableToLegalize;
56935ffd83dbSDimitry Andric 
56945ffd83dbSDimitry Andric   SmallVector<Register, 8> Parts;
56955ffd83dbSDimitry Andric   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
56965ffd83dbSDimitry Andric   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
56975ffd83dbSDimitry Andric   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
56985ffd83dbSDimitry Andric 
56995ffd83dbSDimitry Andric   MI.eraseFromParent();
57005ffd83dbSDimitry Andric   return Legalized;
57015ffd83dbSDimitry Andric }
57025ffd83dbSDimitry Andric 
57035ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
57040b57cec5SDimitry Andric LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
57050b57cec5SDimitry Andric                                     LLT NarrowTy) {
57060b57cec5SDimitry Andric   if (TypeIdx != 0)
57070b57cec5SDimitry Andric     return UnableToLegalize;
57080b57cec5SDimitry Andric 
57090b57cec5SDimitry Andric   Register CondReg = MI.getOperand(1).getReg();
57100b57cec5SDimitry Andric   LLT CondTy = MRI.getType(CondReg);
57110b57cec5SDimitry Andric   if (CondTy.isVector()) // TODO: Handle vselect
57120b57cec5SDimitry Andric     return UnableToLegalize;
57130b57cec5SDimitry Andric 
57140b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
57150b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
57160b57cec5SDimitry Andric 
57170b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
57180b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
57190b57cec5SDimitry Andric   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
57200b57cec5SDimitry Andric   LLT LeftoverTy;
57210b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
57220b57cec5SDimitry Andric                     Src1Regs, Src1LeftoverRegs))
57230b57cec5SDimitry Andric     return UnableToLegalize;
57240b57cec5SDimitry Andric 
57250b57cec5SDimitry Andric   LLT Unused;
57260b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
57270b57cec5SDimitry Andric                     Src2Regs, Src2LeftoverRegs))
57280b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
57290b57cec5SDimitry Andric 
57300b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
57310b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(NarrowTy,
57320b57cec5SDimitry Andric                                          CondReg, Src1Regs[I], Src2Regs[I]);
57335ffd83dbSDimitry Andric     DstRegs.push_back(Select.getReg(0));
57340b57cec5SDimitry Andric   }
57350b57cec5SDimitry Andric 
57360b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
57370b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(
57380b57cec5SDimitry Andric       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
57395ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Select.getReg(0));
57400b57cec5SDimitry Andric   }
57410b57cec5SDimitry Andric 
57420b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
57430b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
57440b57cec5SDimitry Andric 
57450b57cec5SDimitry Andric   MI.eraseFromParent();
57460b57cec5SDimitry Andric   return Legalized;
57470b57cec5SDimitry Andric }
57480b57cec5SDimitry Andric 
57490b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
57505ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
57515ffd83dbSDimitry Andric                                   LLT NarrowTy) {
57525ffd83dbSDimitry Andric   if (TypeIdx != 1)
57535ffd83dbSDimitry Andric     return UnableToLegalize;
57545ffd83dbSDimitry Andric 
575506c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
57565ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
57575ffd83dbSDimitry Andric 
57585ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
57595ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
57605ffd83dbSDimitry Andric 
57615ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
57625ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
57635ffd83dbSDimitry Andric     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
57645ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
57655ffd83dbSDimitry Andric     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
57665ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(1), C_0);
57675ffd83dbSDimitry Andric     auto LoCTLZ = IsUndef ?
57685ffd83dbSDimitry Andric       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
57695ffd83dbSDimitry Andric       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
57705ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
57715ffd83dbSDimitry Andric     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
57725ffd83dbSDimitry Andric     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
57735ffd83dbSDimitry Andric     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
57745ffd83dbSDimitry Andric 
57755ffd83dbSDimitry Andric     MI.eraseFromParent();
57765ffd83dbSDimitry Andric     return Legalized;
57775ffd83dbSDimitry Andric   }
57785ffd83dbSDimitry Andric 
57795ffd83dbSDimitry Andric   return UnableToLegalize;
57805ffd83dbSDimitry Andric }
57815ffd83dbSDimitry Andric 
57825ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
57835ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
57845ffd83dbSDimitry Andric                                   LLT NarrowTy) {
57855ffd83dbSDimitry Andric   if (TypeIdx != 1)
57865ffd83dbSDimitry Andric     return UnableToLegalize;
57875ffd83dbSDimitry Andric 
578806c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
57895ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
57905ffd83dbSDimitry Andric 
57915ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
57925ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
57935ffd83dbSDimitry Andric 
57945ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
57955ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
57965ffd83dbSDimitry Andric     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
57975ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
57985ffd83dbSDimitry Andric     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
57995ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(0), C_0);
58005ffd83dbSDimitry Andric     auto HiCTTZ = IsUndef ?
58015ffd83dbSDimitry Andric       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
58025ffd83dbSDimitry Andric       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
58035ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
58045ffd83dbSDimitry Andric     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
58055ffd83dbSDimitry Andric     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
58065ffd83dbSDimitry Andric     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
58075ffd83dbSDimitry Andric 
58085ffd83dbSDimitry Andric     MI.eraseFromParent();
58095ffd83dbSDimitry Andric     return Legalized;
58105ffd83dbSDimitry Andric   }
58115ffd83dbSDimitry Andric 
58125ffd83dbSDimitry Andric   return UnableToLegalize;
58135ffd83dbSDimitry Andric }
58145ffd83dbSDimitry Andric 
58155ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
58165ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
58175ffd83dbSDimitry Andric                                    LLT NarrowTy) {
58185ffd83dbSDimitry Andric   if (TypeIdx != 1)
58195ffd83dbSDimitry Andric     return UnableToLegalize;
58205ffd83dbSDimitry Andric 
582106c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
58225ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
58235ffd83dbSDimitry Andric 
58245ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
58255ffd83dbSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
58265ffd83dbSDimitry Andric 
58275ffd83dbSDimitry Andric     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
58285ffd83dbSDimitry Andric     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
58295ffd83dbSDimitry Andric     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
58305ffd83dbSDimitry Andric 
58315ffd83dbSDimitry Andric     MI.eraseFromParent();
58325ffd83dbSDimitry Andric     return Legalized;
58335ffd83dbSDimitry Andric   }
58345ffd83dbSDimitry Andric 
58355ffd83dbSDimitry Andric   return UnableToLegalize;
58365ffd83dbSDimitry Andric }
58375ffd83dbSDimitry Andric 
58385ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
583906c3fb27SDimitry Andric LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
584006c3fb27SDimitry Andric                                     LLT NarrowTy) {
584106c3fb27SDimitry Andric   if (TypeIdx != 1)
584206c3fb27SDimitry Andric     return UnableToLegalize;
584306c3fb27SDimitry Andric 
584406c3fb27SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
584506c3fb27SDimitry Andric   Register ExpReg = MI.getOperand(2).getReg();
584606c3fb27SDimitry Andric   LLT ExpTy = MRI.getType(ExpReg);
584706c3fb27SDimitry Andric 
584806c3fb27SDimitry Andric   unsigned ClampSize = NarrowTy.getScalarSizeInBits();
584906c3fb27SDimitry Andric 
585006c3fb27SDimitry Andric   // Clamp the exponent to the range of the target type.
585106c3fb27SDimitry Andric   auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
585206c3fb27SDimitry Andric   auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
585306c3fb27SDimitry Andric   auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
585406c3fb27SDimitry Andric   auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
585506c3fb27SDimitry Andric 
585606c3fb27SDimitry Andric   auto Trunc = B.buildTrunc(NarrowTy, Clamp);
585706c3fb27SDimitry Andric   Observer.changingInstr(MI);
585806c3fb27SDimitry Andric   MI.getOperand(2).setReg(Trunc.getReg(0));
585906c3fb27SDimitry Andric   Observer.changedInstr(MI);
586006c3fb27SDimitry Andric   return Legalized;
586106c3fb27SDimitry Andric }
586206c3fb27SDimitry Andric 
586306c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
5864e8d8bef9SDimitry Andric LegalizerHelper::lowerBitCount(MachineInstr &MI) {
58650b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
5866e8d8bef9SDimitry Andric   const auto &TII = MIRBuilder.getTII();
58670b57cec5SDimitry Andric   auto isSupported = [this](const LegalityQuery &Q) {
58680b57cec5SDimitry Andric     auto QAction = LI.getAction(Q).Action;
58690b57cec5SDimitry Andric     return QAction == Legal || QAction == Libcall || QAction == Custom;
58700b57cec5SDimitry Andric   };
58710b57cec5SDimitry Andric   switch (Opc) {
58720b57cec5SDimitry Andric   default:
58730b57cec5SDimitry Andric     return UnableToLegalize;
58740b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
58750b57cec5SDimitry Andric     // This trivially expands to CTLZ.
58760b57cec5SDimitry Andric     Observer.changingInstr(MI);
58770b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
58780b57cec5SDimitry Andric     Observer.changedInstr(MI);
58790b57cec5SDimitry Andric     return Legalized;
58800b57cec5SDimitry Andric   }
58810b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ: {
588206c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
58835ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
58845ffd83dbSDimitry Andric 
58855ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
58860b57cec5SDimitry Andric       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
58875ffd83dbSDimitry Andric       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
58885ffd83dbSDimitry Andric       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
58895ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
58905ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
58915ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
58925ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
58930b57cec5SDimitry Andric       MI.eraseFromParent();
58940b57cec5SDimitry Andric       return Legalized;
58950b57cec5SDimitry Andric     }
58960b57cec5SDimitry Andric     // for now, we do this:
58970b57cec5SDimitry Andric     // NewLen = NextPowerOf2(Len);
58980b57cec5SDimitry Andric     // x = x | (x >> 1);
58990b57cec5SDimitry Andric     // x = x | (x >> 2);
59000b57cec5SDimitry Andric     // ...
59010b57cec5SDimitry Andric     // x = x | (x >>16);
59020b57cec5SDimitry Andric     // x = x | (x >>32); // for 64-bit input
59030b57cec5SDimitry Andric     // Upto NewLen/2
59040b57cec5SDimitry Andric     // return Len - popcount(x);
59050b57cec5SDimitry Andric     //
59060b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
59070b57cec5SDimitry Andric     Register Op = SrcReg;
59080b57cec5SDimitry Andric     unsigned NewLen = PowerOf2Ceil(Len);
59090b57cec5SDimitry Andric     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
59105ffd83dbSDimitry Andric       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
59115ffd83dbSDimitry Andric       auto MIBOp = MIRBuilder.buildOr(
59125ffd83dbSDimitry Andric           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
59135ffd83dbSDimitry Andric       Op = MIBOp.getReg(0);
59140b57cec5SDimitry Andric     }
59155ffd83dbSDimitry Andric     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
59165ffd83dbSDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
59175ffd83dbSDimitry Andric                         MIBPop);
59180b57cec5SDimitry Andric     MI.eraseFromParent();
59190b57cec5SDimitry Andric     return Legalized;
59200b57cec5SDimitry Andric   }
59210b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
59220b57cec5SDimitry Andric     // This trivially expands to CTTZ.
59230b57cec5SDimitry Andric     Observer.changingInstr(MI);
59240b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
59250b57cec5SDimitry Andric     Observer.changedInstr(MI);
59260b57cec5SDimitry Andric     return Legalized;
59270b57cec5SDimitry Andric   }
59280b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ: {
592906c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
59305ffd83dbSDimitry Andric 
59315ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
59325ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
59330b57cec5SDimitry Andric       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
59340b57cec5SDimitry Andric       // zero.
59355ffd83dbSDimitry Andric       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
59365ffd83dbSDimitry Andric       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
59375ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
59385ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
59395ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
59405ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
59410b57cec5SDimitry Andric       MI.eraseFromParent();
59420b57cec5SDimitry Andric       return Legalized;
59430b57cec5SDimitry Andric     }
59440b57cec5SDimitry Andric     // for now, we use: { return popcount(~x & (x - 1)); }
59450b57cec5SDimitry Andric     // unless the target has ctlz but not ctpop, in which case we use:
59460b57cec5SDimitry Andric     // { return 32 - nlz(~x & (x-1)); }
59470b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
5948e8d8bef9SDimitry Andric     auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
5949e8d8bef9SDimitry Andric     auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
59505ffd83dbSDimitry Andric     auto MIBTmp = MIRBuilder.buildAnd(
5951e8d8bef9SDimitry Andric         SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
5952e8d8bef9SDimitry Andric     if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
5953e8d8bef9SDimitry Andric         isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
5954e8d8bef9SDimitry Andric       auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
59555ffd83dbSDimitry Andric       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
5956e8d8bef9SDimitry Andric                           MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
59570b57cec5SDimitry Andric       MI.eraseFromParent();
59580b57cec5SDimitry Andric       return Legalized;
59590b57cec5SDimitry Andric     }
5960*5f757f3fSDimitry Andric     Observer.changingInstr(MI);
59610b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
59625ffd83dbSDimitry Andric     MI.getOperand(1).setReg(MIBTmp.getReg(0));
5963*5f757f3fSDimitry Andric     Observer.changedInstr(MI);
59645ffd83dbSDimitry Andric     return Legalized;
59655ffd83dbSDimitry Andric   }
59665ffd83dbSDimitry Andric   case TargetOpcode::G_CTPOP: {
5967e8d8bef9SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
5968e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(SrcReg);
59695ffd83dbSDimitry Andric     unsigned Size = Ty.getSizeInBits();
59705ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
59715ffd83dbSDimitry Andric 
59725ffd83dbSDimitry Andric     // Count set bits in blocks of 2 bits. Default approach would be
59735ffd83dbSDimitry Andric     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
59745ffd83dbSDimitry Andric     // We use following formula instead:
59755ffd83dbSDimitry Andric     // B2Count = val - { (val >> 1) & 0x55555555 }
59765ffd83dbSDimitry Andric     // since it gives same result in blocks of 2 with one instruction less.
59775ffd83dbSDimitry Andric     auto C_1 = B.buildConstant(Ty, 1);
5978e8d8bef9SDimitry Andric     auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
59795ffd83dbSDimitry Andric     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
59805ffd83dbSDimitry Andric     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
59815ffd83dbSDimitry Andric     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
5982e8d8bef9SDimitry Andric     auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
59835ffd83dbSDimitry Andric 
59845ffd83dbSDimitry Andric     // In order to get count in blocks of 4 add values from adjacent block of 2.
59855ffd83dbSDimitry Andric     // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
59865ffd83dbSDimitry Andric     auto C_2 = B.buildConstant(Ty, 2);
59875ffd83dbSDimitry Andric     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
59885ffd83dbSDimitry Andric     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
59895ffd83dbSDimitry Andric     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
59905ffd83dbSDimitry Andric     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
59915ffd83dbSDimitry Andric     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
59925ffd83dbSDimitry Andric     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
59935ffd83dbSDimitry Andric 
59945ffd83dbSDimitry Andric     // For count in blocks of 8 bits we don't have to mask high 4 bits before
59955ffd83dbSDimitry Andric     // addition since count value sits in range {0,...,8} and 4 bits are enough
59965ffd83dbSDimitry Andric     // to hold such binary values. After addition high 4 bits still hold count
59975ffd83dbSDimitry Andric     // of set bits in high 4 bit block, set them to zero and get 8 bit result.
59985ffd83dbSDimitry Andric     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
59995ffd83dbSDimitry Andric     auto C_4 = B.buildConstant(Ty, 4);
60005ffd83dbSDimitry Andric     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
60015ffd83dbSDimitry Andric     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
60025ffd83dbSDimitry Andric     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
60035ffd83dbSDimitry Andric     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
60045ffd83dbSDimitry Andric     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
60055ffd83dbSDimitry Andric 
60065ffd83dbSDimitry Andric     assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
60075ffd83dbSDimitry Andric     // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
60085ffd83dbSDimitry Andric     // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
60095ffd83dbSDimitry Andric     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
60105ffd83dbSDimitry Andric     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
60115ffd83dbSDimitry Andric 
60125ffd83dbSDimitry Andric     // Shift count result from 8 high bits to low bits.
60135ffd83dbSDimitry Andric     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
60145ffd83dbSDimitry Andric     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
60155ffd83dbSDimitry Andric 
60165ffd83dbSDimitry Andric     MI.eraseFromParent();
60170b57cec5SDimitry Andric     return Legalized;
60180b57cec5SDimitry Andric   }
60190b57cec5SDimitry Andric   }
60200b57cec5SDimitry Andric }
60210b57cec5SDimitry Andric 
6022fe6060f1SDimitry Andric // Check that (every element of) Reg is undef or not an exact multiple of BW.
6023fe6060f1SDimitry Andric static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6024fe6060f1SDimitry Andric                                         Register Reg, unsigned BW) {
6025fe6060f1SDimitry Andric   return matchUnaryPredicate(
6026fe6060f1SDimitry Andric       MRI, Reg,
6027fe6060f1SDimitry Andric       [=](const Constant *C) {
6028fe6060f1SDimitry Andric         // Null constant here means an undef.
6029fe6060f1SDimitry Andric         const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6030fe6060f1SDimitry Andric         return !CI || CI->getValue().urem(BW) != 0;
6031fe6060f1SDimitry Andric       },
6032fe6060f1SDimitry Andric       /*AllowUndefs*/ true);
6033fe6060f1SDimitry Andric }
6034fe6060f1SDimitry Andric 
6035fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6036fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
603706c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6038fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6039fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6040fe6060f1SDimitry Andric 
6041fe6060f1SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
6042fe6060f1SDimitry Andric 
6043fe6060f1SDimitry Andric   if (!isPowerOf2_32(BW))
6044fe6060f1SDimitry Andric     return UnableToLegalize;
6045fe6060f1SDimitry Andric 
6046fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6047fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6048fe6060f1SDimitry Andric 
6049fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6050fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr X, Y, -Z
6051fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl X, Y, -Z
6052fe6060f1SDimitry Andric     auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6053fe6060f1SDimitry Andric     Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6054fe6060f1SDimitry Andric   } else {
6055fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6056fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6057fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6058fe6060f1SDimitry Andric     if (IsFSHL) {
6059fe6060f1SDimitry Andric       Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6060fe6060f1SDimitry Andric       X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6061fe6060f1SDimitry Andric     } else {
6062fe6060f1SDimitry Andric       X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6063fe6060f1SDimitry Andric       Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6064fe6060f1SDimitry Andric     }
6065fe6060f1SDimitry Andric 
6066fe6060f1SDimitry Andric     Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6067fe6060f1SDimitry Andric   }
6068fe6060f1SDimitry Andric 
6069fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6070fe6060f1SDimitry Andric   MI.eraseFromParent();
6071fe6060f1SDimitry Andric   return Legalized;
6072fe6060f1SDimitry Andric }
6073fe6060f1SDimitry Andric 
6074fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6075fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
607606c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6077fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6078fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6079fe6060f1SDimitry Andric 
6080fe6060f1SDimitry Andric   const unsigned BW = Ty.getScalarSizeInBits();
6081fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6082fe6060f1SDimitry Andric 
6083fe6060f1SDimitry Andric   Register ShX, ShY;
6084fe6060f1SDimitry Andric   Register ShAmt, InvShAmt;
6085fe6060f1SDimitry Andric 
6086fe6060f1SDimitry Andric   // FIXME: Emit optimized urem by constant instead of letting it expand later.
6087fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6088fe6060f1SDimitry Andric     // fshl: X << C | Y >> (BW - C)
6089fe6060f1SDimitry Andric     // fshr: X << (BW - C) | Y >> C
6090fe6060f1SDimitry Andric     // where C = Z % BW is not zero
6091fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6092fe6060f1SDimitry Andric     ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6093fe6060f1SDimitry Andric     InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6094fe6060f1SDimitry Andric     ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6095fe6060f1SDimitry Andric     ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6096fe6060f1SDimitry Andric   } else {
6097fe6060f1SDimitry Andric     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6098fe6060f1SDimitry Andric     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6099fe6060f1SDimitry Andric     auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6100fe6060f1SDimitry Andric     if (isPowerOf2_32(BW)) {
6101fe6060f1SDimitry Andric       // Z % BW -> Z & (BW - 1)
6102fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6103fe6060f1SDimitry Andric       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6104fe6060f1SDimitry Andric       auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6105fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6106fe6060f1SDimitry Andric     } else {
6107fe6060f1SDimitry Andric       auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6108fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6109fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6110fe6060f1SDimitry Andric     }
6111fe6060f1SDimitry Andric 
6112fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6113fe6060f1SDimitry Andric     if (IsFSHL) {
6114fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6115fe6060f1SDimitry Andric       auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6116fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6117fe6060f1SDimitry Andric     } else {
6118fe6060f1SDimitry Andric       auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6119fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6120fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6121fe6060f1SDimitry Andric     }
6122fe6060f1SDimitry Andric   }
6123fe6060f1SDimitry Andric 
6124fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShX, ShY);
6125fe6060f1SDimitry Andric   MI.eraseFromParent();
6126fe6060f1SDimitry Andric   return Legalized;
6127fe6060f1SDimitry Andric }
6128fe6060f1SDimitry Andric 
6129fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6130fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6131fe6060f1SDimitry Andric   // These operations approximately do the following (while avoiding undefined
6132fe6060f1SDimitry Andric   // shifts by BW):
6133fe6060f1SDimitry Andric   // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6134fe6060f1SDimitry Andric   // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6135fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
6136fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6137fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6138fe6060f1SDimitry Andric 
6139fe6060f1SDimitry Andric   bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6140fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6141fe6060f1SDimitry Andric 
6142fe6060f1SDimitry Andric   // TODO: Use smarter heuristic that accounts for vector legalization.
6143fe6060f1SDimitry Andric   if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6144fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6145fe6060f1SDimitry Andric 
6146fe6060f1SDimitry Andric   // This only works for powers of 2, fallback to shifts if it fails.
6147fe6060f1SDimitry Andric   LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6148fe6060f1SDimitry Andric   if (Result == UnableToLegalize)
6149fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6150fe6060f1SDimitry Andric   return Result;
6151fe6060f1SDimitry Andric }
6152fe6060f1SDimitry Andric 
6153*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6154*5f757f3fSDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
6155*5f757f3fSDimitry Andric   LLT DstTy = MRI.getType(Dst);
6156*5f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(Src);
6157*5f757f3fSDimitry Andric 
6158*5f757f3fSDimitry Andric   uint32_t DstTySize = DstTy.getSizeInBits();
6159*5f757f3fSDimitry Andric   uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6160*5f757f3fSDimitry Andric   uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6161*5f757f3fSDimitry Andric 
6162*5f757f3fSDimitry Andric   if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6163*5f757f3fSDimitry Andric       !isPowerOf2_32(SrcTyScalarSize))
6164*5f757f3fSDimitry Andric     return UnableToLegalize;
6165*5f757f3fSDimitry Andric 
6166*5f757f3fSDimitry Andric   // The step between extend is too large, split it by creating an intermediate
6167*5f757f3fSDimitry Andric   // extend instruction
6168*5f757f3fSDimitry Andric   if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6169*5f757f3fSDimitry Andric     LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6170*5f757f3fSDimitry Andric     // If the destination type is illegal, split it into multiple statements
6171*5f757f3fSDimitry Andric     // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6172*5f757f3fSDimitry Andric     auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6173*5f757f3fSDimitry Andric     // Unmerge the vector
6174*5f757f3fSDimitry Andric     LLT EltTy = MidTy.changeElementCount(
6175*5f757f3fSDimitry Andric         MidTy.getElementCount().divideCoefficientBy(2));
6176*5f757f3fSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6177*5f757f3fSDimitry Andric 
6178*5f757f3fSDimitry Andric     // ZExt the vectors
6179*5f757f3fSDimitry Andric     LLT ZExtResTy = DstTy.changeElementCount(
6180*5f757f3fSDimitry Andric         DstTy.getElementCount().divideCoefficientBy(2));
6181*5f757f3fSDimitry Andric     auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6182*5f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(0)});
6183*5f757f3fSDimitry Andric     auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6184*5f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(1)});
6185*5f757f3fSDimitry Andric 
6186*5f757f3fSDimitry Andric     // Merge the ending vectors
6187*5f757f3fSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6188*5f757f3fSDimitry Andric 
6189*5f757f3fSDimitry Andric     MI.eraseFromParent();
6190*5f757f3fSDimitry Andric     return Legalized;
6191*5f757f3fSDimitry Andric   }
6192*5f757f3fSDimitry Andric   return UnableToLegalize;
6193*5f757f3fSDimitry Andric }
6194*5f757f3fSDimitry Andric 
6195*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6196*5f757f3fSDimitry Andric   // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6197*5f757f3fSDimitry Andric   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
6198*5f757f3fSDimitry Andric   // Similar to how operand splitting is done in SelectiondDAG, we can handle
6199*5f757f3fSDimitry Andric   // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6200*5f757f3fSDimitry Andric   //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6201*5f757f3fSDimitry Andric   //   %lo16(<4 x s16>) = G_TRUNC %inlo
6202*5f757f3fSDimitry Andric   //   %hi16(<4 x s16>) = G_TRUNC %inhi
6203*5f757f3fSDimitry Andric   //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6204*5f757f3fSDimitry Andric   //   %res(<8 x s8>) = G_TRUNC %in16
6205*5f757f3fSDimitry Andric 
6206*5f757f3fSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6207*5f757f3fSDimitry Andric 
6208*5f757f3fSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
6209*5f757f3fSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
6210*5f757f3fSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
6211*5f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
6212*5f757f3fSDimitry Andric 
6213*5f757f3fSDimitry Andric   if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6214*5f757f3fSDimitry Andric       isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6215*5f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getNumElements()) &&
6216*5f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6217*5f757f3fSDimitry Andric     // Split input type.
6218*5f757f3fSDimitry Andric     LLT SplitSrcTy = SrcTy.changeElementCount(
6219*5f757f3fSDimitry Andric         SrcTy.getElementCount().divideCoefficientBy(2));
6220*5f757f3fSDimitry Andric 
6221*5f757f3fSDimitry Andric     // First, split the source into two smaller vectors.
6222*5f757f3fSDimitry Andric     SmallVector<Register, 2> SplitSrcs;
6223*5f757f3fSDimitry Andric     extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
6224*5f757f3fSDimitry Andric 
6225*5f757f3fSDimitry Andric     // Truncate the splits into intermediate narrower elements.
6226*5f757f3fSDimitry Andric     LLT InterTy;
6227*5f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6228*5f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6229*5f757f3fSDimitry Andric     else
6230*5f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6231*5f757f3fSDimitry Andric     for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6232*5f757f3fSDimitry Andric       SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6233*5f757f3fSDimitry Andric     }
6234*5f757f3fSDimitry Andric 
6235*5f757f3fSDimitry Andric     // Combine the new truncates into one vector
6236*5f757f3fSDimitry Andric     auto Merge = MIRBuilder.buildMergeLikeInstr(
6237*5f757f3fSDimitry Andric         DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6238*5f757f3fSDimitry Andric 
6239*5f757f3fSDimitry Andric     // Truncate the new vector to the final result type
6240*5f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6241*5f757f3fSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6242*5f757f3fSDimitry Andric     else
6243*5f757f3fSDimitry Andric       MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6244*5f757f3fSDimitry Andric 
6245*5f757f3fSDimitry Andric     MI.eraseFromParent();
6246*5f757f3fSDimitry Andric 
6247*5f757f3fSDimitry Andric     return Legalized;
6248*5f757f3fSDimitry Andric   }
6249*5f757f3fSDimitry Andric   return UnableToLegalize;
6250*5f757f3fSDimitry Andric }
6251*5f757f3fSDimitry Andric 
6252fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6253fe6060f1SDimitry Andric LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
625406c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6255fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6256fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6257fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6258fe6060f1SDimitry Andric   auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6259fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6260fe6060f1SDimitry Andric   MI.eraseFromParent();
6261fe6060f1SDimitry Andric   return Legalized;
6262fe6060f1SDimitry Andric }
6263fe6060f1SDimitry Andric 
6264fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
626506c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6266fe6060f1SDimitry Andric 
6267fe6060f1SDimitry Andric   unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6268fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6269fe6060f1SDimitry Andric 
6270fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
6271fe6060f1SDimitry Andric 
6272fe6060f1SDimitry Andric   // If a rotate in the other direction is supported, use it.
6273fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6274fe6060f1SDimitry Andric   if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6275fe6060f1SDimitry Andric       isPowerOf2_32(EltSizeInBits))
6276fe6060f1SDimitry Andric     return lowerRotateWithReverseRotate(MI);
6277fe6060f1SDimitry Andric 
6278349cc55cSDimitry Andric   // If a funnel shift is supported, use it.
6279349cc55cSDimitry Andric   unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6280349cc55cSDimitry Andric   unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6281349cc55cSDimitry Andric   bool IsFShLegal = false;
6282349cc55cSDimitry Andric   if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6283349cc55cSDimitry Andric       LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6284349cc55cSDimitry Andric     auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6285349cc55cSDimitry Andric                                 Register R3) {
6286349cc55cSDimitry Andric       MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6287349cc55cSDimitry Andric       MI.eraseFromParent();
6288349cc55cSDimitry Andric       return Legalized;
6289349cc55cSDimitry Andric     };
6290349cc55cSDimitry Andric     // If a funnel shift in the other direction is supported, use it.
6291349cc55cSDimitry Andric     if (IsFShLegal) {
6292349cc55cSDimitry Andric       return buildFunnelShift(FShOpc, Dst, Src, Amt);
6293349cc55cSDimitry Andric     } else if (isPowerOf2_32(EltSizeInBits)) {
6294349cc55cSDimitry Andric       Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6295349cc55cSDimitry Andric       return buildFunnelShift(RevFsh, Dst, Src, Amt);
6296349cc55cSDimitry Andric     }
6297349cc55cSDimitry Andric   }
6298349cc55cSDimitry Andric 
6299fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6300fe6060f1SDimitry Andric   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6301fe6060f1SDimitry Andric   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6302fe6060f1SDimitry Andric   auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6303fe6060f1SDimitry Andric   Register ShVal;
6304fe6060f1SDimitry Andric   Register RevShiftVal;
6305fe6060f1SDimitry Andric   if (isPowerOf2_32(EltSizeInBits)) {
6306fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6307fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6308fe6060f1SDimitry Andric     auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6309fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6310fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6311fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6312fe6060f1SDimitry Andric     RevShiftVal =
6313fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6314fe6060f1SDimitry Andric   } else {
6315fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6316fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6317fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6318fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6319fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6320fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6321fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(AmtTy, 1);
6322fe6060f1SDimitry Andric     auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6323fe6060f1SDimitry Andric     RevShiftVal =
6324fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6325fe6060f1SDimitry Andric   }
6326fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6327fe6060f1SDimitry Andric   MI.eraseFromParent();
6328fe6060f1SDimitry Andric   return Legalized;
6329fe6060f1SDimitry Andric }
6330fe6060f1SDimitry Andric 
63310b57cec5SDimitry Andric // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
63320b57cec5SDimitry Andric // representation.
63330b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
63340b57cec5SDimitry Andric LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
633506c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
63360b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
63370b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
63380b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
63390b57cec5SDimitry Andric 
63400b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
63410b57cec5SDimitry Andric 
63420b57cec5SDimitry Andric   // unsigned cul2f(ulong u) {
63430b57cec5SDimitry Andric   //   uint lz = clz(u);
63440b57cec5SDimitry Andric   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
63450b57cec5SDimitry Andric   //   u = (u << lz) & 0x7fffffffffffffffUL;
63460b57cec5SDimitry Andric   //   ulong t = u & 0xffffffffffUL;
63470b57cec5SDimitry Andric   //   uint v = (e << 23) | (uint)(u >> 40);
63480b57cec5SDimitry Andric   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
63490b57cec5SDimitry Andric   //   return as_float(v + r);
63500b57cec5SDimitry Andric   // }
63510b57cec5SDimitry Andric 
63520b57cec5SDimitry Andric   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
63530b57cec5SDimitry Andric   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
63540b57cec5SDimitry Andric 
63550b57cec5SDimitry Andric   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
63560b57cec5SDimitry Andric 
63570b57cec5SDimitry Andric   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
63580b57cec5SDimitry Andric   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
63590b57cec5SDimitry Andric 
63600b57cec5SDimitry Andric   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
63610b57cec5SDimitry Andric   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
63620b57cec5SDimitry Andric 
63630b57cec5SDimitry Andric   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
63640b57cec5SDimitry Andric   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
63650b57cec5SDimitry Andric 
63660b57cec5SDimitry Andric   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
63670b57cec5SDimitry Andric 
63680b57cec5SDimitry Andric   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
63690b57cec5SDimitry Andric   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
63700b57cec5SDimitry Andric 
63710b57cec5SDimitry Andric   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
63720b57cec5SDimitry Andric   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
63730b57cec5SDimitry Andric   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
63740b57cec5SDimitry Andric 
63750b57cec5SDimitry Andric   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
63760b57cec5SDimitry Andric   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
63770b57cec5SDimitry Andric   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
63780b57cec5SDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
63790b57cec5SDimitry Andric 
63800b57cec5SDimitry Andric   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
63810b57cec5SDimitry Andric   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
63820b57cec5SDimitry Andric   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
63830b57cec5SDimitry Andric   MIRBuilder.buildAdd(Dst, V, R);
63840b57cec5SDimitry Andric 
63855ffd83dbSDimitry Andric   MI.eraseFromParent();
63860b57cec5SDimitry Andric   return Legalized;
63870b57cec5SDimitry Andric }
63880b57cec5SDimitry Andric 
6389e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
639006c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
63910b57cec5SDimitry Andric 
6392480093f4SDimitry Andric   if (SrcTy == LLT::scalar(1)) {
6393480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6394480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6395480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6396480093f4SDimitry Andric     MI.eraseFromParent();
6397480093f4SDimitry Andric     return Legalized;
6398480093f4SDimitry Andric   }
6399480093f4SDimitry Andric 
64000b57cec5SDimitry Andric   if (SrcTy != LLT::scalar(64))
64010b57cec5SDimitry Andric     return UnableToLegalize;
64020b57cec5SDimitry Andric 
64030b57cec5SDimitry Andric   if (DstTy == LLT::scalar(32)) {
64040b57cec5SDimitry Andric     // TODO: SelectionDAG has several alternative expansions to port which may
64050b57cec5SDimitry Andric     // be more reasonble depending on the available instructions. If a target
64060b57cec5SDimitry Andric     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
64070b57cec5SDimitry Andric     // intermediate type, this is probably worse.
64080b57cec5SDimitry Andric     return lowerU64ToF32BitOps(MI);
64090b57cec5SDimitry Andric   }
64100b57cec5SDimitry Andric 
64110b57cec5SDimitry Andric   return UnableToLegalize;
64120b57cec5SDimitry Andric }
64130b57cec5SDimitry Andric 
6414e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
641506c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
64160b57cec5SDimitry Andric 
64170b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
64180b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
64190b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
64200b57cec5SDimitry Andric 
6421480093f4SDimitry Andric   if (SrcTy == S1) {
6422480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6423480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6424480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6425480093f4SDimitry Andric     MI.eraseFromParent();
6426480093f4SDimitry Andric     return Legalized;
6427480093f4SDimitry Andric   }
6428480093f4SDimitry Andric 
64290b57cec5SDimitry Andric   if (SrcTy != S64)
64300b57cec5SDimitry Andric     return UnableToLegalize;
64310b57cec5SDimitry Andric 
64320b57cec5SDimitry Andric   if (DstTy == S32) {
64330b57cec5SDimitry Andric     // signed cl2f(long l) {
64340b57cec5SDimitry Andric     //   long s = l >> 63;
64350b57cec5SDimitry Andric     //   float r = cul2f((l + s) ^ s);
64360b57cec5SDimitry Andric     //   return s ? -r : r;
64370b57cec5SDimitry Andric     // }
64380b57cec5SDimitry Andric     Register L = Src;
64390b57cec5SDimitry Andric     auto SignBit = MIRBuilder.buildConstant(S64, 63);
64400b57cec5SDimitry Andric     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
64410b57cec5SDimitry Andric 
64420b57cec5SDimitry Andric     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
64430b57cec5SDimitry Andric     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
64440b57cec5SDimitry Andric     auto R = MIRBuilder.buildUITOFP(S32, Xor);
64450b57cec5SDimitry Andric 
64460b57cec5SDimitry Andric     auto RNeg = MIRBuilder.buildFNeg(S32, R);
64470b57cec5SDimitry Andric     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
64480b57cec5SDimitry Andric                                             MIRBuilder.buildConstant(S64, 0));
64490b57cec5SDimitry Andric     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
64505ffd83dbSDimitry Andric     MI.eraseFromParent();
64510b57cec5SDimitry Andric     return Legalized;
64520b57cec5SDimitry Andric   }
64530b57cec5SDimitry Andric 
64540b57cec5SDimitry Andric   return UnableToLegalize;
64550b57cec5SDimitry Andric }
64560b57cec5SDimitry Andric 
6457e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
645806c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
64598bcb0991SDimitry Andric   const LLT S64 = LLT::scalar(64);
64608bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
64618bcb0991SDimitry Andric 
64628bcb0991SDimitry Andric   if (SrcTy != S64 && SrcTy != S32)
64638bcb0991SDimitry Andric     return UnableToLegalize;
64648bcb0991SDimitry Andric   if (DstTy != S32 && DstTy != S64)
64658bcb0991SDimitry Andric     return UnableToLegalize;
64668bcb0991SDimitry Andric 
64678bcb0991SDimitry Andric   // FPTOSI gives same result as FPTOUI for positive signed integers.
64688bcb0991SDimitry Andric   // FPTOUI needs to deal with fp values that convert to unsigned integers
64698bcb0991SDimitry Andric   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
64708bcb0991SDimitry Andric 
64718bcb0991SDimitry Andric   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
64728bcb0991SDimitry Andric   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
64738bcb0991SDimitry Andric                                                 : APFloat::IEEEdouble(),
6474349cc55cSDimitry Andric                     APInt::getZero(SrcTy.getSizeInBits()));
64758bcb0991SDimitry Andric   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
64768bcb0991SDimitry Andric 
64778bcb0991SDimitry Andric   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
64788bcb0991SDimitry Andric 
64798bcb0991SDimitry Andric   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
64808bcb0991SDimitry Andric   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
64818bcb0991SDimitry Andric   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
64828bcb0991SDimitry Andric   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
64838bcb0991SDimitry Andric   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
64848bcb0991SDimitry Andric   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
64858bcb0991SDimitry Andric   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
64868bcb0991SDimitry Andric 
6487480093f4SDimitry Andric   const LLT S1 = LLT::scalar(1);
6488480093f4SDimitry Andric 
64898bcb0991SDimitry Andric   MachineInstrBuilder FCMP =
6490480093f4SDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
64918bcb0991SDimitry Andric   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
64928bcb0991SDimitry Andric 
64938bcb0991SDimitry Andric   MI.eraseFromParent();
64948bcb0991SDimitry Andric   return Legalized;
64958bcb0991SDimitry Andric }
64968bcb0991SDimitry Andric 
64975ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
649806c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
64995ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
65005ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
65015ffd83dbSDimitry Andric 
65025ffd83dbSDimitry Andric   // FIXME: Only f32 to i64 conversions are supported.
65035ffd83dbSDimitry Andric   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
65045ffd83dbSDimitry Andric     return UnableToLegalize;
65055ffd83dbSDimitry Andric 
65065ffd83dbSDimitry Andric   // Expand f32 -> i64 conversion
65075ffd83dbSDimitry Andric   // This algorithm comes from compiler-rt's implementation of fixsfdi:
6508fe6060f1SDimitry Andric   // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
65095ffd83dbSDimitry Andric 
65105ffd83dbSDimitry Andric   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
65115ffd83dbSDimitry Andric 
65125ffd83dbSDimitry Andric   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
65135ffd83dbSDimitry Andric   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
65145ffd83dbSDimitry Andric 
65155ffd83dbSDimitry Andric   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
65165ffd83dbSDimitry Andric   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
65175ffd83dbSDimitry Andric 
65185ffd83dbSDimitry Andric   auto SignMask = MIRBuilder.buildConstant(SrcTy,
65195ffd83dbSDimitry Andric                                            APInt::getSignMask(SrcEltBits));
65205ffd83dbSDimitry Andric   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
65215ffd83dbSDimitry Andric   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
65225ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
65235ffd83dbSDimitry Andric   Sign = MIRBuilder.buildSExt(DstTy, Sign);
65245ffd83dbSDimitry Andric 
65255ffd83dbSDimitry Andric   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
65265ffd83dbSDimitry Andric   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
65275ffd83dbSDimitry Andric   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
65285ffd83dbSDimitry Andric 
65295ffd83dbSDimitry Andric   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
65305ffd83dbSDimitry Andric   R = MIRBuilder.buildZExt(DstTy, R);
65315ffd83dbSDimitry Andric 
65325ffd83dbSDimitry Andric   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
65335ffd83dbSDimitry Andric   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
65345ffd83dbSDimitry Andric   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
65355ffd83dbSDimitry Andric   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
65365ffd83dbSDimitry Andric 
65375ffd83dbSDimitry Andric   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
65385ffd83dbSDimitry Andric   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
65395ffd83dbSDimitry Andric 
65405ffd83dbSDimitry Andric   const LLT S1 = LLT::scalar(1);
65415ffd83dbSDimitry Andric   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
65425ffd83dbSDimitry Andric                                     S1, Exponent, ExponentLoBit);
65435ffd83dbSDimitry Andric 
65445ffd83dbSDimitry Andric   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
65455ffd83dbSDimitry Andric 
65465ffd83dbSDimitry Andric   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
65475ffd83dbSDimitry Andric   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
65485ffd83dbSDimitry Andric 
65495ffd83dbSDimitry Andric   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
65505ffd83dbSDimitry Andric 
65515ffd83dbSDimitry Andric   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
65525ffd83dbSDimitry Andric                                           S1, Exponent, ZeroSrcTy);
65535ffd83dbSDimitry Andric 
65545ffd83dbSDimitry Andric   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
65555ffd83dbSDimitry Andric   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
65565ffd83dbSDimitry Andric 
65575ffd83dbSDimitry Andric   MI.eraseFromParent();
65585ffd83dbSDimitry Andric   return Legalized;
65595ffd83dbSDimitry Andric }
65605ffd83dbSDimitry Andric 
65615ffd83dbSDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode.
65625ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
65635ffd83dbSDimitry Andric LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
656406c3fb27SDimitry Andric   const LLT S1 = LLT::scalar(1);
656506c3fb27SDimitry Andric   const LLT S32 = LLT::scalar(32);
656606c3fb27SDimitry Andric 
656706c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
656806c3fb27SDimitry Andric   assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
656906c3fb27SDimitry Andric          MRI.getType(Src).getScalarType() == LLT::scalar(64));
65705ffd83dbSDimitry Andric 
65715ffd83dbSDimitry Andric   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
65725ffd83dbSDimitry Andric     return UnableToLegalize;
65735ffd83dbSDimitry Andric 
657406c3fb27SDimitry Andric   if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
657506c3fb27SDimitry Andric     unsigned Flags = MI.getFlags();
657606c3fb27SDimitry Andric     auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
657706c3fb27SDimitry Andric     MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
657806c3fb27SDimitry Andric     MI.eraseFromParent();
657906c3fb27SDimitry Andric     return Legalized;
658006c3fb27SDimitry Andric   }
658106c3fb27SDimitry Andric 
65825ffd83dbSDimitry Andric   const unsigned ExpMask = 0x7ff;
65835ffd83dbSDimitry Andric   const unsigned ExpBiasf64 = 1023;
65845ffd83dbSDimitry Andric   const unsigned ExpBiasf16 = 15;
65855ffd83dbSDimitry Andric 
65865ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
65875ffd83dbSDimitry Andric   Register U = Unmerge.getReg(0);
65885ffd83dbSDimitry Andric   Register UH = Unmerge.getReg(1);
65895ffd83dbSDimitry Andric 
65905ffd83dbSDimitry Andric   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
65915ffd83dbSDimitry Andric   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
65925ffd83dbSDimitry Andric 
65935ffd83dbSDimitry Andric   // Subtract the fp64 exponent bias (1023) to get the real exponent and
65945ffd83dbSDimitry Andric   // add the f16 bias (15) to get the biased exponent for the f16 format.
65955ffd83dbSDimitry Andric   E = MIRBuilder.buildAdd(
65965ffd83dbSDimitry Andric     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
65975ffd83dbSDimitry Andric 
65985ffd83dbSDimitry Andric   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
65995ffd83dbSDimitry Andric   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
66005ffd83dbSDimitry Andric 
66015ffd83dbSDimitry Andric   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
66025ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1ff));
66035ffd83dbSDimitry Andric   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
66045ffd83dbSDimitry Andric 
66055ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildConstant(S32, 0);
66065ffd83dbSDimitry Andric   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
66075ffd83dbSDimitry Andric   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
66085ffd83dbSDimitry Andric   M = MIRBuilder.buildOr(S32, M, Lo40Set);
66095ffd83dbSDimitry Andric 
66105ffd83dbSDimitry Andric   // (M != 0 ? 0x0200 : 0) | 0x7c00;
66115ffd83dbSDimitry Andric   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
66125ffd83dbSDimitry Andric   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
66135ffd83dbSDimitry Andric   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
66145ffd83dbSDimitry Andric 
66155ffd83dbSDimitry Andric   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
66165ffd83dbSDimitry Andric   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
66175ffd83dbSDimitry Andric 
66185ffd83dbSDimitry Andric   // N = M | (E << 12);
66195ffd83dbSDimitry Andric   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
66205ffd83dbSDimitry Andric   auto N = MIRBuilder.buildOr(S32, M, EShl12);
66215ffd83dbSDimitry Andric 
66225ffd83dbSDimitry Andric   // B = clamp(1-E, 0, 13);
66235ffd83dbSDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
66245ffd83dbSDimitry Andric   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
66255ffd83dbSDimitry Andric   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
66265ffd83dbSDimitry Andric   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
66275ffd83dbSDimitry Andric 
66285ffd83dbSDimitry Andric   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
66295ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1000));
66305ffd83dbSDimitry Andric 
66315ffd83dbSDimitry Andric   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
66325ffd83dbSDimitry Andric   auto D0 = MIRBuilder.buildShl(S32, D, B);
66335ffd83dbSDimitry Andric 
66345ffd83dbSDimitry Andric   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
66355ffd83dbSDimitry Andric                                              D0, SigSetHigh);
66365ffd83dbSDimitry Andric   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
66375ffd83dbSDimitry Andric   D = MIRBuilder.buildOr(S32, D, D1);
66385ffd83dbSDimitry Andric 
66395ffd83dbSDimitry Andric   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
66405ffd83dbSDimitry Andric   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
66415ffd83dbSDimitry Andric 
66425ffd83dbSDimitry Andric   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
66435ffd83dbSDimitry Andric   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
66445ffd83dbSDimitry Andric 
66455ffd83dbSDimitry Andric   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
66465ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 3));
66475ffd83dbSDimitry Andric   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
66485ffd83dbSDimitry Andric 
66495ffd83dbSDimitry Andric   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
66505ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 5));
66515ffd83dbSDimitry Andric   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
66525ffd83dbSDimitry Andric 
66535ffd83dbSDimitry Andric   V1 = MIRBuilder.buildOr(S32, V0, V1);
66545ffd83dbSDimitry Andric   V = MIRBuilder.buildAdd(S32, V, V1);
66555ffd83dbSDimitry Andric 
66565ffd83dbSDimitry Andric   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
66575ffd83dbSDimitry Andric                                        E, MIRBuilder.buildConstant(S32, 30));
66585ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt30,
66595ffd83dbSDimitry Andric                              MIRBuilder.buildConstant(S32, 0x7c00), V);
66605ffd83dbSDimitry Andric 
66615ffd83dbSDimitry Andric   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
66625ffd83dbSDimitry Andric                                          E, MIRBuilder.buildConstant(S32, 1039));
66635ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
66645ffd83dbSDimitry Andric 
66655ffd83dbSDimitry Andric   // Extract the sign bit.
66665ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
66675ffd83dbSDimitry Andric   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
66685ffd83dbSDimitry Andric 
66695ffd83dbSDimitry Andric   // Insert the sign bit
66705ffd83dbSDimitry Andric   V = MIRBuilder.buildOr(S32, Sign, V);
66715ffd83dbSDimitry Andric 
66725ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst, V);
66735ffd83dbSDimitry Andric   MI.eraseFromParent();
66745ffd83dbSDimitry Andric   return Legalized;
66755ffd83dbSDimitry Andric }
66765ffd83dbSDimitry Andric 
66775ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
6678e8d8bef9SDimitry Andric LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
667906c3fb27SDimitry Andric   auto [DstTy, SrcTy] = MI.getFirst2LLTs();
66805ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
66815ffd83dbSDimitry Andric   const LLT S16 = LLT::scalar(16);
66825ffd83dbSDimitry Andric 
66835ffd83dbSDimitry Andric   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
66845ffd83dbSDimitry Andric     return lowerFPTRUNC_F64_TO_F16(MI);
66855ffd83dbSDimitry Andric 
66865ffd83dbSDimitry Andric   return UnableToLegalize;
66875ffd83dbSDimitry Andric }
66885ffd83dbSDimitry Andric 
6689e8d8bef9SDimitry Andric // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
6690e8d8bef9SDimitry Andric // multiplication tree.
6691e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
669206c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
6693e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Dst);
6694e8d8bef9SDimitry Andric 
6695e8d8bef9SDimitry Andric   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
6696e8d8bef9SDimitry Andric   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
6697e8d8bef9SDimitry Andric   MI.eraseFromParent();
6698e8d8bef9SDimitry Andric   return Legalized;
6699e8d8bef9SDimitry Andric }
6700e8d8bef9SDimitry Andric 
67010b57cec5SDimitry Andric static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
67020b57cec5SDimitry Andric   switch (Opc) {
67030b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
67040b57cec5SDimitry Andric     return CmpInst::ICMP_SLT;
67050b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
67060b57cec5SDimitry Andric     return CmpInst::ICMP_SGT;
67070b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
67080b57cec5SDimitry Andric     return CmpInst::ICMP_ULT;
67090b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
67100b57cec5SDimitry Andric     return CmpInst::ICMP_UGT;
67110b57cec5SDimitry Andric   default:
67120b57cec5SDimitry Andric     llvm_unreachable("not in integer min/max");
67130b57cec5SDimitry Andric   }
67140b57cec5SDimitry Andric }
67150b57cec5SDimitry Andric 
6716e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
671706c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
67180b57cec5SDimitry Andric 
67190b57cec5SDimitry Andric   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
67200b57cec5SDimitry Andric   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
67210b57cec5SDimitry Andric 
67220b57cec5SDimitry Andric   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
67230b57cec5SDimitry Andric   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
67240b57cec5SDimitry Andric 
67250b57cec5SDimitry Andric   MI.eraseFromParent();
67260b57cec5SDimitry Andric   return Legalized;
67270b57cec5SDimitry Andric }
67280b57cec5SDimitry Andric 
67290b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
6730e8d8bef9SDimitry Andric LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
673106c3fb27SDimitry Andric   auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
67320b57cec5SDimitry Andric   const int Src0Size = Src0Ty.getScalarSizeInBits();
67330b57cec5SDimitry Andric   const int Src1Size = Src1Ty.getScalarSizeInBits();
67340b57cec5SDimitry Andric 
67350b57cec5SDimitry Andric   auto SignBitMask = MIRBuilder.buildConstant(
67360b57cec5SDimitry Andric     Src0Ty, APInt::getSignMask(Src0Size));
67370b57cec5SDimitry Andric 
67380b57cec5SDimitry Andric   auto NotSignBitMask = MIRBuilder.buildConstant(
67390b57cec5SDimitry Andric     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
67400b57cec5SDimitry Andric 
6741fe6060f1SDimitry Andric   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
6742fe6060f1SDimitry Andric   Register And1;
67430b57cec5SDimitry Andric   if (Src0Ty == Src1Ty) {
6744fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
67450b57cec5SDimitry Andric   } else if (Src0Size > Src1Size) {
67460b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
67470b57cec5SDimitry Andric     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
67480b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
6749fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
67500b57cec5SDimitry Andric   } else {
67510b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
67520b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
67530b57cec5SDimitry Andric     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
6754fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
67550b57cec5SDimitry Andric   }
67560b57cec5SDimitry Andric 
67570b57cec5SDimitry Andric   // Be careful about setting nsz/nnan/ninf on every instruction, since the
67580b57cec5SDimitry Andric   // constants are a nan and -0.0, but the final result should preserve
67590b57cec5SDimitry Andric   // everything.
6760fe6060f1SDimitry Andric   unsigned Flags = MI.getFlags();
6761fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, And0, And1, Flags);
67620b57cec5SDimitry Andric 
67630b57cec5SDimitry Andric   MI.eraseFromParent();
67640b57cec5SDimitry Andric   return Legalized;
67650b57cec5SDimitry Andric }
67660b57cec5SDimitry Andric 
67670b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
67680b57cec5SDimitry Andric LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
67690b57cec5SDimitry Andric   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
67700b57cec5SDimitry Andric     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
67710b57cec5SDimitry Andric 
677206c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
67730b57cec5SDimitry Andric   LLT Ty = MRI.getType(Dst);
67740b57cec5SDimitry Andric 
67750b57cec5SDimitry Andric   if (!MI.getFlag(MachineInstr::FmNoNans)) {
67760b57cec5SDimitry Andric     // Insert canonicalizes if it's possible we need to quiet to get correct
67770b57cec5SDimitry Andric     // sNaN behavior.
67780b57cec5SDimitry Andric 
67790b57cec5SDimitry Andric     // Note this must be done here, and not as an optimization combine in the
67800b57cec5SDimitry Andric     // absence of a dedicate quiet-snan instruction as we're using an
67810b57cec5SDimitry Andric     // omni-purpose G_FCANONICALIZE.
67820b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src0, MRI))
67830b57cec5SDimitry Andric       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
67840b57cec5SDimitry Andric 
67850b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src1, MRI))
67860b57cec5SDimitry Andric       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
67870b57cec5SDimitry Andric   }
67880b57cec5SDimitry Andric 
67890b57cec5SDimitry Andric   // If there are no nans, it's safe to simply replace this with the non-IEEE
67900b57cec5SDimitry Andric   // version.
67910b57cec5SDimitry Andric   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
67920b57cec5SDimitry Andric   MI.eraseFromParent();
67930b57cec5SDimitry Andric   return Legalized;
67940b57cec5SDimitry Andric }
67958bcb0991SDimitry Andric 
67968bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
67978bcb0991SDimitry Andric   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
67988bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
67998bcb0991SDimitry Andric   LLT Ty = MRI.getType(DstReg);
68008bcb0991SDimitry Andric   unsigned Flags = MI.getFlags();
68018bcb0991SDimitry Andric 
68028bcb0991SDimitry Andric   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
68038bcb0991SDimitry Andric                                   Flags);
68048bcb0991SDimitry Andric   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
68058bcb0991SDimitry Andric   MI.eraseFromParent();
68068bcb0991SDimitry Andric   return Legalized;
68078bcb0991SDimitry Andric }
68088bcb0991SDimitry Andric 
68098bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
6810480093f4SDimitry Andric LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
681106c3fb27SDimitry Andric   auto [DstReg, X] = MI.getFirst2Regs();
68125ffd83dbSDimitry Andric   const unsigned Flags = MI.getFlags();
68135ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(DstReg);
68145ffd83dbSDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
68155ffd83dbSDimitry Andric 
68165ffd83dbSDimitry Andric   // round(x) =>
68175ffd83dbSDimitry Andric   //  t = trunc(x);
68185ffd83dbSDimitry Andric   //  d = fabs(x - t);
6819*5f757f3fSDimitry Andric   //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
6820*5f757f3fSDimitry Andric   //  return t + o;
68215ffd83dbSDimitry Andric 
68225ffd83dbSDimitry Andric   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
68235ffd83dbSDimitry Andric 
68245ffd83dbSDimitry Andric   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
68255ffd83dbSDimitry Andric   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
6826*5f757f3fSDimitry Andric 
68275ffd83dbSDimitry Andric   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
6828*5f757f3fSDimitry Andric   auto Cmp =
6829*5f757f3fSDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
68305ffd83dbSDimitry Andric 
6831*5f757f3fSDimitry Andric   // Could emit G_UITOFP instead
6832*5f757f3fSDimitry Andric   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
6833*5f757f3fSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6834*5f757f3fSDimitry Andric   auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
6835*5f757f3fSDimitry Andric   auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
68365ffd83dbSDimitry Andric 
6837*5f757f3fSDimitry Andric   MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
68385ffd83dbSDimitry Andric 
68395ffd83dbSDimitry Andric   MI.eraseFromParent();
68405ffd83dbSDimitry Andric   return Legalized;
68415ffd83dbSDimitry Andric }
68425ffd83dbSDimitry Andric 
684306c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
684406c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
6845480093f4SDimitry Andric   unsigned Flags = MI.getFlags();
6846480093f4SDimitry Andric   LLT Ty = MRI.getType(DstReg);
6847480093f4SDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
6848480093f4SDimitry Andric 
6849480093f4SDimitry Andric   // result = trunc(src);
6850480093f4SDimitry Andric   // if (src < 0.0 && src != result)
6851480093f4SDimitry Andric   //   result += -1.0.
6852480093f4SDimitry Andric 
6853480093f4SDimitry Andric   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
68545ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6855480093f4SDimitry Andric 
6856480093f4SDimitry Andric   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
6857480093f4SDimitry Andric                                   SrcReg, Zero, Flags);
6858480093f4SDimitry Andric   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
6859480093f4SDimitry Andric                                       SrcReg, Trunc, Flags);
6860480093f4SDimitry Andric   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
6861480093f4SDimitry Andric   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
6862480093f4SDimitry Andric 
68635ffd83dbSDimitry Andric   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
68645ffd83dbSDimitry Andric   MI.eraseFromParent();
68655ffd83dbSDimitry Andric   return Legalized;
68665ffd83dbSDimitry Andric }
68675ffd83dbSDimitry Andric 
68685ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
68695ffd83dbSDimitry Andric LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
68705ffd83dbSDimitry Andric   const unsigned NumOps = MI.getNumOperands();
687106c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
687206c3fb27SDimitry Andric   unsigned PartSize = Src0Ty.getSizeInBits();
68735ffd83dbSDimitry Andric 
68745ffd83dbSDimitry Andric   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
68755ffd83dbSDimitry Andric   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
68765ffd83dbSDimitry Andric 
68775ffd83dbSDimitry Andric   for (unsigned I = 2; I != NumOps; ++I) {
68785ffd83dbSDimitry Andric     const unsigned Offset = (I - 1) * PartSize;
68795ffd83dbSDimitry Andric 
68805ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(I).getReg();
68815ffd83dbSDimitry Andric     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
68825ffd83dbSDimitry Andric 
68835ffd83dbSDimitry Andric     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
68845ffd83dbSDimitry Andric       MRI.createGenericVirtualRegister(WideTy);
68855ffd83dbSDimitry Andric 
68865ffd83dbSDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
68875ffd83dbSDimitry Andric     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
68885ffd83dbSDimitry Andric     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
68895ffd83dbSDimitry Andric     ResultReg = NextResult;
68905ffd83dbSDimitry Andric   }
68915ffd83dbSDimitry Andric 
68925ffd83dbSDimitry Andric   if (DstTy.isPointer()) {
68935ffd83dbSDimitry Andric     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
68945ffd83dbSDimitry Andric           DstTy.getAddressSpace())) {
68955ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
68965ffd83dbSDimitry Andric       return UnableToLegalize;
68975ffd83dbSDimitry Andric     }
68985ffd83dbSDimitry Andric 
68995ffd83dbSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
69005ffd83dbSDimitry Andric   }
69015ffd83dbSDimitry Andric 
6902480093f4SDimitry Andric   MI.eraseFromParent();
6903480093f4SDimitry Andric   return Legalized;
6904480093f4SDimitry Andric }
6905480093f4SDimitry Andric 
6906480093f4SDimitry Andric LegalizerHelper::LegalizeResult
69078bcb0991SDimitry Andric LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
69088bcb0991SDimitry Andric   const unsigned NumDst = MI.getNumOperands() - 1;
69095ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
69108bcb0991SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
69118bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
69125ffd83dbSDimitry Andric   if (DstTy.isPointer())
69135ffd83dbSDimitry Andric     return UnableToLegalize; // TODO
69148bcb0991SDimitry Andric 
69155ffd83dbSDimitry Andric   SrcReg = coerceToScalar(SrcReg);
69165ffd83dbSDimitry Andric   if (!SrcReg)
69175ffd83dbSDimitry Andric     return UnableToLegalize;
69188bcb0991SDimitry Andric 
69198bcb0991SDimitry Andric   // Expand scalarizing unmerge as bitcast to integer and shift.
69205ffd83dbSDimitry Andric   LLT IntTy = MRI.getType(SrcReg);
69218bcb0991SDimitry Andric 
69225ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
69238bcb0991SDimitry Andric 
69248bcb0991SDimitry Andric   const unsigned DstSize = DstTy.getSizeInBits();
69258bcb0991SDimitry Andric   unsigned Offset = DstSize;
69268bcb0991SDimitry Andric   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
69278bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
69285ffd83dbSDimitry Andric     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
69298bcb0991SDimitry Andric     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
69308bcb0991SDimitry Andric   }
69318bcb0991SDimitry Andric 
69328bcb0991SDimitry Andric   MI.eraseFromParent();
69338bcb0991SDimitry Andric   return Legalized;
69348bcb0991SDimitry Andric }
69358bcb0991SDimitry Andric 
6936e8d8bef9SDimitry Andric /// Lower a vector extract or insert by writing the vector to a stack temporary
6937e8d8bef9SDimitry Andric /// and reloading the element or vector.
6938e8d8bef9SDimitry Andric ///
6939e8d8bef9SDimitry Andric /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
6940e8d8bef9SDimitry Andric ///  =>
6941e8d8bef9SDimitry Andric ///  %stack_temp = G_FRAME_INDEX
6942e8d8bef9SDimitry Andric ///  G_STORE %vec, %stack_temp
6943e8d8bef9SDimitry Andric ///  %idx = clamp(%idx, %vec.getNumElements())
6944e8d8bef9SDimitry Andric ///  %element_ptr = G_PTR_ADD %stack_temp, %idx
6945e8d8bef9SDimitry Andric ///  %dst = G_LOAD %element_ptr
6946e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
6947e8d8bef9SDimitry Andric LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
6948e8d8bef9SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
6949e8d8bef9SDimitry Andric   Register SrcVec = MI.getOperand(1).getReg();
6950e8d8bef9SDimitry Andric   Register InsertVal;
6951e8d8bef9SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
6952e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
6953e8d8bef9SDimitry Andric 
6954e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
6955e8d8bef9SDimitry Andric 
6956e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
6957e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
69580eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
69590eae32dcSDimitry Andric 
69600eae32dcSDimitry Andric   int64_t IdxVal;
69610eae32dcSDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
69620eae32dcSDimitry Andric     SmallVector<Register, 8> SrcRegs;
69630eae32dcSDimitry Andric     extractParts(SrcVec, EltTy, NumElts, SrcRegs);
69640eae32dcSDimitry Andric 
69650eae32dcSDimitry Andric     if (InsertVal) {
69660eae32dcSDimitry Andric       SrcRegs[IdxVal] = MI.getOperand(2).getReg();
6967bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
69680eae32dcSDimitry Andric     } else {
69690eae32dcSDimitry Andric       MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
69700eae32dcSDimitry Andric     }
69710eae32dcSDimitry Andric 
69720eae32dcSDimitry Andric     MI.eraseFromParent();
69730eae32dcSDimitry Andric     return Legalized;
69740eae32dcSDimitry Andric   }
69750eae32dcSDimitry Andric 
6976e8d8bef9SDimitry Andric   if (!EltTy.isByteSized()) { // Not implemented.
6977e8d8bef9SDimitry Andric     LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
6978e8d8bef9SDimitry Andric     return UnableToLegalize;
6979e8d8bef9SDimitry Andric   }
6980e8d8bef9SDimitry Andric 
6981e8d8bef9SDimitry Andric   unsigned EltBytes = EltTy.getSizeInBytes();
6982e8d8bef9SDimitry Andric   Align VecAlign = getStackTemporaryAlignment(VecTy);
6983e8d8bef9SDimitry Andric   Align EltAlign;
6984e8d8bef9SDimitry Andric 
6985e8d8bef9SDimitry Andric   MachinePointerInfo PtrInfo;
6986*5f757f3fSDimitry Andric   auto StackTemp = createStackTemporary(
6987*5f757f3fSDimitry Andric       TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
6988e8d8bef9SDimitry Andric   MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
6989e8d8bef9SDimitry Andric 
6990e8d8bef9SDimitry Andric   // Get the pointer to the element, and be sure not to hit undefined behavior
6991e8d8bef9SDimitry Andric   // if the index is out of bounds.
6992e8d8bef9SDimitry Andric   Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
6993e8d8bef9SDimitry Andric 
6994e8d8bef9SDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
6995e8d8bef9SDimitry Andric     int64_t Offset = IdxVal * EltBytes;
6996e8d8bef9SDimitry Andric     PtrInfo = PtrInfo.getWithOffset(Offset);
6997e8d8bef9SDimitry Andric     EltAlign = commonAlignment(VecAlign, Offset);
6998e8d8bef9SDimitry Andric   } else {
6999e8d8bef9SDimitry Andric     // We lose information with a variable offset.
7000e8d8bef9SDimitry Andric     EltAlign = getStackTemporaryAlignment(EltTy);
7001e8d8bef9SDimitry Andric     PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7002e8d8bef9SDimitry Andric   }
7003e8d8bef9SDimitry Andric 
7004e8d8bef9SDimitry Andric   if (InsertVal) {
7005e8d8bef9SDimitry Andric     // Write the inserted element
7006e8d8bef9SDimitry Andric     MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7007e8d8bef9SDimitry Andric 
7008e8d8bef9SDimitry Andric     // Reload the whole vector.
7009e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7010e8d8bef9SDimitry Andric   } else {
7011e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7012e8d8bef9SDimitry Andric   }
7013e8d8bef9SDimitry Andric 
7014e8d8bef9SDimitry Andric   MI.eraseFromParent();
7015e8d8bef9SDimitry Andric   return Legalized;
7016e8d8bef9SDimitry Andric }
7017e8d8bef9SDimitry Andric 
70188bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
70198bcb0991SDimitry Andric LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
702006c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
702106c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
70228bcb0991SDimitry Andric   LLT IdxTy = LLT::scalar(32);
70238bcb0991SDimitry Andric 
7024480093f4SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
70258bcb0991SDimitry Andric   Register Undef;
70268bcb0991SDimitry Andric   SmallVector<Register, 32> BuildVec;
7027*5f757f3fSDimitry Andric   LLT EltTy = DstTy.getScalarType();
70288bcb0991SDimitry Andric 
70298bcb0991SDimitry Andric   for (int Idx : Mask) {
70308bcb0991SDimitry Andric     if (Idx < 0) {
70318bcb0991SDimitry Andric       if (!Undef.isValid())
70328bcb0991SDimitry Andric         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
70338bcb0991SDimitry Andric       BuildVec.push_back(Undef);
70348bcb0991SDimitry Andric       continue;
70358bcb0991SDimitry Andric     }
70368bcb0991SDimitry Andric 
70378bcb0991SDimitry Andric     if (Src0Ty.isScalar()) {
70388bcb0991SDimitry Andric       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
70398bcb0991SDimitry Andric     } else {
70408bcb0991SDimitry Andric       int NumElts = Src0Ty.getNumElements();
70418bcb0991SDimitry Andric       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
70428bcb0991SDimitry Andric       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
70438bcb0991SDimitry Andric       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
70448bcb0991SDimitry Andric       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
70458bcb0991SDimitry Andric       BuildVec.push_back(Extract.getReg(0));
70468bcb0991SDimitry Andric     }
70478bcb0991SDimitry Andric   }
70488bcb0991SDimitry Andric 
7049*5f757f3fSDimitry Andric   if (DstTy.isScalar())
7050*5f757f3fSDimitry Andric     MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7051*5f757f3fSDimitry Andric   else
70528bcb0991SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, BuildVec);
70538bcb0991SDimitry Andric   MI.eraseFromParent();
70548bcb0991SDimitry Andric   return Legalized;
70558bcb0991SDimitry Andric }
70568bcb0991SDimitry Andric 
7057*5f757f3fSDimitry Andric Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7058*5f757f3fSDimitry Andric                                                     Register AllocSize,
7059*5f757f3fSDimitry Andric                                                     Align Alignment,
7060*5f757f3fSDimitry Andric                                                     LLT PtrTy) {
70618bcb0991SDimitry Andric   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
70628bcb0991SDimitry Andric 
70638bcb0991SDimitry Andric   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
70648bcb0991SDimitry Andric   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
70658bcb0991SDimitry Andric 
70668bcb0991SDimitry Andric   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
70678bcb0991SDimitry Andric   // have to generate an extra instruction to negate the alloc and then use
7068480093f4SDimitry Andric   // G_PTR_ADD to add the negative offset.
70698bcb0991SDimitry Andric   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
70705ffd83dbSDimitry Andric   if (Alignment > Align(1)) {
70715ffd83dbSDimitry Andric     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
70728bcb0991SDimitry Andric     AlignMask.negate();
70738bcb0991SDimitry Andric     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
70748bcb0991SDimitry Andric     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
70758bcb0991SDimitry Andric   }
70768bcb0991SDimitry Andric 
7077*5f757f3fSDimitry Andric   return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7078*5f757f3fSDimitry Andric }
7079*5f757f3fSDimitry Andric 
7080*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
7081*5f757f3fSDimitry Andric LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7082*5f757f3fSDimitry Andric   const auto &MF = *MI.getMF();
7083*5f757f3fSDimitry Andric   const auto &TFI = *MF.getSubtarget().getFrameLowering();
7084*5f757f3fSDimitry Andric   if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7085*5f757f3fSDimitry Andric     return UnableToLegalize;
7086*5f757f3fSDimitry Andric 
7087*5f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
7088*5f757f3fSDimitry Andric   Register AllocSize = MI.getOperand(1).getReg();
7089*5f757f3fSDimitry Andric   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7090*5f757f3fSDimitry Andric 
7091*5f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
7092*5f757f3fSDimitry Andric   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7093*5f757f3fSDimitry Andric   Register SPTmp =
7094*5f757f3fSDimitry Andric       getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7095*5f757f3fSDimitry Andric 
70968bcb0991SDimitry Andric   MIRBuilder.buildCopy(SPReg, SPTmp);
70978bcb0991SDimitry Andric   MIRBuilder.buildCopy(Dst, SPTmp);
70988bcb0991SDimitry Andric 
70998bcb0991SDimitry Andric   MI.eraseFromParent();
71008bcb0991SDimitry Andric   return Legalized;
71018bcb0991SDimitry Andric }
71028bcb0991SDimitry Andric 
71038bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
7104*5f757f3fSDimitry Andric LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7105*5f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7106*5f757f3fSDimitry Andric   if (!StackPtr)
7107*5f757f3fSDimitry Andric     return UnableToLegalize;
7108*5f757f3fSDimitry Andric 
7109*5f757f3fSDimitry Andric   MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7110*5f757f3fSDimitry Andric   MI.eraseFromParent();
7111*5f757f3fSDimitry Andric   return Legalized;
7112*5f757f3fSDimitry Andric }
7113*5f757f3fSDimitry Andric 
7114*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
7115*5f757f3fSDimitry Andric LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7116*5f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7117*5f757f3fSDimitry Andric   if (!StackPtr)
7118*5f757f3fSDimitry Andric     return UnableToLegalize;
7119*5f757f3fSDimitry Andric 
7120*5f757f3fSDimitry Andric   MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7121*5f757f3fSDimitry Andric   MI.eraseFromParent();
7122*5f757f3fSDimitry Andric   return Legalized;
7123*5f757f3fSDimitry Andric }
7124*5f757f3fSDimitry Andric 
7125*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
71268bcb0991SDimitry Andric LegalizerHelper::lowerExtract(MachineInstr &MI) {
712706c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
71288bcb0991SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
71298bcb0991SDimitry Andric 
71300eae32dcSDimitry Andric   // Extract sub-vector or one element
71310eae32dcSDimitry Andric   if (SrcTy.isVector()) {
71320eae32dcSDimitry Andric     unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
71330eae32dcSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
71340eae32dcSDimitry Andric 
71350eae32dcSDimitry Andric     if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
71360eae32dcSDimitry Andric         (Offset + DstSize <= SrcTy.getSizeInBits())) {
71370eae32dcSDimitry Andric       // Unmerge and allow access to each Src element for the artifact combiner.
713806c3fb27SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
71390eae32dcSDimitry Andric 
71400eae32dcSDimitry Andric       // Take element(s) we need to extract and copy it (merge them).
71410eae32dcSDimitry Andric       SmallVector<Register, 8> SubVectorElts;
71420eae32dcSDimitry Andric       for (unsigned Idx = Offset / SrcEltSize;
71430eae32dcSDimitry Andric            Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
71440eae32dcSDimitry Andric         SubVectorElts.push_back(Unmerge.getReg(Idx));
71450eae32dcSDimitry Andric       }
71460eae32dcSDimitry Andric       if (SubVectorElts.size() == 1)
714706c3fb27SDimitry Andric         MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
71480eae32dcSDimitry Andric       else
714906c3fb27SDimitry Andric         MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
71500eae32dcSDimitry Andric 
71510eae32dcSDimitry Andric       MI.eraseFromParent();
71520eae32dcSDimitry Andric       return Legalized;
71530eae32dcSDimitry Andric     }
71540eae32dcSDimitry Andric   }
71550eae32dcSDimitry Andric 
71568bcb0991SDimitry Andric   if (DstTy.isScalar() &&
71578bcb0991SDimitry Andric       (SrcTy.isScalar() ||
71588bcb0991SDimitry Andric        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
71598bcb0991SDimitry Andric     LLT SrcIntTy = SrcTy;
71608bcb0991SDimitry Andric     if (!SrcTy.isScalar()) {
71618bcb0991SDimitry Andric       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
716206c3fb27SDimitry Andric       SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
71638bcb0991SDimitry Andric     }
71648bcb0991SDimitry Andric 
71658bcb0991SDimitry Andric     if (Offset == 0)
716606c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, SrcReg);
71678bcb0991SDimitry Andric     else {
71688bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
716906c3fb27SDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
717006c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, Shr);
71718bcb0991SDimitry Andric     }
71728bcb0991SDimitry Andric 
71738bcb0991SDimitry Andric     MI.eraseFromParent();
71748bcb0991SDimitry Andric     return Legalized;
71758bcb0991SDimitry Andric   }
71768bcb0991SDimitry Andric 
71778bcb0991SDimitry Andric   return UnableToLegalize;
71788bcb0991SDimitry Andric }
71798bcb0991SDimitry Andric 
71808bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
718106c3fb27SDimitry Andric   auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
71828bcb0991SDimitry Andric   uint64_t Offset = MI.getOperand(3).getImm();
71838bcb0991SDimitry Andric 
71848bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Src);
71858bcb0991SDimitry Andric   LLT InsertTy = MRI.getType(InsertSrc);
71868bcb0991SDimitry Andric 
71870eae32dcSDimitry Andric   // Insert sub-vector or one element
71880eae32dcSDimitry Andric   if (DstTy.isVector() && !InsertTy.isPointer()) {
71890eae32dcSDimitry Andric     LLT EltTy = DstTy.getElementType();
71900eae32dcSDimitry Andric     unsigned EltSize = EltTy.getSizeInBits();
71910eae32dcSDimitry Andric     unsigned InsertSize = InsertTy.getSizeInBits();
71920eae32dcSDimitry Andric 
71930eae32dcSDimitry Andric     if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
71940eae32dcSDimitry Andric         (Offset + InsertSize <= DstTy.getSizeInBits())) {
71950eae32dcSDimitry Andric       auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
71960eae32dcSDimitry Andric       SmallVector<Register, 8> DstElts;
71970eae32dcSDimitry Andric       unsigned Idx = 0;
71980eae32dcSDimitry Andric       // Elements from Src before insert start Offset
71990eae32dcSDimitry Andric       for (; Idx < Offset / EltSize; ++Idx) {
72000eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
72010eae32dcSDimitry Andric       }
72020eae32dcSDimitry Andric 
72030eae32dcSDimitry Andric       // Replace elements in Src with elements from InsertSrc
72040eae32dcSDimitry Andric       if (InsertTy.getSizeInBits() > EltSize) {
72050eae32dcSDimitry Andric         auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
72060eae32dcSDimitry Andric         for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
72070eae32dcSDimitry Andric              ++Idx, ++i) {
72080eae32dcSDimitry Andric           DstElts.push_back(UnmergeInsertSrc.getReg(i));
72090eae32dcSDimitry Andric         }
72100eae32dcSDimitry Andric       } else {
72110eae32dcSDimitry Andric         DstElts.push_back(InsertSrc);
72120eae32dcSDimitry Andric         ++Idx;
72130eae32dcSDimitry Andric       }
72140eae32dcSDimitry Andric 
72150eae32dcSDimitry Andric       // Remaining elements from Src after insert
72160eae32dcSDimitry Andric       for (; Idx < DstTy.getNumElements(); ++Idx) {
72170eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
72180eae32dcSDimitry Andric       }
72190eae32dcSDimitry Andric 
7220bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
72210eae32dcSDimitry Andric       MI.eraseFromParent();
72220eae32dcSDimitry Andric       return Legalized;
72230eae32dcSDimitry Andric     }
72240eae32dcSDimitry Andric   }
72250eae32dcSDimitry Andric 
72265ffd83dbSDimitry Andric   if (InsertTy.isVector() ||
72275ffd83dbSDimitry Andric       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
72285ffd83dbSDimitry Andric     return UnableToLegalize;
72295ffd83dbSDimitry Andric 
72305ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
72315ffd83dbSDimitry Andric   if ((DstTy.isPointer() &&
72325ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
72335ffd83dbSDimitry Andric       (InsertTy.isPointer() &&
72345ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
72355ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
72365ffd83dbSDimitry Andric     return UnableToLegalize;
72375ffd83dbSDimitry Andric   }
72385ffd83dbSDimitry Andric 
72398bcb0991SDimitry Andric   LLT IntDstTy = DstTy;
72405ffd83dbSDimitry Andric 
72418bcb0991SDimitry Andric   if (!DstTy.isScalar()) {
72428bcb0991SDimitry Andric     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
72435ffd83dbSDimitry Andric     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
72445ffd83dbSDimitry Andric   }
72455ffd83dbSDimitry Andric 
72465ffd83dbSDimitry Andric   if (!InsertTy.isScalar()) {
72475ffd83dbSDimitry Andric     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
72485ffd83dbSDimitry Andric     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
72498bcb0991SDimitry Andric   }
72508bcb0991SDimitry Andric 
72518bcb0991SDimitry Andric   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
72528bcb0991SDimitry Andric   if (Offset != 0) {
72538bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
72548bcb0991SDimitry Andric     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
72558bcb0991SDimitry Andric   }
72568bcb0991SDimitry Andric 
72575ffd83dbSDimitry Andric   APInt MaskVal = APInt::getBitsSetWithWrap(
72585ffd83dbSDimitry Andric       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
72598bcb0991SDimitry Andric 
72608bcb0991SDimitry Andric   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
72618bcb0991SDimitry Andric   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
72628bcb0991SDimitry Andric   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
72638bcb0991SDimitry Andric 
72645ffd83dbSDimitry Andric   MIRBuilder.buildCast(Dst, Or);
72658bcb0991SDimitry Andric   MI.eraseFromParent();
72668bcb0991SDimitry Andric   return Legalized;
72678bcb0991SDimitry Andric }
72688bcb0991SDimitry Andric 
72698bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
72708bcb0991SDimitry Andric LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
727106c3fb27SDimitry Andric   auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
727206c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
72738bcb0991SDimitry Andric   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
72748bcb0991SDimitry Andric 
727506c3fb27SDimitry Andric   LLT Ty = Dst0Ty;
727606c3fb27SDimitry Andric   LLT BoolTy = Dst1Ty;
72778bcb0991SDimitry Andric 
72788bcb0991SDimitry Andric   if (IsAdd)
72798bcb0991SDimitry Andric     MIRBuilder.buildAdd(Dst0, LHS, RHS);
72808bcb0991SDimitry Andric   else
72818bcb0991SDimitry Andric     MIRBuilder.buildSub(Dst0, LHS, RHS);
72828bcb0991SDimitry Andric 
72838bcb0991SDimitry Andric   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
72848bcb0991SDimitry Andric 
72858bcb0991SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0);
72868bcb0991SDimitry Andric 
72878bcb0991SDimitry Andric   // For an addition, the result should be less than one of the operands (LHS)
72888bcb0991SDimitry Andric   // if and only if the other operand (RHS) is negative, otherwise there will
72898bcb0991SDimitry Andric   // be overflow.
72908bcb0991SDimitry Andric   // For a subtraction, the result should be less than one of the operands
72918bcb0991SDimitry Andric   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
72928bcb0991SDimitry Andric   // otherwise there will be overflow.
72938bcb0991SDimitry Andric   auto ResultLowerThanLHS =
72948bcb0991SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
72958bcb0991SDimitry Andric   auto ConditionRHS = MIRBuilder.buildICmp(
72968bcb0991SDimitry Andric       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
72978bcb0991SDimitry Andric 
72988bcb0991SDimitry Andric   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
72998bcb0991SDimitry Andric   MI.eraseFromParent();
73008bcb0991SDimitry Andric   return Legalized;
73018bcb0991SDimitry Andric }
7302480093f4SDimitry Andric 
7303480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7304e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
730506c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7306e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7307e8d8bef9SDimitry Andric   bool IsSigned;
7308e8d8bef9SDimitry Andric   bool IsAdd;
7309e8d8bef9SDimitry Andric   unsigned BaseOp;
7310e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7311e8d8bef9SDimitry Andric   default:
7312e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7313e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7314e8d8bef9SDimitry Andric     IsSigned = false;
7315e8d8bef9SDimitry Andric     IsAdd = true;
7316e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7317e8d8bef9SDimitry Andric     break;
7318e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7319e8d8bef9SDimitry Andric     IsSigned = true;
7320e8d8bef9SDimitry Andric     IsAdd = true;
7321e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7322e8d8bef9SDimitry Andric     break;
7323e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7324e8d8bef9SDimitry Andric     IsSigned = false;
7325e8d8bef9SDimitry Andric     IsAdd = false;
7326e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7327e8d8bef9SDimitry Andric     break;
7328e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7329e8d8bef9SDimitry Andric     IsSigned = true;
7330e8d8bef9SDimitry Andric     IsAdd = false;
7331e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7332e8d8bef9SDimitry Andric     break;
7333e8d8bef9SDimitry Andric   }
7334e8d8bef9SDimitry Andric 
7335e8d8bef9SDimitry Andric   if (IsSigned) {
7336e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7337e8d8bef9SDimitry Andric     //   hi = 0x7fffffff - smax(a, 0)
7338e8d8bef9SDimitry Andric     //   lo = 0x80000000 - smin(a, 0)
7339e8d8bef9SDimitry Andric     //   a + smin(smax(lo, b), hi)
7340e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7341e8d8bef9SDimitry Andric     //   lo = smax(a, -1) - 0x7fffffff
7342e8d8bef9SDimitry Andric     //   hi = smin(a, -1) - 0x80000000
7343e8d8bef9SDimitry Andric     //   a - smin(smax(lo, b), hi)
7344e8d8bef9SDimitry Andric     // TODO: AMDGPU can use a "median of 3" instruction here:
7345e8d8bef9SDimitry Andric     //   a +/- med3(lo, b, hi)
7346e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7347e8d8bef9SDimitry Andric     auto MaxVal =
7348e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7349e8d8bef9SDimitry Andric     auto MinVal =
7350e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7351e8d8bef9SDimitry Andric     MachineInstrBuilder Hi, Lo;
7352e8d8bef9SDimitry Andric     if (IsAdd) {
7353e8d8bef9SDimitry Andric       auto Zero = MIRBuilder.buildConstant(Ty, 0);
7354e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7355e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7356e8d8bef9SDimitry Andric     } else {
7357e8d8bef9SDimitry Andric       auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7358e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7359e8d8bef9SDimitry Andric                                MaxVal);
7360e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7361e8d8bef9SDimitry Andric                                MinVal);
7362e8d8bef9SDimitry Andric     }
7363e8d8bef9SDimitry Andric     auto RHSClamped =
7364e8d8bef9SDimitry Andric         MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7365e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7366e8d8bef9SDimitry Andric   } else {
7367e8d8bef9SDimitry Andric     // uadd.sat(a, b) -> a + umin(~a, b)
7368e8d8bef9SDimitry Andric     // usub.sat(a, b) -> a - umin(a, b)
7369e8d8bef9SDimitry Andric     Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7370e8d8bef9SDimitry Andric     auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7371e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7372e8d8bef9SDimitry Andric   }
7373e8d8bef9SDimitry Andric 
7374e8d8bef9SDimitry Andric   MI.eraseFromParent();
7375e8d8bef9SDimitry Andric   return Legalized;
7376e8d8bef9SDimitry Andric }
7377e8d8bef9SDimitry Andric 
7378e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7379e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
738006c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7381e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7382e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7383e8d8bef9SDimitry Andric   bool IsSigned;
7384e8d8bef9SDimitry Andric   bool IsAdd;
7385e8d8bef9SDimitry Andric   unsigned OverflowOp;
7386e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7387e8d8bef9SDimitry Andric   default:
7388e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7389e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7390e8d8bef9SDimitry Andric     IsSigned = false;
7391e8d8bef9SDimitry Andric     IsAdd = true;
7392e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_UADDO;
7393e8d8bef9SDimitry Andric     break;
7394e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7395e8d8bef9SDimitry Andric     IsSigned = true;
7396e8d8bef9SDimitry Andric     IsAdd = true;
7397e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SADDO;
7398e8d8bef9SDimitry Andric     break;
7399e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7400e8d8bef9SDimitry Andric     IsSigned = false;
7401e8d8bef9SDimitry Andric     IsAdd = false;
7402e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_USUBO;
7403e8d8bef9SDimitry Andric     break;
7404e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7405e8d8bef9SDimitry Andric     IsSigned = true;
7406e8d8bef9SDimitry Andric     IsAdd = false;
7407e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SSUBO;
7408e8d8bef9SDimitry Andric     break;
7409e8d8bef9SDimitry Andric   }
7410e8d8bef9SDimitry Andric 
7411e8d8bef9SDimitry Andric   auto OverflowRes =
7412e8d8bef9SDimitry Andric       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7413e8d8bef9SDimitry Andric   Register Tmp = OverflowRes.getReg(0);
7414e8d8bef9SDimitry Andric   Register Ov = OverflowRes.getReg(1);
7415e8d8bef9SDimitry Andric   MachineInstrBuilder Clamp;
7416e8d8bef9SDimitry Andric   if (IsSigned) {
7417e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7418e8d8bef9SDimitry Andric     //   {tmp, ov} = saddo(a, b)
7419e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7420e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7421e8d8bef9SDimitry Andric     //   {tmp, ov} = ssubo(a, b)
7422e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7423e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7424e8d8bef9SDimitry Andric     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7425e8d8bef9SDimitry Andric     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7426e8d8bef9SDimitry Andric     auto MinVal =
7427e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7428e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7429e8d8bef9SDimitry Andric   } else {
7430e8d8bef9SDimitry Andric     // uadd.sat(a, b) ->
7431e8d8bef9SDimitry Andric     //   {tmp, ov} = uaddo(a, b)
7432e8d8bef9SDimitry Andric     //   ov ? 0xffffffff : tmp
7433e8d8bef9SDimitry Andric     // usub.sat(a, b) ->
7434e8d8bef9SDimitry Andric     //   {tmp, ov} = usubo(a, b)
7435e8d8bef9SDimitry Andric     //   ov ? 0 : tmp
7436e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7437e8d8bef9SDimitry Andric   }
7438e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7439e8d8bef9SDimitry Andric 
7440e8d8bef9SDimitry Andric   MI.eraseFromParent();
7441e8d8bef9SDimitry Andric   return Legalized;
7442e8d8bef9SDimitry Andric }
7443e8d8bef9SDimitry Andric 
7444e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7445e8d8bef9SDimitry Andric LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7446e8d8bef9SDimitry Andric   assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7447e8d8bef9SDimitry Andric           MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7448e8d8bef9SDimitry Andric          "Expected shlsat opcode!");
7449e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
745006c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7451e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7452e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7453e8d8bef9SDimitry Andric 
7454e8d8bef9SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
7455e8d8bef9SDimitry Andric   auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7456e8d8bef9SDimitry Andric   auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7457e8d8bef9SDimitry Andric                        : MIRBuilder.buildLShr(Ty, Result, RHS);
7458e8d8bef9SDimitry Andric 
7459e8d8bef9SDimitry Andric   MachineInstrBuilder SatVal;
7460e8d8bef9SDimitry Andric   if (IsSigned) {
7461e8d8bef9SDimitry Andric     auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7462e8d8bef9SDimitry Andric     auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7463e8d8bef9SDimitry Andric     auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7464e8d8bef9SDimitry Andric                                     MIRBuilder.buildConstant(Ty, 0));
7465e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7466e8d8bef9SDimitry Andric   } else {
7467e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7468e8d8bef9SDimitry Andric   }
7469e8d8bef9SDimitry Andric   auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7470e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7471e8d8bef9SDimitry Andric 
7472e8d8bef9SDimitry Andric   MI.eraseFromParent();
7473e8d8bef9SDimitry Andric   return Legalized;
7474e8d8bef9SDimitry Andric }
7475e8d8bef9SDimitry Andric 
747606c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
747706c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7478480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
74795ffd83dbSDimitry Andric   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7480480093f4SDimitry Andric   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7481480093f4SDimitry Andric 
7482480093f4SDimitry Andric   // Swap most and least significant byte, set remaining bytes in Res to zero.
7483480093f4SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7484480093f4SDimitry Andric   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7485480093f4SDimitry Andric   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7486480093f4SDimitry Andric   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7487480093f4SDimitry Andric 
7488480093f4SDimitry Andric   // Set i-th high/low byte in Res to i-th low/high byte from Src.
7489480093f4SDimitry Andric   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7490480093f4SDimitry Andric     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7491480093f4SDimitry Andric     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7492480093f4SDimitry Andric     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7493480093f4SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7494480093f4SDimitry Andric     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7495480093f4SDimitry Andric     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7496480093f4SDimitry Andric     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7497480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7498480093f4SDimitry Andric     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7499480093f4SDimitry Andric     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7500480093f4SDimitry Andric     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7501480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7502480093f4SDimitry Andric   }
7503480093f4SDimitry Andric   Res.getInstr()->getOperand(0).setReg(Dst);
7504480093f4SDimitry Andric 
7505480093f4SDimitry Andric   MI.eraseFromParent();
7506480093f4SDimitry Andric   return Legalized;
7507480093f4SDimitry Andric }
7508480093f4SDimitry Andric 
7509480093f4SDimitry Andric //{ (Src & Mask) >> N } | { (Src << N) & Mask }
7510480093f4SDimitry Andric static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7511480093f4SDimitry Andric                                  MachineInstrBuilder Src, APInt Mask) {
7512480093f4SDimitry Andric   const LLT Ty = Dst.getLLTTy(*B.getMRI());
7513480093f4SDimitry Andric   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7514480093f4SDimitry Andric   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7515480093f4SDimitry Andric   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7516480093f4SDimitry Andric   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7517480093f4SDimitry Andric   return B.buildOr(Dst, LHS, RHS);
7518480093f4SDimitry Andric }
7519480093f4SDimitry Andric 
7520480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7521480093f4SDimitry Andric LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
752206c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7523480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
7524480093f4SDimitry Andric   unsigned Size = Ty.getSizeInBits();
7525480093f4SDimitry Andric 
7526480093f4SDimitry Andric   MachineInstrBuilder BSWAP =
7527480093f4SDimitry Andric       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7528480093f4SDimitry Andric 
7529480093f4SDimitry Andric   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7530480093f4SDimitry Andric   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7531480093f4SDimitry Andric   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7532480093f4SDimitry Andric   MachineInstrBuilder Swap4 =
7533480093f4SDimitry Andric       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7534480093f4SDimitry Andric 
7535480093f4SDimitry Andric   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7536480093f4SDimitry Andric   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7537480093f4SDimitry Andric   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7538480093f4SDimitry Andric   MachineInstrBuilder Swap2 =
7539480093f4SDimitry Andric       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7540480093f4SDimitry Andric 
7541480093f4SDimitry Andric   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7542480093f4SDimitry Andric   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7543480093f4SDimitry Andric   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7544480093f4SDimitry Andric   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7545480093f4SDimitry Andric 
7546480093f4SDimitry Andric   MI.eraseFromParent();
7547480093f4SDimitry Andric   return Legalized;
7548480093f4SDimitry Andric }
7549480093f4SDimitry Andric 
7550480093f4SDimitry Andric LegalizerHelper::LegalizeResult
75515ffd83dbSDimitry Andric LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
7552480093f4SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
75535ffd83dbSDimitry Andric 
75545ffd83dbSDimitry Andric   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
75555ffd83dbSDimitry Andric   int NameOpIdx = IsRead ? 1 : 0;
75565ffd83dbSDimitry Andric   int ValRegIndex = IsRead ? 0 : 1;
75575ffd83dbSDimitry Andric 
75585ffd83dbSDimitry Andric   Register ValReg = MI.getOperand(ValRegIndex).getReg();
75595ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(ValReg);
75605ffd83dbSDimitry Andric   const MDString *RegStr = cast<MDString>(
75615ffd83dbSDimitry Andric     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
75625ffd83dbSDimitry Andric 
7563e8d8bef9SDimitry Andric   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
75645ffd83dbSDimitry Andric   if (!PhysReg.isValid())
7565480093f4SDimitry Andric     return UnableToLegalize;
7566480093f4SDimitry Andric 
75675ffd83dbSDimitry Andric   if (IsRead)
75685ffd83dbSDimitry Andric     MIRBuilder.buildCopy(ValReg, PhysReg);
75695ffd83dbSDimitry Andric   else
75705ffd83dbSDimitry Andric     MIRBuilder.buildCopy(PhysReg, ValReg);
75715ffd83dbSDimitry Andric 
7572480093f4SDimitry Andric   MI.eraseFromParent();
7573480093f4SDimitry Andric   return Legalized;
7574480093f4SDimitry Andric }
7575e8d8bef9SDimitry Andric 
7576e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7577e8d8bef9SDimitry Andric LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
7578e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
7579e8d8bef9SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
7580e8d8bef9SDimitry Andric   Register Result = MI.getOperand(0).getReg();
7581e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(Result);
7582e8d8bef9SDimitry Andric   auto SizeInBits = OrigTy.getScalarSizeInBits();
7583e8d8bef9SDimitry Andric   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
7584e8d8bef9SDimitry Andric 
7585e8d8bef9SDimitry Andric   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
7586e8d8bef9SDimitry Andric   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
7587e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
7588e8d8bef9SDimitry Andric   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
7589e8d8bef9SDimitry Andric 
7590e8d8bef9SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
7591e8d8bef9SDimitry Andric   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
7592e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(Result, Shifted);
7593e8d8bef9SDimitry Andric 
7594e8d8bef9SDimitry Andric   MI.eraseFromParent();
7595e8d8bef9SDimitry Andric   return Legalized;
7596e8d8bef9SDimitry Andric }
7597e8d8bef9SDimitry Andric 
/// Lower a floating-point class test into integer compares on the bit pattern
/// of the source value.
///
/// Operand 2 of \p MI is an immediate FPClassTest mask. The source is copied
/// into a same-width integer type, and one boolean is built for every class
/// (or group of classes) requested in the mask; the booleans are ORed together
/// and the result is copied into the destination register.
///
/// \returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());

  // Trivial masks: testing for no class yields constant 0, testing for every
  // class yields constant 1.
  if (Mask == fcNone) {
    MIRBuilder.buildConstant(DstReg, 0);
    MI.eraseFromParent();
    return Legalized;
  }
  if (Mask == fcAllFlags) {
    MIRBuilder.buildConstant(DstReg, 1);
    MI.eraseFromParent();
    return Legalized;
  }

  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
  // version

  unsigned BitSize = SrcTy.getScalarSizeInBits();
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

  // Integer type with the same element width (and element count, for
  // vectors) as the source; the source is copied into a register of it.
  LLT IntTy = LLT::scalar(BitSize);
  if (SrcTy.isVector())
    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  APInt ExpMask = Inf;
  // Bits of the largest finite value that are not part of the infinity
  // pattern.
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  // Highest set bit of AllOneMantissa.
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  // All-ones in the result type; XORing with it inverts a result value.
  APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());

  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);

  // Abs is the value with the sign bit cleared. Sign is true exactly when
  // clearing the sign bit changed the value, i.e. when the sign bit was set.
  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Sign =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);

  // Running result; every partial test is ORed into it via appendToRes.
  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  // Clang doesn't support capture of structured bindings:
  LLT DstTyCopy = DstTy;
  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
  };

  // Tests that involve more than one class should be processed first.
  // Each handled group is removed from Mask so the per-class checks below
  // only see what is left.
  if ((Mask & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                     ExpMaskC));
    Mask &= ~fcFinite;
  } else if ((Mask & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
                                     ExpMaskC));
    Mask &= ~fcPosFinite;
  } else if ((Mask & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                    ExpMaskC);
    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
    appendToRes(And);
    Mask &= ~fcNegFinite;
  }

  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    // TODO: Handle inverted case
    // Only the case where all zero and subnormal classes are requested is
    // folded here; anything else falls through to the individual checks.
    if (PartialCheck == (fcZero | fcSubnormal)) {
      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       ExpBits, ZeroC));
      Mask &= ~PartialCheck;
    }
  }

  // Check for individual classes.
  if (FPClassTest PartialCheck = Mask & fcZero) {
    // +0 is the all-zero pattern, -0 is only the sign bit, and either zero is
    // matched by comparing the sign-cleared value against zero.
    if (PartialCheck == fcPosZero)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, ZeroC));
    else if (PartialCheck == fcZero)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
    else // fcNegZero
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, SignBitC));
  }

  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
    auto SubnormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
    // The negative-only test additionally requires the sign bit to be set.
    if (PartialCheck == fcNegSubnormal)
      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
    appendToRes(SubnormalRes);
  }

  if (FPClassTest PartialCheck = Mask & fcInf) {
    // Compare against the exact infinity pattern: the raw value for +inf,
    // the sign-cleared value for either sign, the negated pattern for -inf.
    if (PartialCheck == fcPosInf)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, InfC));
    else if (PartialCheck == fcInf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
    else { // fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, NegInfC));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNan) {
    // Note: this constant is emitted for every NaN test, although the plain
    // fcNan branch below does not use it.
    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) u> int(inf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
                                       InfWithQnanBitC));
    } else { // fcSNan
      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
      auto IsNan =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
                                            Abs, InfWithQnanBitC);
      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNormal) {
    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
    // (max_exp-1))
    // ExpLSB is the lowest bit of the exponent mask, i.e. an exponent of 1.
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    auto ExpMinusOne = MIRBuilder.buildSub(
        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
    APInt MaxExpMinusOne = ExpMask - ExpLSB;
    auto NormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
    // Sign-specific variants AND in the sign test or its inversion.
    if (PartialCheck == fcNegNormal)
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
    else if (PartialCheck == fcPosNormal) {
      auto PosSign = MIRBuilder.buildXor(
          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
    }
    appendToRes(NormalRes);
  }

  // Publish the accumulated result in the original destination register.
  MIRBuilder.buildCopy(DstReg, Res);
  MI.eraseFromParent();
  return Legalized;
}
7771bdd1243dSDimitry Andric 
7772e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
7773e8d8bef9SDimitry Andric   // Implement vector G_SELECT in terms of XOR, AND, OR.
777406c3fb27SDimitry Andric   auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
777506c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
7776e8d8bef9SDimitry Andric   if (!DstTy.isVector())
7777e8d8bef9SDimitry Andric     return UnableToLegalize;
7778e8d8bef9SDimitry Andric 
7779bdd1243dSDimitry Andric   bool IsEltPtr = DstTy.getElementType().isPointer();
7780bdd1243dSDimitry Andric   if (IsEltPtr) {
7781bdd1243dSDimitry Andric     LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
7782bdd1243dSDimitry Andric     LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
7783bdd1243dSDimitry Andric     Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
7784bdd1243dSDimitry Andric     Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
7785bdd1243dSDimitry Andric     DstTy = NewTy;
7786bdd1243dSDimitry Andric   }
7787bdd1243dSDimitry Andric 
7788e8d8bef9SDimitry Andric   if (MaskTy.isScalar()) {
778981ad6265SDimitry Andric     // Turn the scalar condition into a vector condition mask.
779081ad6265SDimitry Andric 
7791e8d8bef9SDimitry Andric     Register MaskElt = MaskReg;
779281ad6265SDimitry Andric 
779381ad6265SDimitry Andric     // The condition was potentially zero extended before, but we want a sign
779481ad6265SDimitry Andric     // extended boolean.
7795bdd1243dSDimitry Andric     if (MaskTy != LLT::scalar(1))
779681ad6265SDimitry Andric       MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
7797e8d8bef9SDimitry Andric 
779881ad6265SDimitry Andric     // Continue the sign extension (or truncate) to match the data type.
779981ad6265SDimitry Andric     MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
780081ad6265SDimitry Andric                                           MaskElt).getReg(0);
780181ad6265SDimitry Andric 
780281ad6265SDimitry Andric     // Generate a vector splat idiom.
780381ad6265SDimitry Andric     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
780481ad6265SDimitry Andric     MaskReg = ShufSplat.getReg(0);
780581ad6265SDimitry Andric     MaskTy = DstTy;
780681ad6265SDimitry Andric   }
780781ad6265SDimitry Andric 
780881ad6265SDimitry Andric   if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
7809e8d8bef9SDimitry Andric     return UnableToLegalize;
7810e8d8bef9SDimitry Andric   }
7811e8d8bef9SDimitry Andric 
7812e8d8bef9SDimitry Andric   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
7813e8d8bef9SDimitry Andric   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
7814e8d8bef9SDimitry Andric   auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
7815bdd1243dSDimitry Andric   if (IsEltPtr) {
7816bdd1243dSDimitry Andric     auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
7817bdd1243dSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
7818bdd1243dSDimitry Andric   } else {
7819e8d8bef9SDimitry Andric     MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
7820bdd1243dSDimitry Andric   }
7821e8d8bef9SDimitry Andric   MI.eraseFromParent();
7822e8d8bef9SDimitry Andric   return Legalized;
7823e8d8bef9SDimitry Andric }
7824fe6060f1SDimitry Andric 
7825fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
7826fe6060f1SDimitry Andric   // Split DIVREM into individual instructions.
7827fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
7828fe6060f1SDimitry Andric 
7829fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
7830fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
7831fe6060f1SDimitry Andric                                         : TargetOpcode::G_UDIV,
7832fe6060f1SDimitry Andric       {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
7833fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
7834fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
7835fe6060f1SDimitry Andric                                         : TargetOpcode::G_UREM,
7836fe6060f1SDimitry Andric       {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
7837fe6060f1SDimitry Andric   MI.eraseFromParent();
7838fe6060f1SDimitry Andric   return Legalized;
7839fe6060f1SDimitry Andric }
7840fe6060f1SDimitry Andric 
7841fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
7842fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
7843fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
7844fe6060f1SDimitry Andric   // %v1 = G_ASHR %a, scalar_size-1
7845fe6060f1SDimitry Andric   // %v2 = G_ADD %a, %v1
7846fe6060f1SDimitry Andric   // %res = G_XOR %v2, %v1
7847fe6060f1SDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
7848fe6060f1SDimitry Andric   Register OpReg = MI.getOperand(1).getReg();
7849fe6060f1SDimitry Andric   auto ShiftAmt =
7850fe6060f1SDimitry Andric       MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
7851fe6060f1SDimitry Andric   auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
7852fe6060f1SDimitry Andric   auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
7853fe6060f1SDimitry Andric   MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
7854fe6060f1SDimitry Andric   MI.eraseFromParent();
7855fe6060f1SDimitry Andric   return Legalized;
7856fe6060f1SDimitry Andric }
7857fe6060f1SDimitry Andric 
7858fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
7859fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
7860fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
7861fe6060f1SDimitry Andric   // %v1 = G_CONSTANT 0
7862fe6060f1SDimitry Andric   // %v2 = G_SUB %v1, %a
7863fe6060f1SDimitry Andric   // %res = G_SMAX %a, %v2
7864fe6060f1SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
7865fe6060f1SDimitry Andric   LLT Ty = MRI.getType(SrcReg);
7866fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
7867fe6060f1SDimitry Andric   auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
7868fe6060f1SDimitry Andric   MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
7869fe6060f1SDimitry Andric   MI.eraseFromParent();
7870fe6060f1SDimitry Andric   return Legalized;
7871fe6060f1SDimitry Andric }
7872349cc55cSDimitry Andric 
7873349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
7874349cc55cSDimitry Andric LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
7875349cc55cSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
7876349cc55cSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
7877349cc55cSDimitry Andric   LLT DstTy = MRI.getType(SrcReg);
7878349cc55cSDimitry Andric 
7879349cc55cSDimitry Andric   // The source could be a scalar if the IR type was <1 x sN>.
7880349cc55cSDimitry Andric   if (SrcTy.isScalar()) {
7881349cc55cSDimitry Andric     if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
7882349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle extension.
7883349cc55cSDimitry Andric     // This can be just a plain copy.
7884349cc55cSDimitry Andric     Observer.changingInstr(MI);
7885349cc55cSDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
7886349cc55cSDimitry Andric     Observer.changedInstr(MI);
7887349cc55cSDimitry Andric     return Legalized;
7888349cc55cSDimitry Andric   }
788906c3fb27SDimitry Andric   return UnableToLegalize;
7890349cc55cSDimitry Andric }
7891349cc55cSDimitry Andric 
// Forward declaration so lowerVAArg below can obtain an IR Type for an LLT.
// The function is static, so its definition has to live in this file.
static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
7893*5f757f3fSDimitry Andric 
7894*5f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
7895*5f757f3fSDimitry Andric   MachineFunction &MF = *MI.getMF();
7896*5f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
7897*5f757f3fSDimitry Andric   LLVMContext &Ctx = MF.getFunction().getContext();
7898*5f757f3fSDimitry Andric   Register ListPtr = MI.getOperand(1).getReg();
7899*5f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(ListPtr);
7900*5f757f3fSDimitry Andric 
7901*5f757f3fSDimitry Andric   // LstPtr is a pointer to the head of the list. Get the address
7902*5f757f3fSDimitry Andric   // of the head of the list.
7903*5f757f3fSDimitry Andric   Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
7904*5f757f3fSDimitry Andric   MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
7905*5f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
7906*5f757f3fSDimitry Andric   auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
7907*5f757f3fSDimitry Andric 
7908*5f757f3fSDimitry Andric   const Align A(MI.getOperand(2).getImm());
7909*5f757f3fSDimitry Andric   LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
7910*5f757f3fSDimitry Andric   if (A > TLI.getMinStackArgumentAlignment()) {
7911*5f757f3fSDimitry Andric     Register AlignAmt =
7912*5f757f3fSDimitry Andric         MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
7913*5f757f3fSDimitry Andric     auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
7914*5f757f3fSDimitry Andric     auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
7915*5f757f3fSDimitry Andric     VAList = AndDst.getReg(0);
7916*5f757f3fSDimitry Andric   }
7917*5f757f3fSDimitry Andric 
7918*5f757f3fSDimitry Andric   // Increment the pointer, VAList, to the next vaarg
7919*5f757f3fSDimitry Andric   // The list should be bumped by the size of element in the current head of
7920*5f757f3fSDimitry Andric   // list.
7921*5f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
7922*5f757f3fSDimitry Andric   LLT LLTTy = MRI.getType(Dst);
7923*5f757f3fSDimitry Andric   Type *Ty = getTypeForLLT(LLTTy, Ctx);
7924*5f757f3fSDimitry Andric   auto IncAmt =
7925*5f757f3fSDimitry Andric       MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
7926*5f757f3fSDimitry Andric   auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
7927*5f757f3fSDimitry Andric 
7928*5f757f3fSDimitry Andric   // Store the increment VAList to the legalized pointer
7929*5f757f3fSDimitry Andric   MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
7930*5f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
7931*5f757f3fSDimitry Andric   MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
7932*5f757f3fSDimitry Andric   // Load the actual argument out of the pointer VAList
7933*5f757f3fSDimitry Andric   Align EltAlignment = DL.getABITypeAlign(Ty);
7934*5f757f3fSDimitry Andric   MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
7935*5f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
7936*5f757f3fSDimitry Andric   MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
7937*5f757f3fSDimitry Andric 
7938*5f757f3fSDimitry Andric   MI.eraseFromParent();
7939*5f757f3fSDimitry Andric   return Legalized;
7940*5f757f3fSDimitry Andric }
7941*5f757f3fSDimitry Andric 
7942349cc55cSDimitry Andric static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
7943349cc55cSDimitry Andric   // On Darwin, -Os means optimize for size without hurting performance, so
7944349cc55cSDimitry Andric   // only really optimize for size when -Oz (MinSize) is used.
7945349cc55cSDimitry Andric   if (MF.getTarget().getTargetTriple().isOSDarwin())
7946349cc55cSDimitry Andric     return MF.getFunction().hasMinSize();
7947349cc55cSDimitry Andric   return MF.getFunction().hasOptSize();
7948349cc55cSDimitry Andric }
7949349cc55cSDimitry Andric 
7950349cc55cSDimitry Andric // Returns a list of types to use for memory op lowering in MemOps. A partial
7951349cc55cSDimitry Andric // port of findOptimalMemOpLowering in TargetLowering.
7952349cc55cSDimitry Andric static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
7953349cc55cSDimitry Andric                                           unsigned Limit, const MemOp &Op,
7954349cc55cSDimitry Andric                                           unsigned DstAS, unsigned SrcAS,
7955349cc55cSDimitry Andric                                           const AttributeList &FuncAttributes,
7956349cc55cSDimitry Andric                                           const TargetLowering &TLI) {
7957349cc55cSDimitry Andric   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
7958349cc55cSDimitry Andric     return false;
7959349cc55cSDimitry Andric 
7960349cc55cSDimitry Andric   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
7961349cc55cSDimitry Andric 
7962349cc55cSDimitry Andric   if (Ty == LLT()) {
7963349cc55cSDimitry Andric     // Use the largest scalar type whose alignment constraints are satisfied.
7964349cc55cSDimitry Andric     // We only need to check DstAlign here as SrcAlign is always greater or
7965349cc55cSDimitry Andric     // equal to DstAlign (or zero).
7966349cc55cSDimitry Andric     Ty = LLT::scalar(64);
7967349cc55cSDimitry Andric     if (Op.isFixedDstAlign())
7968349cc55cSDimitry Andric       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
7969349cc55cSDimitry Andric              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
7970349cc55cSDimitry Andric         Ty = LLT::scalar(Ty.getSizeInBytes());
7971349cc55cSDimitry Andric     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
7972349cc55cSDimitry Andric     // FIXME: check for the largest legal type we can load/store to.
7973349cc55cSDimitry Andric   }
7974349cc55cSDimitry Andric 
7975349cc55cSDimitry Andric   unsigned NumMemOps = 0;
7976349cc55cSDimitry Andric   uint64_t Size = Op.size();
7977349cc55cSDimitry Andric   while (Size) {
7978349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
7979349cc55cSDimitry Andric     while (TySize > Size) {
7980349cc55cSDimitry Andric       // For now, only use non-vector load / store's for the left-over pieces.
7981349cc55cSDimitry Andric       LLT NewTy = Ty;
7982349cc55cSDimitry Andric       // FIXME: check for mem op safety and legality of the types. Not all of
7983349cc55cSDimitry Andric       // SDAGisms map cleanly to GISel concepts.
7984349cc55cSDimitry Andric       if (NewTy.isVector())
7985349cc55cSDimitry Andric         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
798606c3fb27SDimitry Andric       NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
7987349cc55cSDimitry Andric       unsigned NewTySize = NewTy.getSizeInBytes();
7988349cc55cSDimitry Andric       assert(NewTySize > 0 && "Could not find appropriate type");
7989349cc55cSDimitry Andric 
7990349cc55cSDimitry Andric       // If the new LLT cannot cover all of the remaining bits, then consider
7991349cc55cSDimitry Andric       // issuing a (or a pair of) unaligned and overlapping load / store.
7992bdd1243dSDimitry Andric       unsigned Fast;
7993349cc55cSDimitry Andric       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
7994349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
7995349cc55cSDimitry Andric       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
7996349cc55cSDimitry Andric           TLI.allowsMisalignedMemoryAccesses(
7997349cc55cSDimitry Andric               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
7998349cc55cSDimitry Andric               MachineMemOperand::MONone, &Fast) &&
7999349cc55cSDimitry Andric           Fast)
8000349cc55cSDimitry Andric         TySize = Size;
8001349cc55cSDimitry Andric       else {
8002349cc55cSDimitry Andric         Ty = NewTy;
8003349cc55cSDimitry Andric         TySize = NewTySize;
8004349cc55cSDimitry Andric       }
8005349cc55cSDimitry Andric     }
8006349cc55cSDimitry Andric 
8007349cc55cSDimitry Andric     if (++NumMemOps > Limit)
8008349cc55cSDimitry Andric       return false;
8009349cc55cSDimitry Andric 
8010349cc55cSDimitry Andric     MemOps.push_back(Ty);
8011349cc55cSDimitry Andric     Size -= TySize;
8012349cc55cSDimitry Andric   }
8013349cc55cSDimitry Andric 
8014349cc55cSDimitry Andric   return true;
8015349cc55cSDimitry Andric }
8016349cc55cSDimitry Andric 
8017349cc55cSDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8018349cc55cSDimitry Andric   if (Ty.isVector())
8019349cc55cSDimitry Andric     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8020349cc55cSDimitry Andric                                 Ty.getNumElements());
8021349cc55cSDimitry Andric   return IntegerType::get(C, Ty.getSizeInBits());
8022349cc55cSDimitry Andric }
8023349cc55cSDimitry Andric 
8024349cc55cSDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition.
8025349cc55cSDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8026349cc55cSDimitry Andric   MachineRegisterInfo &MRI = *MIB.getMRI();
8027349cc55cSDimitry Andric   unsigned NumBits = Ty.getScalarSizeInBits();
8028349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8029349cc55cSDimitry Andric   if (!Ty.isVector() && ValVRegAndVal) {
803081ad6265SDimitry Andric     APInt Scalar = ValVRegAndVal->Value.trunc(8);
8031349cc55cSDimitry Andric     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8032349cc55cSDimitry Andric     return MIB.buildConstant(Ty, SplatVal).getReg(0);
8033349cc55cSDimitry Andric   }
8034349cc55cSDimitry Andric 
8035349cc55cSDimitry Andric   // Extend the byte value to the larger type, and then multiply by a magic
8036349cc55cSDimitry Andric   // value 0x010101... in order to replicate it across every byte.
8037349cc55cSDimitry Andric   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8038349cc55cSDimitry Andric   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8039349cc55cSDimitry Andric     return MIB.buildConstant(Ty, 0).getReg(0);
8040349cc55cSDimitry Andric   }
8041349cc55cSDimitry Andric 
8042349cc55cSDimitry Andric   LLT ExtType = Ty.getScalarType();
8043349cc55cSDimitry Andric   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8044349cc55cSDimitry Andric   if (NumBits > 8) {
8045349cc55cSDimitry Andric     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8046349cc55cSDimitry Andric     auto MagicMI = MIB.buildConstant(ExtType, Magic);
8047349cc55cSDimitry Andric     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8048349cc55cSDimitry Andric   }
8049349cc55cSDimitry Andric 
8050349cc55cSDimitry Andric   // For vector types create a G_BUILD_VECTOR.
8051349cc55cSDimitry Andric   if (Ty.isVector())
8052349cc55cSDimitry Andric     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
8053349cc55cSDimitry Andric 
8054349cc55cSDimitry Andric   return Val;
8055349cc55cSDimitry Andric }
8056349cc55cSDimitry Andric 
8057349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8058349cc55cSDimitry Andric LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8059349cc55cSDimitry Andric                              uint64_t KnownLen, Align Alignment,
8060349cc55cSDimitry Andric                              bool IsVolatile) {
8061349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8062349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8063349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8064349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8065349cc55cSDimitry Andric 
8066349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memset length!");
8067349cc55cSDimitry Andric 
8068349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8069349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8070349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
8071349cc55cSDimitry Andric 
8072349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8073349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8074349cc55cSDimitry Andric     DstAlignCanChange = true;
8075349cc55cSDimitry Andric 
8076349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8077349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8078349cc55cSDimitry Andric 
8079349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8080349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8081349cc55cSDimitry Andric 
8082349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8083349cc55cSDimitry Andric   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8084349cc55cSDimitry Andric 
8085349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8086349cc55cSDimitry Andric                                      MemOp::Set(KnownLen, DstAlignCanChange,
8087349cc55cSDimitry Andric                                                 Alignment,
8088349cc55cSDimitry Andric                                                 /*IsZeroMemset=*/IsZeroVal,
8089349cc55cSDimitry Andric                                                 /*IsVolatile=*/IsVolatile),
8090349cc55cSDimitry Andric                                      DstPtrInfo.getAddrSpace(), ~0u,
8091349cc55cSDimitry Andric                                      MF.getFunction().getAttributes(), TLI))
8092349cc55cSDimitry Andric     return UnableToLegalize;
8093349cc55cSDimitry Andric 
8094349cc55cSDimitry Andric   if (DstAlignCanChange) {
8095349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8096349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8097349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8098349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8099349cc55cSDimitry Andric       Alignment = NewAlign;
8100349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8101349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8102349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8103349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8104349cc55cSDimitry Andric     }
8105349cc55cSDimitry Andric   }
8106349cc55cSDimitry Andric 
8107349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8108349cc55cSDimitry Andric   // Find the largest store and generate the bit pattern for it.
8109349cc55cSDimitry Andric   LLT LargestTy = MemOps[0];
8110349cc55cSDimitry Andric   for (unsigned i = 1; i < MemOps.size(); i++)
8111349cc55cSDimitry Andric     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8112349cc55cSDimitry Andric       LargestTy = MemOps[i];
8113349cc55cSDimitry Andric 
8114349cc55cSDimitry Andric   // The memset stored value is always defined as an s8, so in order to make it
8115349cc55cSDimitry Andric   // work with larger store types we need to repeat the bit pattern across the
8116349cc55cSDimitry Andric   // wider type.
8117349cc55cSDimitry Andric   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8118349cc55cSDimitry Andric 
8119349cc55cSDimitry Andric   if (!MemSetValue)
8120349cc55cSDimitry Andric     return UnableToLegalize;
8121349cc55cSDimitry Andric 
8122349cc55cSDimitry Andric   // Generate the stores. For each store type in the list, we generate the
8123349cc55cSDimitry Andric   // matching store of that type to the destination address.
8124349cc55cSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
8125349cc55cSDimitry Andric   unsigned DstOff = 0;
8126349cc55cSDimitry Andric   unsigned Size = KnownLen;
8127349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); I++) {
8128349cc55cSDimitry Andric     LLT Ty = MemOps[I];
8129349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
8130349cc55cSDimitry Andric     if (TySize > Size) {
8131349cc55cSDimitry Andric       // Issuing an unaligned load / store pair that overlaps with the previous
8132349cc55cSDimitry Andric       // pair. Adjust the offset accordingly.
8133349cc55cSDimitry Andric       assert(I == MemOps.size() - 1 && I != 0);
8134349cc55cSDimitry Andric       DstOff -= TySize - Size;
8135349cc55cSDimitry Andric     }
8136349cc55cSDimitry Andric 
8137349cc55cSDimitry Andric     // If this store is smaller than the largest store see whether we can get
8138349cc55cSDimitry Andric     // the smaller value for free with a truncate.
8139349cc55cSDimitry Andric     Register Value = MemSetValue;
8140349cc55cSDimitry Andric     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8141349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
8142349cc55cSDimitry Andric       MVT LargestVT = getMVTForLLT(LargestTy);
8143349cc55cSDimitry Andric       if (!LargestTy.isVector() && !Ty.isVector() &&
8144349cc55cSDimitry Andric           TLI.isTruncateFree(LargestVT, VT))
8145349cc55cSDimitry Andric         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8146349cc55cSDimitry Andric       else
8147349cc55cSDimitry Andric         Value = getMemsetValue(Val, Ty, MIB);
8148349cc55cSDimitry Andric       if (!Value)
8149349cc55cSDimitry Andric         return UnableToLegalize;
8150349cc55cSDimitry Andric     }
8151349cc55cSDimitry Andric 
8152349cc55cSDimitry Andric     auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8153349cc55cSDimitry Andric 
8154349cc55cSDimitry Andric     Register Ptr = Dst;
8155349cc55cSDimitry Andric     if (DstOff != 0) {
8156349cc55cSDimitry Andric       auto Offset =
8157349cc55cSDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8158349cc55cSDimitry Andric       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8159349cc55cSDimitry Andric     }
8160349cc55cSDimitry Andric 
8161349cc55cSDimitry Andric     MIB.buildStore(Value, Ptr, *StoreMMO);
8162349cc55cSDimitry Andric     DstOff += Ty.getSizeInBytes();
8163349cc55cSDimitry Andric     Size -= TySize;
8164349cc55cSDimitry Andric   }
8165349cc55cSDimitry Andric 
8166349cc55cSDimitry Andric   MI.eraseFromParent();
8167349cc55cSDimitry Andric   return Legalized;
8168349cc55cSDimitry Andric }
8169349cc55cSDimitry Andric 
8170349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8171349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8172349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8173349cc55cSDimitry Andric 
817406c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8175349cc55cSDimitry Andric 
8176349cc55cSDimitry Andric   const auto *MMOIt = MI.memoperands_begin();
8177349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8178349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8179349cc55cSDimitry Andric 
8180349cc55cSDimitry Andric   // See if this is a constant length copy
8181349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8182349cc55cSDimitry Andric   // FIXME: support dynamically sized G_MEMCPY_INLINE
818381ad6265SDimitry Andric   assert(LenVRegAndVal &&
8184349cc55cSDimitry Andric          "inline memcpy with dynamic size is not yet supported");
8185349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8186349cc55cSDimitry Andric   if (KnownLen == 0) {
8187349cc55cSDimitry Andric     MI.eraseFromParent();
8188349cc55cSDimitry Andric     return Legalized;
8189349cc55cSDimitry Andric   }
8190349cc55cSDimitry Andric 
8191349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8192349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8193349cc55cSDimitry Andric   Align DstAlign = DstMMO.getBaseAlign();
8194349cc55cSDimitry Andric   Align SrcAlign = SrcMMO.getBaseAlign();
8195349cc55cSDimitry Andric 
8196349cc55cSDimitry Andric   return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8197349cc55cSDimitry Andric                            IsVolatile);
8198349cc55cSDimitry Andric }
8199349cc55cSDimitry Andric 
8200349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8201349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8202349cc55cSDimitry Andric                                    uint64_t KnownLen, Align DstAlign,
8203349cc55cSDimitry Andric                                    Align SrcAlign, bool IsVolatile) {
8204349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8205349cc55cSDimitry Andric   return lowerMemcpy(MI, Dst, Src, KnownLen,
8206349cc55cSDimitry Andric                      std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8207349cc55cSDimitry Andric                      IsVolatile);
8208349cc55cSDimitry Andric }
8209349cc55cSDimitry Andric 
8210349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8211349cc55cSDimitry Andric LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8212349cc55cSDimitry Andric                              uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8213349cc55cSDimitry Andric                              Align SrcAlign, bool IsVolatile) {
8214349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8215349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8216349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8217349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8218349cc55cSDimitry Andric 
8219349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memcpy length!");
8220349cc55cSDimitry Andric 
8221349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8222349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
822381ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8224349cc55cSDimitry Andric 
8225349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8226349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8227349cc55cSDimitry Andric     DstAlignCanChange = true;
8228349cc55cSDimitry Andric 
8229349cc55cSDimitry Andric   // FIXME: infer better src pointer alignment like SelectionDAG does here.
8230349cc55cSDimitry Andric   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8231349cc55cSDimitry Andric   // if the memcpy is in a tail call position.
8232349cc55cSDimitry Andric 
8233349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8234349cc55cSDimitry Andric 
8235349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8236349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8237349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8238349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8239349cc55cSDimitry Andric 
8240349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8241349cc55cSDimitry Andric           MemOps, Limit,
8242349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8243349cc55cSDimitry Andric                       IsVolatile),
8244349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8245349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8246349cc55cSDimitry Andric     return UnableToLegalize;
8247349cc55cSDimitry Andric 
8248349cc55cSDimitry Andric   if (DstAlignCanChange) {
8249349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8250349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8251349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8252349cc55cSDimitry Andric 
8253349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8254349cc55cSDimitry Andric     // realignment.
8255349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8256349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8257349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
825881ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8259349cc55cSDimitry Andric 
8260349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8261349cc55cSDimitry Andric       Alignment = NewAlign;
8262349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8263349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8264349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8265349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8266349cc55cSDimitry Andric     }
8267349cc55cSDimitry Andric   }
8268349cc55cSDimitry Andric 
8269349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8270349cc55cSDimitry Andric 
8271349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8272349cc55cSDimitry Andric   // Now we need to emit a pair of load and stores for each of the types we've
8273349cc55cSDimitry Andric   // collected. I.e. for each type, generate a load from the source pointer of
8274349cc55cSDimitry Andric   // that type width, and then generate a corresponding store to the dest buffer
8275349cc55cSDimitry Andric   // of that value loaded. This can result in a sequence of loads and stores
8276349cc55cSDimitry Andric   // mixed types, depending on what the target specifies as good types to use.
8277349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8278349cc55cSDimitry Andric   unsigned Size = KnownLen;
8279349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8280349cc55cSDimitry Andric     // Issuing an unaligned load / store pair  that overlaps with the previous
8281349cc55cSDimitry Andric     // pair. Adjust the offset accordingly.
8282349cc55cSDimitry Andric     if (CopyTy.getSizeInBytes() > Size)
8283349cc55cSDimitry Andric       CurrOffset -= CopyTy.getSizeInBytes() - Size;
8284349cc55cSDimitry Andric 
8285349cc55cSDimitry Andric     // Construct MMOs for the accesses.
8286349cc55cSDimitry Andric     auto *LoadMMO =
8287349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8288349cc55cSDimitry Andric     auto *StoreMMO =
8289349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8290349cc55cSDimitry Andric 
8291349cc55cSDimitry Andric     // Create the load.
8292349cc55cSDimitry Andric     Register LoadPtr = Src;
8293349cc55cSDimitry Andric     Register Offset;
8294349cc55cSDimitry Andric     if (CurrOffset != 0) {
82954824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
82964824e7fdSDimitry Andric       Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8297349cc55cSDimitry Andric                    .getReg(0);
82984824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8299349cc55cSDimitry Andric     }
8300349cc55cSDimitry Andric     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8301349cc55cSDimitry Andric 
8302349cc55cSDimitry Andric     // Create the store.
83034824e7fdSDimitry Andric     Register StorePtr = Dst;
83044824e7fdSDimitry Andric     if (CurrOffset != 0) {
83054824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
83064824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
83074824e7fdSDimitry Andric     }
8308349cc55cSDimitry Andric     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8309349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8310349cc55cSDimitry Andric     Size -= CopyTy.getSizeInBytes();
8311349cc55cSDimitry Andric   }
8312349cc55cSDimitry Andric 
8313349cc55cSDimitry Andric   MI.eraseFromParent();
8314349cc55cSDimitry Andric   return Legalized;
8315349cc55cSDimitry Andric }
8316349cc55cSDimitry Andric 
8317349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8318349cc55cSDimitry Andric LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8319349cc55cSDimitry Andric                               uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8320349cc55cSDimitry Andric                               bool IsVolatile) {
8321349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8322349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8323349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8324349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8325349cc55cSDimitry Andric 
8326349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memmove length!");
8327349cc55cSDimitry Andric 
8328349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8329349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8330349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
833181ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8332349cc55cSDimitry Andric 
8333349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8334349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8335349cc55cSDimitry Andric     DstAlignCanChange = true;
8336349cc55cSDimitry Andric 
8337349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8338349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8339349cc55cSDimitry Andric 
8340349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8341349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8342349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8343349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8344349cc55cSDimitry Andric 
8345349cc55cSDimitry Andric   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8346349cc55cSDimitry Andric   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
8347349cc55cSDimitry Andric   // same thing here.
8348349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8349349cc55cSDimitry Andric           MemOps, Limit,
8350349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8351349cc55cSDimitry Andric                       /*IsVolatile*/ true),
8352349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8353349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8354349cc55cSDimitry Andric     return UnableToLegalize;
8355349cc55cSDimitry Andric 
8356349cc55cSDimitry Andric   if (DstAlignCanChange) {
8357349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8358349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8359349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8360349cc55cSDimitry Andric 
8361349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8362349cc55cSDimitry Andric     // realignment.
8363349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8364349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8365349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
836681ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8367349cc55cSDimitry Andric 
8368349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8369349cc55cSDimitry Andric       Alignment = NewAlign;
8370349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8371349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8372349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8373349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8374349cc55cSDimitry Andric     }
8375349cc55cSDimitry Andric   }
8376349cc55cSDimitry Andric 
8377349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8378349cc55cSDimitry Andric 
8379349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8380349cc55cSDimitry Andric   // Memmove requires that we perform the loads first before issuing the stores.
8381349cc55cSDimitry Andric   // Apart from that, this loop is pretty much doing the same thing as the
8382349cc55cSDimitry Andric   // memcpy codegen function.
8383349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8384349cc55cSDimitry Andric   SmallVector<Register, 16> LoadVals;
8385349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8386349cc55cSDimitry Andric     // Construct MMO for the load.
8387349cc55cSDimitry Andric     auto *LoadMMO =
8388349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8389349cc55cSDimitry Andric 
8390349cc55cSDimitry Andric     // Create the load.
8391349cc55cSDimitry Andric     Register LoadPtr = Src;
8392349cc55cSDimitry Andric     if (CurrOffset != 0) {
83934824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
8394349cc55cSDimitry Andric       auto Offset =
83954824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
83964824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8397349cc55cSDimitry Andric     }
8398349cc55cSDimitry Andric     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8399349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8400349cc55cSDimitry Andric   }
8401349cc55cSDimitry Andric 
8402349cc55cSDimitry Andric   CurrOffset = 0;
8403349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); ++I) {
8404349cc55cSDimitry Andric     LLT CopyTy = MemOps[I];
8405349cc55cSDimitry Andric     // Now store the values loaded.
8406349cc55cSDimitry Andric     auto *StoreMMO =
8407349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8408349cc55cSDimitry Andric 
8409349cc55cSDimitry Andric     Register StorePtr = Dst;
8410349cc55cSDimitry Andric     if (CurrOffset != 0) {
84114824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
8412349cc55cSDimitry Andric       auto Offset =
84134824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
84144824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8415349cc55cSDimitry Andric     }
8416349cc55cSDimitry Andric     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8417349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8418349cc55cSDimitry Andric   }
8419349cc55cSDimitry Andric   MI.eraseFromParent();
8420349cc55cSDimitry Andric   return Legalized;
8421349cc55cSDimitry Andric }
8422349cc55cSDimitry Andric 
8423349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8424349cc55cSDimitry Andric LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8425349cc55cSDimitry Andric   const unsigned Opc = MI.getOpcode();
8426349cc55cSDimitry Andric   // This combine is fairly complex so it's not written with a separate
8427349cc55cSDimitry Andric   // matcher function.
8428349cc55cSDimitry Andric   assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8429349cc55cSDimitry Andric           Opc == TargetOpcode::G_MEMSET) &&
8430349cc55cSDimitry Andric          "Expected memcpy like instruction");
8431349cc55cSDimitry Andric 
8432349cc55cSDimitry Andric   auto MMOIt = MI.memoperands_begin();
8433349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8434349cc55cSDimitry Andric 
8435349cc55cSDimitry Andric   Align DstAlign = MemOp->getBaseAlign();
8436349cc55cSDimitry Andric   Align SrcAlign;
843706c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8438349cc55cSDimitry Andric 
8439349cc55cSDimitry Andric   if (Opc != TargetOpcode::G_MEMSET) {
8440349cc55cSDimitry Andric     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8441349cc55cSDimitry Andric     MemOp = *(++MMOIt);
8442349cc55cSDimitry Andric     SrcAlign = MemOp->getBaseAlign();
8443349cc55cSDimitry Andric   }
8444349cc55cSDimitry Andric 
8445349cc55cSDimitry Andric   // See if this is a constant length copy
8446349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8447349cc55cSDimitry Andric   if (!LenVRegAndVal)
8448349cc55cSDimitry Andric     return UnableToLegalize;
8449349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8450349cc55cSDimitry Andric 
8451349cc55cSDimitry Andric   if (KnownLen == 0) {
8452349cc55cSDimitry Andric     MI.eraseFromParent();
8453349cc55cSDimitry Andric     return Legalized;
8454349cc55cSDimitry Andric   }
8455349cc55cSDimitry Andric 
8456349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8457349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8458349cc55cSDimitry Andric     return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8459349cc55cSDimitry Andric                              IsVolatile);
8460349cc55cSDimitry Andric 
8461349cc55cSDimitry Andric   // Don't try to optimize volatile.
8462349cc55cSDimitry Andric   if (IsVolatile)
8463349cc55cSDimitry Andric     return UnableToLegalize;
8464349cc55cSDimitry Andric 
8465349cc55cSDimitry Andric   if (MaxLen && KnownLen > MaxLen)
8466349cc55cSDimitry Andric     return UnableToLegalize;
8467349cc55cSDimitry Andric 
8468349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY) {
8469349cc55cSDimitry Andric     auto &MF = *MI.getParent()->getParent();
8470349cc55cSDimitry Andric     const auto &TLI = *MF.getSubtarget().getTargetLowering();
8471349cc55cSDimitry Andric     bool OptSize = shouldLowerMemFuncForSize(MF);
8472349cc55cSDimitry Andric     uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8473349cc55cSDimitry Andric     return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8474349cc55cSDimitry Andric                        IsVolatile);
8475349cc55cSDimitry Andric   }
8476349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMMOVE)
8477349cc55cSDimitry Andric     return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8478349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMSET)
8479349cc55cSDimitry Andric     return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8480349cc55cSDimitry Andric   return UnableToLegalize;
8481349cc55cSDimitry Andric }
8482