xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (revision 297eecfb02bb25902531dbb5c3b9a88caf8adf29)
10b57cec5SDimitry Andric //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This file implements the LegalizerHelper class to legalize
100b57cec5SDimitry Andric /// individual instructions and the LegalizeMachineIR wrapper pass for the
110b57cec5SDimitry Andric /// primary legalization.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CallLowering.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
1806c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1981ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
22e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
2506c3fb27SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
2681ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
281db9f3b2SDimitry Andric #include "llvm/CodeGen/RuntimeLibcalls.h"
298bcb0991SDimitry Andric #include "llvm/CodeGen/TargetFrameLowering.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
32fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
34fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h"
350b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
360b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
370b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
38349cc55cSDimitry Andric #include "llvm/Target/TargetMachine.h"
39bdd1243dSDimitry Andric #include <numeric>
40bdd1243dSDimitry Andric #include <optional>
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric #define DEBUG_TYPE "legalizer"
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric using namespace llvm;
450b57cec5SDimitry Andric using namespace LegalizeActions;
46e8d8bef9SDimitry Andric using namespace MIPatternMatch;
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
490b57cec5SDimitry Andric ///
500b57cec5SDimitry Andric /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
510b57cec5SDimitry Andric /// with any leftover piece as type \p LeftoverTy
520b57cec5SDimitry Andric ///
530b57cec5SDimitry Andric /// Returns -1 in the first element of the pair if the breakdown is not
540b57cec5SDimitry Andric /// satisfiable.
550b57cec5SDimitry Andric static std::pair<int, int>
560b57cec5SDimitry Andric getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
570b57cec5SDimitry Andric   assert(!LeftoverTy.isValid() && "this is an out argument");
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   unsigned Size = OrigTy.getSizeInBits();
600b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
610b57cec5SDimitry Andric   unsigned NumParts = Size / NarrowSize;
620b57cec5SDimitry Andric   unsigned LeftoverSize = Size - NumParts * NarrowSize;
630b57cec5SDimitry Andric   assert(Size > NarrowSize);
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   if (LeftoverSize == 0)
660b57cec5SDimitry Andric     return {NumParts, 0};
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   if (NarrowTy.isVector()) {
690b57cec5SDimitry Andric     unsigned EltSize = OrigTy.getScalarSizeInBits();
700b57cec5SDimitry Andric     if (LeftoverSize % EltSize != 0)
710b57cec5SDimitry Andric       return {-1, -1};
72fe6060f1SDimitry Andric     LeftoverTy = LLT::scalarOrVector(
73fe6060f1SDimitry Andric         ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
740b57cec5SDimitry Andric   } else {
750b57cec5SDimitry Andric     LeftoverTy = LLT::scalar(LeftoverSize);
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
790b57cec5SDimitry Andric   return std::make_pair(NumParts, NumLeftover);
800b57cec5SDimitry Andric }
810b57cec5SDimitry Andric 
825ffd83dbSDimitry Andric static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
835ffd83dbSDimitry Andric 
845ffd83dbSDimitry Andric   if (!Ty.isScalar())
855ffd83dbSDimitry Andric     return nullptr;
865ffd83dbSDimitry Andric 
875ffd83dbSDimitry Andric   switch (Ty.getSizeInBits()) {
885ffd83dbSDimitry Andric   case 16:
895ffd83dbSDimitry Andric     return Type::getHalfTy(Ctx);
905ffd83dbSDimitry Andric   case 32:
915ffd83dbSDimitry Andric     return Type::getFloatTy(Ctx);
925ffd83dbSDimitry Andric   case 64:
935ffd83dbSDimitry Andric     return Type::getDoubleTy(Ctx);
94e8d8bef9SDimitry Andric   case 80:
95e8d8bef9SDimitry Andric     return Type::getX86_FP80Ty(Ctx);
965ffd83dbSDimitry Andric   case 128:
975ffd83dbSDimitry Andric     return Type::getFP128Ty(Ctx);
985ffd83dbSDimitry Andric   default:
995ffd83dbSDimitry Andric     return nullptr;
1005ffd83dbSDimitry Andric   }
1015ffd83dbSDimitry Andric }
1025ffd83dbSDimitry Andric 
1030b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF,
1040b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
1050b57cec5SDimitry Andric                                  MachineIRBuilder &Builder)
1065ffd83dbSDimitry Andric     : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107e8d8bef9SDimitry Andric       LI(*MF.getSubtarget().getLegalizerInfo()),
10806c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
1110b57cec5SDimitry Andric                                  GISelChangeObserver &Observer,
11206c3fb27SDimitry Andric                                  MachineIRBuilder &B, GISelKnownBits *KB)
113e8d8bef9SDimitry Andric     : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
11406c3fb27SDimitry Andric       TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115e8d8bef9SDimitry Andric 
1160b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
117fe6060f1SDimitry Andric LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118fe6060f1SDimitry Andric                                    LostDebugLocObserver &LocObserver) {
1195ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
1205ffd83dbSDimitry Andric 
1215ffd83dbSDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
1220b57cec5SDimitry Andric 
1235f757f3fSDimitry Andric   if (isa<GIntrinsic>(MI))
1245ffd83dbSDimitry Andric     return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
1250b57cec5SDimitry Andric   auto Step = LI.getAction(MI, MRI);
1260b57cec5SDimitry Andric   switch (Step.Action) {
1270b57cec5SDimitry Andric   case Legal:
1280b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Already legal\n");
1290b57cec5SDimitry Andric     return AlreadyLegal;
1300b57cec5SDimitry Andric   case Libcall:
1310b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132fe6060f1SDimitry Andric     return libcall(MI, LocObserver);
1330b57cec5SDimitry Andric   case NarrowScalar:
1340b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
1350b57cec5SDimitry Andric     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
1360b57cec5SDimitry Andric   case WidenScalar:
1370b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
1380b57cec5SDimitry Andric     return widenScalar(MI, Step.TypeIdx, Step.NewType);
1395ffd83dbSDimitry Andric   case Bitcast:
1405ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
1415ffd83dbSDimitry Andric     return bitcast(MI, Step.TypeIdx, Step.NewType);
1420b57cec5SDimitry Andric   case Lower:
1430b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Lower\n");
1440b57cec5SDimitry Andric     return lower(MI, Step.TypeIdx, Step.NewType);
1450b57cec5SDimitry Andric   case FewerElements:
1460b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
1470b57cec5SDimitry Andric     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
1480b57cec5SDimitry Andric   case MoreElements:
1490b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
1500b57cec5SDimitry Andric     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
1510b57cec5SDimitry Andric   case Custom:
1520b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
1531db9f3b2SDimitry Andric     return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
1541db9f3b2SDimitry Andric                                                      : UnableToLegalize;
1550b57cec5SDimitry Andric   default:
1560b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
1570b57cec5SDimitry Andric     return UnableToLegalize;
1580b57cec5SDimitry Andric   }
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
1620b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &VRegs) {
1630b57cec5SDimitry Andric   for (int i = 0; i < NumParts; ++i)
1640b57cec5SDimitry Andric     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
1650b57cec5SDimitry Andric   MIRBuilder.buildUnmerge(VRegs, Reg);
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
1690b57cec5SDimitry Andric                                    LLT MainTy, LLT &LeftoverTy,
1700b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &VRegs,
1710b57cec5SDimitry Andric                                    SmallVectorImpl<Register> &LeftoverRegs) {
1720b57cec5SDimitry Andric   assert(!LeftoverTy.isValid() && "this is an out argument");
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   unsigned RegSize = RegTy.getSizeInBits();
1750b57cec5SDimitry Andric   unsigned MainSize = MainTy.getSizeInBits();
1760b57cec5SDimitry Andric   unsigned NumParts = RegSize / MainSize;
1770b57cec5SDimitry Andric   unsigned LeftoverSize = RegSize - NumParts * MainSize;
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric   // Use an unmerge when possible.
1800b57cec5SDimitry Andric   if (LeftoverSize == 0) {
1810b57cec5SDimitry Andric     for (unsigned I = 0; I < NumParts; ++I)
1820b57cec5SDimitry Andric       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
1830b57cec5SDimitry Andric     MIRBuilder.buildUnmerge(VRegs, Reg);
1840b57cec5SDimitry Andric     return true;
1850b57cec5SDimitry Andric   }
1860b57cec5SDimitry Andric 
1870eae32dcSDimitry Andric   // Perform irregular split. Leftover is last element of RegPieces.
1880b57cec5SDimitry Andric   if (MainTy.isVector()) {
1890eae32dcSDimitry Andric     SmallVector<Register, 8> RegPieces;
1900eae32dcSDimitry Andric     extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
1910eae32dcSDimitry Andric     for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
1920eae32dcSDimitry Andric       VRegs.push_back(RegPieces[i]);
1930eae32dcSDimitry Andric     LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
1940eae32dcSDimitry Andric     LeftoverTy = MRI.getType(LeftoverRegs[0]);
1950eae32dcSDimitry Andric     return true;
1960b57cec5SDimitry Andric   }
1970b57cec5SDimitry Andric 
1980eae32dcSDimitry Andric   LeftoverTy = LLT::scalar(LeftoverSize);
1990b57cec5SDimitry Andric   // For irregular sizes, extract the individual parts.
2000b57cec5SDimitry Andric   for (unsigned I = 0; I != NumParts; ++I) {
2010b57cec5SDimitry Andric     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
2020b57cec5SDimitry Andric     VRegs.push_back(NewReg);
2030b57cec5SDimitry Andric     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
2040b57cec5SDimitry Andric   }
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
2070b57cec5SDimitry Andric        Offset += LeftoverSize) {
2080b57cec5SDimitry Andric     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
2090b57cec5SDimitry Andric     LeftoverRegs.push_back(NewReg);
2100b57cec5SDimitry Andric     MIRBuilder.buildExtract(NewReg, Reg, Offset);
2110b57cec5SDimitry Andric   }
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   return true;
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric 
2160eae32dcSDimitry Andric void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
2170eae32dcSDimitry Andric                                          SmallVectorImpl<Register> &VRegs) {
2180eae32dcSDimitry Andric   LLT RegTy = MRI.getType(Reg);
2190eae32dcSDimitry Andric   assert(RegTy.isVector() && "Expected a vector type");
2200eae32dcSDimitry Andric 
2210eae32dcSDimitry Andric   LLT EltTy = RegTy.getElementType();
2220eae32dcSDimitry Andric   LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
2230eae32dcSDimitry Andric   unsigned RegNumElts = RegTy.getNumElements();
2240eae32dcSDimitry Andric   unsigned LeftoverNumElts = RegNumElts % NumElts;
2250eae32dcSDimitry Andric   unsigned NumNarrowTyPieces = RegNumElts / NumElts;
2260eae32dcSDimitry Andric 
2270eae32dcSDimitry Andric   // Perfect split without leftover
2280eae32dcSDimitry Andric   if (LeftoverNumElts == 0)
2290eae32dcSDimitry Andric     return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
2300eae32dcSDimitry Andric 
2310eae32dcSDimitry Andric   // Irregular split. Provide direct access to all elements for artifact
2320eae32dcSDimitry Andric   // combiner using unmerge to elements. Then build vectors with NumElts
2330eae32dcSDimitry Andric   // elements. Remaining element(s) will be (used to build vector) Leftover.
2340eae32dcSDimitry Andric   SmallVector<Register, 8> Elts;
2350eae32dcSDimitry Andric   extractParts(Reg, EltTy, RegNumElts, Elts);
2360eae32dcSDimitry Andric 
2370eae32dcSDimitry Andric   unsigned Offset = 0;
2380eae32dcSDimitry Andric   // Requested sub-vectors of NarrowTy.
2390eae32dcSDimitry Andric   for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
2400eae32dcSDimitry Andric     ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
241bdd1243dSDimitry Andric     VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
2420eae32dcSDimitry Andric   }
2430eae32dcSDimitry Andric 
2440eae32dcSDimitry Andric   // Leftover element(s).
2450eae32dcSDimitry Andric   if (LeftoverNumElts == 1) {
2460eae32dcSDimitry Andric     VRegs.push_back(Elts[Offset]);
2470eae32dcSDimitry Andric   } else {
2480eae32dcSDimitry Andric     LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
2490eae32dcSDimitry Andric     ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
250bdd1243dSDimitry Andric     VRegs.push_back(
251bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
2520eae32dcSDimitry Andric   }
2530eae32dcSDimitry Andric }
2540eae32dcSDimitry Andric 
2550b57cec5SDimitry Andric void LegalizerHelper::insertParts(Register DstReg,
2560b57cec5SDimitry Andric                                   LLT ResultTy, LLT PartTy,
2570b57cec5SDimitry Andric                                   ArrayRef<Register> PartRegs,
2580b57cec5SDimitry Andric                                   LLT LeftoverTy,
2590b57cec5SDimitry Andric                                   ArrayRef<Register> LeftoverRegs) {
2600b57cec5SDimitry Andric   if (!LeftoverTy.isValid()) {
2610b57cec5SDimitry Andric     assert(LeftoverRegs.empty());
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric     if (!ResultTy.isVector()) {
264bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
2650b57cec5SDimitry Andric       return;
2660b57cec5SDimitry Andric     }
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric     if (PartTy.isVector())
2690b57cec5SDimitry Andric       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
2700b57cec5SDimitry Andric     else
2710b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, PartRegs);
2720b57cec5SDimitry Andric     return;
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric 
2750eae32dcSDimitry Andric   // Merge sub-vectors with different number of elements and insert into DstReg.
2760eae32dcSDimitry Andric   if (ResultTy.isVector()) {
2770eae32dcSDimitry Andric     assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
2780eae32dcSDimitry Andric     SmallVector<Register, 8> AllRegs;
2790eae32dcSDimitry Andric     for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
2800eae32dcSDimitry Andric       AllRegs.push_back(Reg);
2810eae32dcSDimitry Andric     return mergeMixedSubvectors(DstReg, AllRegs);
2820eae32dcSDimitry Andric   }
2830eae32dcSDimitry Andric 
284fe6060f1SDimitry Andric   SmallVector<Register> GCDRegs;
285fe6060f1SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
286fe6060f1SDimitry Andric   for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
287fe6060f1SDimitry Andric     extractGCDType(GCDRegs, GCDTy, PartReg);
288fe6060f1SDimitry Andric   LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
289fe6060f1SDimitry Andric   buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
2900b57cec5SDimitry Andric }
2910b57cec5SDimitry Andric 
2920eae32dcSDimitry Andric void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
2930eae32dcSDimitry Andric                                        Register Reg) {
2940eae32dcSDimitry Andric   LLT Ty = MRI.getType(Reg);
2950eae32dcSDimitry Andric   SmallVector<Register, 8> RegElts;
2960eae32dcSDimitry Andric   extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
2970eae32dcSDimitry Andric   Elts.append(RegElts);
2980eae32dcSDimitry Andric }
2990eae32dcSDimitry Andric 
3000eae32dcSDimitry Andric /// Merge \p PartRegs with different types into \p DstReg.
3010eae32dcSDimitry Andric void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
3020eae32dcSDimitry Andric                                            ArrayRef<Register> PartRegs) {
3030eae32dcSDimitry Andric   SmallVector<Register, 8> AllElts;
3040eae32dcSDimitry Andric   for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
3050eae32dcSDimitry Andric     appendVectorElts(AllElts, PartRegs[i]);
3060eae32dcSDimitry Andric 
3070eae32dcSDimitry Andric   Register Leftover = PartRegs[PartRegs.size() - 1];
3080eae32dcSDimitry Andric   if (MRI.getType(Leftover).isScalar())
3090eae32dcSDimitry Andric     AllElts.push_back(Leftover);
3100eae32dcSDimitry Andric   else
3110eae32dcSDimitry Andric     appendVectorElts(AllElts, Leftover);
3120eae32dcSDimitry Andric 
313bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
3140eae32dcSDimitry Andric }
3150eae32dcSDimitry Andric 
316e8d8bef9SDimitry Andric /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
3175ffd83dbSDimitry Andric static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
3185ffd83dbSDimitry Andric                               const MachineInstr &MI) {
3195ffd83dbSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
3205ffd83dbSDimitry Andric 
321e8d8bef9SDimitry Andric   const int StartIdx = Regs.size();
3225ffd83dbSDimitry Andric   const int NumResults = MI.getNumOperands() - 1;
323e8d8bef9SDimitry Andric   Regs.resize(Regs.size() + NumResults);
3245ffd83dbSDimitry Andric   for (int I = 0; I != NumResults; ++I)
325e8d8bef9SDimitry Andric     Regs[StartIdx + I] = MI.getOperand(I).getReg();
3265ffd83dbSDimitry Andric }
3275ffd83dbSDimitry Andric 
328e8d8bef9SDimitry Andric void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
329e8d8bef9SDimitry Andric                                      LLT GCDTy, Register SrcReg) {
3305ffd83dbSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
3315ffd83dbSDimitry Andric   if (SrcTy == GCDTy) {
3325ffd83dbSDimitry Andric     // If the source already evenly divides the result type, we don't need to do
3335ffd83dbSDimitry Andric     // anything.
3345ffd83dbSDimitry Andric     Parts.push_back(SrcReg);
3355ffd83dbSDimitry Andric   } else {
3365ffd83dbSDimitry Andric     // Need to split into common type sized pieces.
3375ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3385ffd83dbSDimitry Andric     getUnmergeResults(Parts, *Unmerge);
3395ffd83dbSDimitry Andric   }
340e8d8bef9SDimitry Andric }
3415ffd83dbSDimitry Andric 
342e8d8bef9SDimitry Andric LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
343e8d8bef9SDimitry Andric                                     LLT NarrowTy, Register SrcReg) {
344e8d8bef9SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
345e8d8bef9SDimitry Andric   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
346e8d8bef9SDimitry Andric   extractGCDType(Parts, GCDTy, SrcReg);
3475ffd83dbSDimitry Andric   return GCDTy;
3485ffd83dbSDimitry Andric }
3495ffd83dbSDimitry Andric 
3505ffd83dbSDimitry Andric LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
3515ffd83dbSDimitry Andric                                          SmallVectorImpl<Register> &VRegs,
3525ffd83dbSDimitry Andric                                          unsigned PadStrategy) {
3535ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(DstTy, NarrowTy);
3545ffd83dbSDimitry Andric 
3555ffd83dbSDimitry Andric   int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3565ffd83dbSDimitry Andric   int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
3575ffd83dbSDimitry Andric   int NumOrigSrc = VRegs.size();
3585ffd83dbSDimitry Andric 
3595ffd83dbSDimitry Andric   Register PadReg;
3605ffd83dbSDimitry Andric 
3615ffd83dbSDimitry Andric   // Get a value we can use to pad the source value if the sources won't evenly
3625ffd83dbSDimitry Andric   // cover the result type.
3635ffd83dbSDimitry Andric   if (NumOrigSrc < NumParts * NumSubParts) {
3645ffd83dbSDimitry Andric     if (PadStrategy == TargetOpcode::G_ZEXT)
3655ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
3665ffd83dbSDimitry Andric     else if (PadStrategy == TargetOpcode::G_ANYEXT)
3675ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
3685ffd83dbSDimitry Andric     else {
3695ffd83dbSDimitry Andric       assert(PadStrategy == TargetOpcode::G_SEXT);
3705ffd83dbSDimitry Andric 
3715ffd83dbSDimitry Andric       // Shift the sign bit of the low register through the high register.
3725ffd83dbSDimitry Andric       auto ShiftAmt =
3735ffd83dbSDimitry Andric         MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
3745ffd83dbSDimitry Andric       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
3755ffd83dbSDimitry Andric     }
3765ffd83dbSDimitry Andric   }
3775ffd83dbSDimitry Andric 
3785ffd83dbSDimitry Andric   // Registers for the final merge to be produced.
3795ffd83dbSDimitry Andric   SmallVector<Register, 4> Remerge(NumParts);
3805ffd83dbSDimitry Andric 
3815ffd83dbSDimitry Andric   // Registers needed for intermediate merges, which will be merged into a
3825ffd83dbSDimitry Andric   // source for Remerge.
3835ffd83dbSDimitry Andric   SmallVector<Register, 4> SubMerge(NumSubParts);
3845ffd83dbSDimitry Andric 
3855ffd83dbSDimitry Andric   // Once we've fully read off the end of the original source bits, we can reuse
3865ffd83dbSDimitry Andric   // the same high bits for remaining padding elements.
3875ffd83dbSDimitry Andric   Register AllPadReg;
3885ffd83dbSDimitry Andric 
3895ffd83dbSDimitry Andric   // Build merges to the LCM type to cover the original result type.
3905ffd83dbSDimitry Andric   for (int I = 0; I != NumParts; ++I) {
3915ffd83dbSDimitry Andric     bool AllMergePartsArePadding = true;
3925ffd83dbSDimitry Andric 
3935ffd83dbSDimitry Andric     // Build the requested merges to the requested type.
3945ffd83dbSDimitry Andric     for (int J = 0; J != NumSubParts; ++J) {
3955ffd83dbSDimitry Andric       int Idx = I * NumSubParts + J;
3965ffd83dbSDimitry Andric       if (Idx >= NumOrigSrc) {
3975ffd83dbSDimitry Andric         SubMerge[J] = PadReg;
3985ffd83dbSDimitry Andric         continue;
3995ffd83dbSDimitry Andric       }
4005ffd83dbSDimitry Andric 
4015ffd83dbSDimitry Andric       SubMerge[J] = VRegs[Idx];
4025ffd83dbSDimitry Andric 
4035ffd83dbSDimitry Andric       // There are meaningful bits here we can't reuse later.
4045ffd83dbSDimitry Andric       AllMergePartsArePadding = false;
4055ffd83dbSDimitry Andric     }
4065ffd83dbSDimitry Andric 
4075ffd83dbSDimitry Andric     // If we've filled up a complete piece with padding bits, we can directly
4085ffd83dbSDimitry Andric     // emit the natural sized constant if applicable, rather than a merge of
4095ffd83dbSDimitry Andric     // smaller constants.
4105ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg) {
4115ffd83dbSDimitry Andric       if (PadStrategy == TargetOpcode::G_ANYEXT)
4125ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4135ffd83dbSDimitry Andric       else if (PadStrategy == TargetOpcode::G_ZEXT)
4145ffd83dbSDimitry Andric         AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
4155ffd83dbSDimitry Andric 
4165ffd83dbSDimitry Andric       // If this is a sign extension, we can't materialize a trivial constant
4175ffd83dbSDimitry Andric       // with the right type and have to produce a merge.
4185ffd83dbSDimitry Andric     }
4195ffd83dbSDimitry Andric 
4205ffd83dbSDimitry Andric     if (AllPadReg) {
4215ffd83dbSDimitry Andric       // Avoid creating additional instructions if we're just adding additional
4225ffd83dbSDimitry Andric       // copies of padding bits.
4235ffd83dbSDimitry Andric       Remerge[I] = AllPadReg;
4245ffd83dbSDimitry Andric       continue;
4255ffd83dbSDimitry Andric     }
4265ffd83dbSDimitry Andric 
4275ffd83dbSDimitry Andric     if (NumSubParts == 1)
4285ffd83dbSDimitry Andric       Remerge[I] = SubMerge[0];
4295ffd83dbSDimitry Andric     else
430bdd1243dSDimitry Andric       Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
4315ffd83dbSDimitry Andric 
4325ffd83dbSDimitry Andric     // In the sign extend padding case, re-use the first all-signbit merge.
4335ffd83dbSDimitry Andric     if (AllMergePartsArePadding && !AllPadReg)
4345ffd83dbSDimitry Andric       AllPadReg = Remerge[I];
4355ffd83dbSDimitry Andric   }
4365ffd83dbSDimitry Andric 
4375ffd83dbSDimitry Andric   VRegs = std::move(Remerge);
4385ffd83dbSDimitry Andric   return LCMTy;
4395ffd83dbSDimitry Andric }
4405ffd83dbSDimitry Andric 
4415ffd83dbSDimitry Andric void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
4425ffd83dbSDimitry Andric                                                ArrayRef<Register> RemergeRegs) {
4435ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
4445ffd83dbSDimitry Andric 
4455ffd83dbSDimitry Andric   // Create the merge to the widened source, and extract the relevant bits into
4465ffd83dbSDimitry Andric   // the result.
4475ffd83dbSDimitry Andric 
4485ffd83dbSDimitry Andric   if (DstTy == LCMTy) {
449bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
4505ffd83dbSDimitry Andric     return;
4515ffd83dbSDimitry Andric   }
4525ffd83dbSDimitry Andric 
453bdd1243dSDimitry Andric   auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
4545ffd83dbSDimitry Andric   if (DstTy.isScalar() && LCMTy.isScalar()) {
4555ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(DstReg, Remerge);
4565ffd83dbSDimitry Andric     return;
4575ffd83dbSDimitry Andric   }
4585ffd83dbSDimitry Andric 
4595ffd83dbSDimitry Andric   if (LCMTy.isVector()) {
460e8d8bef9SDimitry Andric     unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
461e8d8bef9SDimitry Andric     SmallVector<Register, 8> UnmergeDefs(NumDefs);
462e8d8bef9SDimitry Andric     UnmergeDefs[0] = DstReg;
463e8d8bef9SDimitry Andric     for (unsigned I = 1; I != NumDefs; ++I)
464e8d8bef9SDimitry Andric       UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
465e8d8bef9SDimitry Andric 
466e8d8bef9SDimitry Andric     MIRBuilder.buildUnmerge(UnmergeDefs,
467bdd1243dSDimitry Andric                             MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
4685ffd83dbSDimitry Andric     return;
4695ffd83dbSDimitry Andric   }
4705ffd83dbSDimitry Andric 
4715ffd83dbSDimitry Andric   llvm_unreachable("unhandled case");
4725ffd83dbSDimitry Andric }
4735ffd83dbSDimitry Andric 
4740b57cec5SDimitry Andric static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
475e8d8bef9SDimitry Andric #define RTLIBCASE_INT(LibcallPrefix)                                           \
4765ffd83dbSDimitry Andric   do {                                                                         \
4775ffd83dbSDimitry Andric     switch (Size) {                                                            \
4785ffd83dbSDimitry Andric     case 32:                                                                   \
4795ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
4805ffd83dbSDimitry Andric     case 64:                                                                   \
4815ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
4825ffd83dbSDimitry Andric     case 128:                                                                  \
4835ffd83dbSDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
4845ffd83dbSDimitry Andric     default:                                                                   \
4855ffd83dbSDimitry Andric       llvm_unreachable("unexpected size");                                     \
4865ffd83dbSDimitry Andric     }                                                                          \
4875ffd83dbSDimitry Andric   } while (0)
4885ffd83dbSDimitry Andric 
489e8d8bef9SDimitry Andric #define RTLIBCASE(LibcallPrefix)                                               \
490e8d8bef9SDimitry Andric   do {                                                                         \
491e8d8bef9SDimitry Andric     switch (Size) {                                                            \
492e8d8bef9SDimitry Andric     case 32:                                                                   \
493e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##32;                                         \
494e8d8bef9SDimitry Andric     case 64:                                                                   \
495e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##64;                                         \
496e8d8bef9SDimitry Andric     case 80:                                                                   \
497e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##80;                                         \
498e8d8bef9SDimitry Andric     case 128:                                                                  \
499e8d8bef9SDimitry Andric       return RTLIB::LibcallPrefix##128;                                        \
500e8d8bef9SDimitry Andric     default:                                                                   \
501e8d8bef9SDimitry Andric       llvm_unreachable("unexpected size");                                     \
502e8d8bef9SDimitry Andric     }                                                                          \
503e8d8bef9SDimitry Andric   } while (0)
5045ffd83dbSDimitry Andric 
5050b57cec5SDimitry Andric   switch (Opcode) {
506bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
507bdd1243dSDimitry Andric     RTLIBCASE_INT(MUL_I);
5080b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
509e8d8bef9SDimitry Andric     RTLIBCASE_INT(SDIV_I);
5100b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
511e8d8bef9SDimitry Andric     RTLIBCASE_INT(UDIV_I);
5120b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
513e8d8bef9SDimitry Andric     RTLIBCASE_INT(SREM_I);
5140b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
515e8d8bef9SDimitry Andric     RTLIBCASE_INT(UREM_I);
5160b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
517e8d8bef9SDimitry Andric     RTLIBCASE_INT(CTLZ_I);
5180b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
5195ffd83dbSDimitry Andric     RTLIBCASE(ADD_F);
5200b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
5215ffd83dbSDimitry Andric     RTLIBCASE(SUB_F);
5220b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
5235ffd83dbSDimitry Andric     RTLIBCASE(MUL_F);
5240b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
5255ffd83dbSDimitry Andric     RTLIBCASE(DIV_F);
5260b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
5275ffd83dbSDimitry Andric     RTLIBCASE(EXP_F);
5280b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
5295ffd83dbSDimitry Andric     RTLIBCASE(EXP2_F);
5305f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
5315f757f3fSDimitry Andric     RTLIBCASE(EXP10_F);
5320b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
5335ffd83dbSDimitry Andric     RTLIBCASE(REM_F);
5340b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
5355ffd83dbSDimitry Andric     RTLIBCASE(POW_F);
5361db9f3b2SDimitry Andric   case TargetOpcode::G_FPOWI:
5371db9f3b2SDimitry Andric     RTLIBCASE(POWI_F);
5380b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
5395ffd83dbSDimitry Andric     RTLIBCASE(FMA_F);
5400b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
5415ffd83dbSDimitry Andric     RTLIBCASE(SIN_F);
5420b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
5435ffd83dbSDimitry Andric     RTLIBCASE(COS_F);
5440b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
5455ffd83dbSDimitry Andric     RTLIBCASE(LOG10_F);
5460b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
5475ffd83dbSDimitry Andric     RTLIBCASE(LOG_F);
5480b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
5495ffd83dbSDimitry Andric     RTLIBCASE(LOG2_F);
55006c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
55106c3fb27SDimitry Andric     RTLIBCASE(LDEXP_F);
5520b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
5535ffd83dbSDimitry Andric     RTLIBCASE(CEIL_F);
5540b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
5555ffd83dbSDimitry Andric     RTLIBCASE(FLOOR_F);
5565ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
5575ffd83dbSDimitry Andric     RTLIBCASE(FMIN_F);
5585ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
5595ffd83dbSDimitry Andric     RTLIBCASE(FMAX_F);
5605ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
5615ffd83dbSDimitry Andric     RTLIBCASE(SQRT_F);
5625ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
5635ffd83dbSDimitry Andric     RTLIBCASE(RINT_F);
5645ffd83dbSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
5655ffd83dbSDimitry Andric     RTLIBCASE(NEARBYINT_F);
566e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
567e8d8bef9SDimitry Andric     RTLIBCASE(ROUNDEVEN_F);
5680b57cec5SDimitry Andric   }
5690b57cec5SDimitry Andric   llvm_unreachable("Unknown libcall function");
5700b57cec5SDimitry Andric }
5710b57cec5SDimitry Andric 
5728bcb0991SDimitry Andric /// True if an instruction is in tail position in its caller. Intended for
5738bcb0991SDimitry Andric /// legalizing libcalls as tail calls when possible.
5741db9f3b2SDimitry Andric static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
5751db9f3b2SDimitry Andric                                     MachineInstr &MI,
576fe6060f1SDimitry Andric                                     const TargetInstrInfo &TII,
577fe6060f1SDimitry Andric                                     MachineRegisterInfo &MRI) {
5785ffd83dbSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
5795ffd83dbSDimitry Andric   const Function &F = MBB.getParent()->getFunction();
5808bcb0991SDimitry Andric 
5818bcb0991SDimitry Andric   // Conservatively require the attributes of the call to match those of
5828bcb0991SDimitry Andric   // the return. Ignore NoAlias and NonNull because they don't affect the
5838bcb0991SDimitry Andric   // call sequence.
5848bcb0991SDimitry Andric   AttributeList CallerAttrs = F.getAttributes();
58504eeddc0SDimitry Andric   if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
5868bcb0991SDimitry Andric           .removeAttribute(Attribute::NoAlias)
5878bcb0991SDimitry Andric           .removeAttribute(Attribute::NonNull)
5888bcb0991SDimitry Andric           .hasAttributes())
5898bcb0991SDimitry Andric     return false;
5908bcb0991SDimitry Andric 
5918bcb0991SDimitry Andric   // It's not safe to eliminate the sign / zero extension of the return value.
592349cc55cSDimitry Andric   if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
593349cc55cSDimitry Andric       CallerAttrs.hasRetAttr(Attribute::SExt))
5948bcb0991SDimitry Andric     return false;
5958bcb0991SDimitry Andric 
596fe6060f1SDimitry Andric   // Only tail call if the following instruction is a standard return or if we
597fe6060f1SDimitry Andric   // have a `thisreturn` callee, and a sequence like:
598fe6060f1SDimitry Andric   //
599fe6060f1SDimitry Andric   //   G_MEMCPY %0, %1, %2
600fe6060f1SDimitry Andric   //   $x0 = COPY %0
601fe6060f1SDimitry Andric   //   RET_ReallyLR implicit $x0
6025ffd83dbSDimitry Andric   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
603fe6060f1SDimitry Andric   if (Next != MBB.instr_end() && Next->isCopy()) {
6041db9f3b2SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_BZERO)
605fe6060f1SDimitry Andric       return false;
606fe6060f1SDimitry Andric 
6071db9f3b2SDimitry Andric     // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the
6081db9f3b2SDimitry Andric     // mempy/etc routines return the same parameter. For other it will be the
6091db9f3b2SDimitry Andric     // returned value.
610fe6060f1SDimitry Andric     Register VReg = MI.getOperand(0).getReg();
611fe6060f1SDimitry Andric     if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
612fe6060f1SDimitry Andric       return false;
613fe6060f1SDimitry Andric 
614fe6060f1SDimitry Andric     Register PReg = Next->getOperand(0).getReg();
615fe6060f1SDimitry Andric     if (!PReg.isPhysical())
616fe6060f1SDimitry Andric       return false;
617fe6060f1SDimitry Andric 
618fe6060f1SDimitry Andric     auto Ret = next_nodbg(Next, MBB.instr_end());
619fe6060f1SDimitry Andric     if (Ret == MBB.instr_end() || !Ret->isReturn())
620fe6060f1SDimitry Andric       return false;
621fe6060f1SDimitry Andric 
622fe6060f1SDimitry Andric     if (Ret->getNumImplicitOperands() != 1)
623fe6060f1SDimitry Andric       return false;
624fe6060f1SDimitry Andric 
6251db9f3b2SDimitry Andric     if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
626fe6060f1SDimitry Andric       return false;
627fe6060f1SDimitry Andric 
628fe6060f1SDimitry Andric     // Skip over the COPY that we just validated.
629fe6060f1SDimitry Andric     Next = Ret;
630fe6060f1SDimitry Andric   }
631fe6060f1SDimitry Andric 
6325ffd83dbSDimitry Andric   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
6338bcb0991SDimitry Andric     return false;
6348bcb0991SDimitry Andric 
6358bcb0991SDimitry Andric   return true;
6368bcb0991SDimitry Andric }
6378bcb0991SDimitry Andric 
6380b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
6395ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
6400b57cec5SDimitry Andric                     const CallLowering::ArgInfo &Result,
6415ffd83dbSDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args,
6421db9f3b2SDimitry Andric                     const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
6431db9f3b2SDimitry Andric                     MachineInstr *MI) {
6440b57cec5SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
6450b57cec5SDimitry Andric 
6468bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
6475ffd83dbSDimitry Andric   Info.CallConv = CC;
6488bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
6498bcb0991SDimitry Andric   Info.OrigRet = Result;
6501db9f3b2SDimitry Andric   if (MI)
6511db9f3b2SDimitry Andric     Info.IsTailCall =
6521db9f3b2SDimitry Andric         (Result.Ty->isVoidTy() ||
6531db9f3b2SDimitry Andric          Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
6541db9f3b2SDimitry Andric         isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
6551db9f3b2SDimitry Andric                                 *MIRBuilder.getMRI());
6561db9f3b2SDimitry Andric 
6578bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
6588bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
6590b57cec5SDimitry Andric     return LegalizerHelper::UnableToLegalize;
6600b57cec5SDimitry Andric 
6611db9f3b2SDimitry Andric   if (MI && Info.LoweredTailCall) {
6621db9f3b2SDimitry Andric     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
6631db9f3b2SDimitry Andric 
6641db9f3b2SDimitry Andric     // Check debug locations before removing the return.
6651db9f3b2SDimitry Andric     LocObserver.checkpoint(true);
6661db9f3b2SDimitry Andric 
6671db9f3b2SDimitry Andric     // We must have a return following the call (or debug insts) to get past
6681db9f3b2SDimitry Andric     // isLibCallInTailPosition.
6691db9f3b2SDimitry Andric     do {
6701db9f3b2SDimitry Andric       MachineInstr *Next = MI->getNextNode();
6711db9f3b2SDimitry Andric       assert(Next &&
6721db9f3b2SDimitry Andric              (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
6731db9f3b2SDimitry Andric              "Expected instr following MI to be return or debug inst?");
6741db9f3b2SDimitry Andric       // We lowered a tail call, so the call is now the return from the block.
6751db9f3b2SDimitry Andric       // Delete the old return.
6761db9f3b2SDimitry Andric       Next->eraseFromParent();
6771db9f3b2SDimitry Andric     } while (MI->getNextNode());
6781db9f3b2SDimitry Andric 
6791db9f3b2SDimitry Andric     // We expect to lose the debug location from the return.
6801db9f3b2SDimitry Andric     LocObserver.checkpoint(false);
6811db9f3b2SDimitry Andric   }
6820b57cec5SDimitry Andric   return LegalizerHelper::Legalized;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric 
6855ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
6865ffd83dbSDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
6875ffd83dbSDimitry Andric                     const CallLowering::ArgInfo &Result,
6881db9f3b2SDimitry Andric                     ArrayRef<CallLowering::ArgInfo> Args,
6891db9f3b2SDimitry Andric                     LostDebugLocObserver &LocObserver, MachineInstr *MI) {
6905ffd83dbSDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
6915ffd83dbSDimitry Andric   const char *Name = TLI.getLibcallName(Libcall);
6925ffd83dbSDimitry Andric   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
6931db9f3b2SDimitry Andric   return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
6945ffd83dbSDimitry Andric }
6955ffd83dbSDimitry Andric 
6960b57cec5SDimitry Andric // Useful for libcalls where all operands have the same type.
6970b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
6980b57cec5SDimitry Andric simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
6991db9f3b2SDimitry Andric               Type *OpType, LostDebugLocObserver &LocObserver) {
7000b57cec5SDimitry Andric   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
7010b57cec5SDimitry Andric 
702fe6060f1SDimitry Andric   // FIXME: What does the original arg index mean here?
7030b57cec5SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
7044824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
7054824e7fdSDimitry Andric     Args.push_back({MO.getReg(), OpType, 0});
706fe6060f1SDimitry Andric   return createLibcall(MIRBuilder, Libcall,
7071db9f3b2SDimitry Andric                        {MI.getOperand(0).getReg(), OpType, 0}, Args,
7081db9f3b2SDimitry Andric                        LocObserver, &MI);
7090b57cec5SDimitry Andric }
7100b57cec5SDimitry Andric 
7118bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
7128bcb0991SDimitry Andric llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
713fe6060f1SDimitry Andric                        MachineInstr &MI, LostDebugLocObserver &LocObserver) {
7148bcb0991SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
7158bcb0991SDimitry Andric 
7168bcb0991SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
7178bcb0991SDimitry Andric   // Add all the args, except for the last which is an imm denoting 'tail'.
718e8d8bef9SDimitry Andric   for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
7198bcb0991SDimitry Andric     Register Reg = MI.getOperand(i).getReg();
7208bcb0991SDimitry Andric 
7218bcb0991SDimitry Andric     // Need derive an IR type for call lowering.
7228bcb0991SDimitry Andric     LLT OpLLT = MRI.getType(Reg);
7238bcb0991SDimitry Andric     Type *OpTy = nullptr;
7248bcb0991SDimitry Andric     if (OpLLT.isPointer())
7255f757f3fSDimitry Andric       OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
7268bcb0991SDimitry Andric     else
7278bcb0991SDimitry Andric       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
728fe6060f1SDimitry Andric     Args.push_back({Reg, OpTy, 0});
7298bcb0991SDimitry Andric   }
7308bcb0991SDimitry Andric 
7318bcb0991SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
7328bcb0991SDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
7338bcb0991SDimitry Andric   RTLIB::Libcall RTLibcall;
734fe6060f1SDimitry Andric   unsigned Opc = MI.getOpcode();
735fe6060f1SDimitry Andric   switch (Opc) {
736fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
737fe6060f1SDimitry Andric     RTLibcall = RTLIB::BZERO;
738fe6060f1SDimitry Andric     break;
739e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
7408bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMCPY;
741fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
7428bcb0991SDimitry Andric     break;
743e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
7448bcb0991SDimitry Andric     RTLibcall = RTLIB::MEMMOVE;
745fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
7468bcb0991SDimitry Andric     break;
747e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET:
748e8d8bef9SDimitry Andric     RTLibcall = RTLIB::MEMSET;
749fe6060f1SDimitry Andric     Args[0].Flags[0].setReturned();
750e8d8bef9SDimitry Andric     break;
7518bcb0991SDimitry Andric   default:
752fe6060f1SDimitry Andric     llvm_unreachable("unsupported opcode");
7538bcb0991SDimitry Andric   }
7548bcb0991SDimitry Andric   const char *Name = TLI.getLibcallName(RTLibcall);
7558bcb0991SDimitry Andric 
756fe6060f1SDimitry Andric   // Unsupported libcall on the target.
757fe6060f1SDimitry Andric   if (!Name) {
758fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
759fe6060f1SDimitry Andric                       << MIRBuilder.getTII().getName(Opc) << "\n");
760fe6060f1SDimitry Andric     return LegalizerHelper::UnableToLegalize;
761fe6060f1SDimitry Andric   }
762fe6060f1SDimitry Andric 
7638bcb0991SDimitry Andric   CallLowering::CallLoweringInfo Info;
7648bcb0991SDimitry Andric   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
7658bcb0991SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
766fe6060f1SDimitry Andric   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
7671db9f3b2SDimitry Andric   Info.IsTailCall =
7681db9f3b2SDimitry Andric       MI.getOperand(MI.getNumOperands() - 1).getImm() &&
7691db9f3b2SDimitry Andric       isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
7708bcb0991SDimitry Andric 
7718bcb0991SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
7728bcb0991SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
7738bcb0991SDimitry Andric     return LegalizerHelper::UnableToLegalize;
7748bcb0991SDimitry Andric 
7758bcb0991SDimitry Andric   if (Info.LoweredTailCall) {
7768bcb0991SDimitry Andric     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
777fe6060f1SDimitry Andric 
778fe6060f1SDimitry Andric     // Check debug locations before removing the return.
779fe6060f1SDimitry Andric     LocObserver.checkpoint(true);
780fe6060f1SDimitry Andric 
7815ffd83dbSDimitry Andric     // We must have a return following the call (or debug insts) to get past
7828bcb0991SDimitry Andric     // isLibCallInTailPosition.
7835ffd83dbSDimitry Andric     do {
7845ffd83dbSDimitry Andric       MachineInstr *Next = MI.getNextNode();
785fe6060f1SDimitry Andric       assert(Next &&
786fe6060f1SDimitry Andric              (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
7875ffd83dbSDimitry Andric              "Expected instr following MI to be return or debug inst?");
7888bcb0991SDimitry Andric       // We lowered a tail call, so the call is now the return from the block.
7898bcb0991SDimitry Andric       // Delete the old return.
7905ffd83dbSDimitry Andric       Next->eraseFromParent();
7915ffd83dbSDimitry Andric     } while (MI.getNextNode());
792fe6060f1SDimitry Andric 
793fe6060f1SDimitry Andric     // We expect to lose the debug location from the return.
794fe6060f1SDimitry Andric     LocObserver.checkpoint(false);
7958bcb0991SDimitry Andric   }
7968bcb0991SDimitry Andric 
7978bcb0991SDimitry Andric   return LegalizerHelper::Legalized;
7988bcb0991SDimitry Andric }
7998bcb0991SDimitry Andric 
8001db9f3b2SDimitry Andric static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
8011db9f3b2SDimitry Andric   unsigned Opc = MI.getOpcode();
8021db9f3b2SDimitry Andric   auto &AtomicMI = cast<GMemOperation>(MI);
8031db9f3b2SDimitry Andric   auto &MMO = AtomicMI.getMMO();
8041db9f3b2SDimitry Andric   auto Ordering = MMO.getMergedOrdering();
8051db9f3b2SDimitry Andric   LLT MemType = MMO.getMemoryType();
8061db9f3b2SDimitry Andric   uint64_t MemSize = MemType.getSizeInBytes();
8071db9f3b2SDimitry Andric   if (MemType.isVector())
8081db9f3b2SDimitry Andric     return RTLIB::UNKNOWN_LIBCALL;
8091db9f3b2SDimitry Andric 
8101db9f3b2SDimitry Andric #define LCALLS(A, B)                                                           \
8111db9f3b2SDimitry Andric   { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
8121db9f3b2SDimitry Andric #define LCALL5(A)                                                              \
8131db9f3b2SDimitry Andric   LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
8141db9f3b2SDimitry Andric   switch (Opc) {
8151db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
8161db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
8171db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
8181db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8191db9f3b2SDimitry Andric   }
8201db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG: {
8211db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
8221db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8231db9f3b2SDimitry Andric   }
8241db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
8251db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB: {
8261db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
8271db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8281db9f3b2SDimitry Andric   }
8291db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND: {
8301db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
8311db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8321db9f3b2SDimitry Andric   }
8331db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR: {
8341db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
8351db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8361db9f3b2SDimitry Andric   }
8371db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR: {
8381db9f3b2SDimitry Andric     const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
8391db9f3b2SDimitry Andric     return getOutlineAtomicHelper(LC, Ordering, MemSize);
8401db9f3b2SDimitry Andric   }
8411db9f3b2SDimitry Andric   default:
8421db9f3b2SDimitry Andric     return RTLIB::UNKNOWN_LIBCALL;
8431db9f3b2SDimitry Andric   }
8441db9f3b2SDimitry Andric #undef LCALLS
8451db9f3b2SDimitry Andric #undef LCALL5
8461db9f3b2SDimitry Andric }
8471db9f3b2SDimitry Andric 
8481db9f3b2SDimitry Andric static LegalizerHelper::LegalizeResult
8491db9f3b2SDimitry Andric createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
8501db9f3b2SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8511db9f3b2SDimitry Andric 
8521db9f3b2SDimitry Andric   Type *RetTy;
8531db9f3b2SDimitry Andric   SmallVector<Register> RetRegs;
8541db9f3b2SDimitry Andric   SmallVector<CallLowering::ArgInfo, 3> Args;
8551db9f3b2SDimitry Andric   unsigned Opc = MI.getOpcode();
8561db9f3b2SDimitry Andric   switch (Opc) {
8571db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
8581db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
8591db9f3b2SDimitry Andric     Register Success;
8601db9f3b2SDimitry Andric     LLT SuccessLLT;
8611db9f3b2SDimitry Andric     auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
8621db9f3b2SDimitry Andric         MI.getFirst4RegLLTs();
8631db9f3b2SDimitry Andric     RetRegs.push_back(Ret);
8641db9f3b2SDimitry Andric     RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
8651db9f3b2SDimitry Andric     if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
8661db9f3b2SDimitry Andric       std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
8671db9f3b2SDimitry Andric                NewLLT) = MI.getFirst5RegLLTs();
8681db9f3b2SDimitry Andric       RetRegs.push_back(Success);
8691db9f3b2SDimitry Andric       RetTy = StructType::get(
8701db9f3b2SDimitry Andric           Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
8711db9f3b2SDimitry Andric     }
8721db9f3b2SDimitry Andric     Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
8731db9f3b2SDimitry Andric     Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
8741db9f3b2SDimitry Andric     Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
8751db9f3b2SDimitry Andric     break;
8761db9f3b2SDimitry Andric   }
8771db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
8781db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
8791db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
8801db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
8811db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
8821db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR: {
8831db9f3b2SDimitry Andric     auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
8841db9f3b2SDimitry Andric     RetRegs.push_back(Ret);
8851db9f3b2SDimitry Andric     RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
8861db9f3b2SDimitry Andric     if (Opc == TargetOpcode::G_ATOMICRMW_AND)
8871db9f3b2SDimitry Andric       Val =
8881db9f3b2SDimitry Andric           MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
8891db9f3b2SDimitry Andric               .getReg(0);
8901db9f3b2SDimitry Andric     else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
8911db9f3b2SDimitry Andric       Val =
8921db9f3b2SDimitry Andric           MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
8931db9f3b2SDimitry Andric               .getReg(0);
8941db9f3b2SDimitry Andric     Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
8951db9f3b2SDimitry Andric     Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
8961db9f3b2SDimitry Andric     break;
8971db9f3b2SDimitry Andric   }
8981db9f3b2SDimitry Andric   default:
8991db9f3b2SDimitry Andric     llvm_unreachable("unsupported opcode");
9001db9f3b2SDimitry Andric   }
9011db9f3b2SDimitry Andric 
9021db9f3b2SDimitry Andric   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
9031db9f3b2SDimitry Andric   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
9041db9f3b2SDimitry Andric   RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
9051db9f3b2SDimitry Andric   const char *Name = TLI.getLibcallName(RTLibcall);
9061db9f3b2SDimitry Andric 
9071db9f3b2SDimitry Andric   // Unsupported libcall on the target.
9081db9f3b2SDimitry Andric   if (!Name) {
9091db9f3b2SDimitry Andric     LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
9101db9f3b2SDimitry Andric                       << MIRBuilder.getTII().getName(Opc) << "\n");
9111db9f3b2SDimitry Andric     return LegalizerHelper::UnableToLegalize;
9121db9f3b2SDimitry Andric   }
9131db9f3b2SDimitry Andric 
9141db9f3b2SDimitry Andric   CallLowering::CallLoweringInfo Info;
9151db9f3b2SDimitry Andric   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
9161db9f3b2SDimitry Andric   Info.Callee = MachineOperand::CreateES(Name);
9171db9f3b2SDimitry Andric   Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
9181db9f3b2SDimitry Andric 
9191db9f3b2SDimitry Andric   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
9201db9f3b2SDimitry Andric   if (!CLI.lowerCall(MIRBuilder, Info))
9211db9f3b2SDimitry Andric     return LegalizerHelper::UnableToLegalize;
9221db9f3b2SDimitry Andric 
9231db9f3b2SDimitry Andric   return LegalizerHelper::Legalized;
9241db9f3b2SDimitry Andric }
9251db9f3b2SDimitry Andric 
9260b57cec5SDimitry Andric static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
9270b57cec5SDimitry Andric                                        Type *FromType) {
9280b57cec5SDimitry Andric   auto ToMVT = MVT::getVT(ToType);
9290b57cec5SDimitry Andric   auto FromMVT = MVT::getVT(FromType);
9300b57cec5SDimitry Andric 
9310b57cec5SDimitry Andric   switch (Opcode) {
9320b57cec5SDimitry Andric   case TargetOpcode::G_FPEXT:
9330b57cec5SDimitry Andric     return RTLIB::getFPEXT(FromMVT, ToMVT);
9340b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC:
9350b57cec5SDimitry Andric     return RTLIB::getFPROUND(FromMVT, ToMVT);
9360b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
9370b57cec5SDimitry Andric     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
9380b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
9390b57cec5SDimitry Andric     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
9400b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
9410b57cec5SDimitry Andric     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
9420b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
9430b57cec5SDimitry Andric     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
9440b57cec5SDimitry Andric   }
9450b57cec5SDimitry Andric   llvm_unreachable("Unsupported libcall function");
9460b57cec5SDimitry Andric }
9470b57cec5SDimitry Andric 
9480b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult
9490b57cec5SDimitry Andric conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
9501db9f3b2SDimitry Andric                   Type *FromType, LostDebugLocObserver &LocObserver) {
9510b57cec5SDimitry Andric   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
9521db9f3b2SDimitry Andric   return createLibcall(
9531db9f3b2SDimitry Andric       MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
9541db9f3b2SDimitry Andric       {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
9550b57cec5SDimitry Andric }
9560b57cec5SDimitry Andric 
9575f757f3fSDimitry Andric static RTLIB::Libcall
9585f757f3fSDimitry Andric getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
9595f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall;
9605f757f3fSDimitry Andric   switch (MI.getOpcode()) {
961*297eecfbSDimitry Andric   case TargetOpcode::G_GET_FPENV:
962*297eecfbSDimitry Andric     RTLibcall = RTLIB::FEGETENV;
963*297eecfbSDimitry Andric     break;
964*297eecfbSDimitry Andric   case TargetOpcode::G_SET_FPENV:
965*297eecfbSDimitry Andric   case TargetOpcode::G_RESET_FPENV:
966*297eecfbSDimitry Andric     RTLibcall = RTLIB::FESETENV;
967*297eecfbSDimitry Andric     break;
9685f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE:
9695f757f3fSDimitry Andric     RTLibcall = RTLIB::FEGETMODE;
9705f757f3fSDimitry Andric     break;
9715f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE:
9725f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE:
9735f757f3fSDimitry Andric     RTLibcall = RTLIB::FESETMODE;
9745f757f3fSDimitry Andric     break;
9755f757f3fSDimitry Andric   default:
9765f757f3fSDimitry Andric     llvm_unreachable("Unexpected opcode");
9775f757f3fSDimitry Andric   }
9785f757f3fSDimitry Andric   return RTLibcall;
9795f757f3fSDimitry Andric }
9805f757f3fSDimitry Andric 
9815f757f3fSDimitry Andric // Some library functions that read FP state (fegetmode, fegetenv) write the
9825f757f3fSDimitry Andric // state into a region in memory. IR intrinsics that do the same operations
9835f757f3fSDimitry Andric // (get_fpmode, get_fpenv) return the state as integer value. To implement these
9845f757f3fSDimitry Andric // intrinsics via the library functions, we need to use temporary variable,
9855f757f3fSDimitry Andric // for example:
9865f757f3fSDimitry Andric //
9875f757f3fSDimitry Andric //     %0:_(s32) = G_GET_FPMODE
9885f757f3fSDimitry Andric //
9895f757f3fSDimitry Andric // is transformed to:
9905f757f3fSDimitry Andric //
9915f757f3fSDimitry Andric //     %1:_(p0) = G_FRAME_INDEX %stack.0
9925f757f3fSDimitry Andric //     BL &fegetmode
9935f757f3fSDimitry Andric //     %0:_(s32) = G_LOAD %1
9945f757f3fSDimitry Andric //
9955f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
9965f757f3fSDimitry Andric LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
9971db9f3b2SDimitry Andric                                        MachineInstr &MI,
9981db9f3b2SDimitry Andric                                        LostDebugLocObserver &LocObserver) {
9995f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
10005f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
10015f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
10025f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
10035f757f3fSDimitry Andric 
10045f757f3fSDimitry Andric   // Create temporary, where library function will put the read state.
10055f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
10065f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Dst);
10075f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
10085f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
10095f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
10105f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
10115f757f3fSDimitry Andric 
10125f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
10135f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
10145f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
10155f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
10165f757f3fSDimitry Andric   auto Res =
10175f757f3fSDimitry Andric       createLibcall(MIRBuilder, RTLibcall,
10185f757f3fSDimitry Andric                     CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
10191db9f3b2SDimitry Andric                     CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
10201db9f3b2SDimitry Andric                     LocObserver, nullptr);
10215f757f3fSDimitry Andric   if (Res != LegalizerHelper::Legalized)
10225f757f3fSDimitry Andric     return Res;
10235f757f3fSDimitry Andric 
10245f757f3fSDimitry Andric   // Create a load from the temporary.
10255f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
10265f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
10275f757f3fSDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
10285f757f3fSDimitry Andric 
10295f757f3fSDimitry Andric   return LegalizerHelper::Legalized;
10305f757f3fSDimitry Andric }
10315f757f3fSDimitry Andric 
10325f757f3fSDimitry Andric // Similar to `createGetStateLibcall` the function calls a library function
10335f757f3fSDimitry Andric // using transient space in stack. In this case the library function reads
10345f757f3fSDimitry Andric // content of memory region.
10355f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
10365f757f3fSDimitry Andric LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
10371db9f3b2SDimitry Andric                                        MachineInstr &MI,
10381db9f3b2SDimitry Andric                                        LostDebugLocObserver &LocObserver) {
10395f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
10405f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
10415f757f3fSDimitry Andric   auto &MRI = *MIRBuilder.getMRI();
10425f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
10435f757f3fSDimitry Andric 
10445f757f3fSDimitry Andric   // Create temporary, where library function will get the new state.
10455f757f3fSDimitry Andric   Register Src = MI.getOperand(0).getReg();
10465f757f3fSDimitry Andric   LLT StateTy = MRI.getType(Src);
10475f757f3fSDimitry Andric   TypeSize StateSize = StateTy.getSizeInBytes();
10485f757f3fSDimitry Andric   Align TempAlign = getStackTemporaryAlignment(StateTy);
10495f757f3fSDimitry Andric   MachinePointerInfo TempPtrInfo;
10505f757f3fSDimitry Andric   auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
10515f757f3fSDimitry Andric 
10525f757f3fSDimitry Andric   // Put the new state into the temporary.
10535f757f3fSDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
10545f757f3fSDimitry Andric       TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
10555f757f3fSDimitry Andric   MIRBuilder.buildStore(Src, Temp, *MMO);
10565f757f3fSDimitry Andric 
10575f757f3fSDimitry Andric   // Create a call to library function, with the temporary as an argument.
10585f757f3fSDimitry Andric   unsigned TempAddrSpace = DL.getAllocaAddrSpace();
10595f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
10605f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
10615f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
10625f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
10631db9f3b2SDimitry Andric                        CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
10641db9f3b2SDimitry Andric                        LocObserver, nullptr);
10655f757f3fSDimitry Andric }
10665f757f3fSDimitry Andric 
10675f757f3fSDimitry Andric // The function is used to legalize operations that set default environment
10685f757f3fSDimitry Andric // state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
10695f757f3fSDimitry Andric // On most targets supported in glibc FE_DFL_MODE is defined as
10705f757f3fSDimitry Andric // `((const femode_t *) -1)`. Such assumption is used here. If for some target
10715f757f3fSDimitry Andric // it is not true, the target must provide custom lowering.
10725f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
10735f757f3fSDimitry Andric LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
10741db9f3b2SDimitry Andric                                          MachineInstr &MI,
10751db9f3b2SDimitry Andric                                          LostDebugLocObserver &LocObserver) {
10765f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
10775f757f3fSDimitry Andric   auto &MF = MIRBuilder.getMF();
10785f757f3fSDimitry Andric   auto &Ctx = MF.getFunction().getContext();
10795f757f3fSDimitry Andric 
10805f757f3fSDimitry Andric   // Create an argument for the library function.
10815f757f3fSDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
10825f757f3fSDimitry Andric   Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
10835f757f3fSDimitry Andric   unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
10845f757f3fSDimitry Andric   LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
10855f757f3fSDimitry Andric   auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
10865f757f3fSDimitry Andric   DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
10875f757f3fSDimitry Andric   MIRBuilder.buildIntToPtr(Dest, DefValue);
10885f757f3fSDimitry Andric 
10895f757f3fSDimitry Andric   RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
10905f757f3fSDimitry Andric   return createLibcall(MIRBuilder, RTLibcall,
10915f757f3fSDimitry Andric                        CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
10921db9f3b2SDimitry Andric                        CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
10931db9f3b2SDimitry Andric                        LocObserver, &MI);
10945f757f3fSDimitry Andric }
10955f757f3fSDimitry Andric 
10960b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
1097fe6060f1SDimitry Andric LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
10980b57cec5SDimitry Andric   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
10990b57cec5SDimitry Andric 
11000b57cec5SDimitry Andric   switch (MI.getOpcode()) {
11010b57cec5SDimitry Andric   default:
11020b57cec5SDimitry Andric     return UnableToLegalize;
1103bdd1243dSDimitry Andric   case TargetOpcode::G_MUL:
11040b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
11050b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
11060b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
11070b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
11080b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
11095f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
11105f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
11110b57cec5SDimitry Andric     Type *HLTy = IntegerType::get(Ctx, Size);
11121db9f3b2SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
11130b57cec5SDimitry Andric     if (Status != Legalized)
11140b57cec5SDimitry Andric       return Status;
11150b57cec5SDimitry Andric     break;
11160b57cec5SDimitry Andric   }
11170b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
11180b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
11190b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
11200b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
11210b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
11220b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
11230b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
11240b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
11250b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
11260b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
11270b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
11280b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
112906c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
11300b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
11310b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
11325f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
11330b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
11345ffd83dbSDimitry Andric   case TargetOpcode::G_FFLOOR:
11355ffd83dbSDimitry Andric   case TargetOpcode::G_FMINNUM:
11365ffd83dbSDimitry Andric   case TargetOpcode::G_FMAXNUM:
11375ffd83dbSDimitry Andric   case TargetOpcode::G_FSQRT:
11385ffd83dbSDimitry Andric   case TargetOpcode::G_FRINT:
1139e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
1140e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
11415f757f3fSDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
11425f757f3fSDimitry Andric     unsigned Size = LLTy.getSizeInBits();
11435ffd83dbSDimitry Andric     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1144e8d8bef9SDimitry Andric     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1145e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
11460b57cec5SDimitry Andric       return UnableToLegalize;
11470b57cec5SDimitry Andric     }
11481db9f3b2SDimitry Andric     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
11491db9f3b2SDimitry Andric     if (Status != Legalized)
11501db9f3b2SDimitry Andric       return Status;
11511db9f3b2SDimitry Andric     break;
11521db9f3b2SDimitry Andric   }
11531db9f3b2SDimitry Andric   case TargetOpcode::G_FPOWI: {
11541db9f3b2SDimitry Andric     LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
11551db9f3b2SDimitry Andric     unsigned Size = LLTy.getSizeInBits();
11561db9f3b2SDimitry Andric     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
11571db9f3b2SDimitry Andric     Type *ITy = IntegerType::get(
11581db9f3b2SDimitry Andric         Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
11591db9f3b2SDimitry Andric     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
11601db9f3b2SDimitry Andric       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
11611db9f3b2SDimitry Andric       return UnableToLegalize;
11621db9f3b2SDimitry Andric     }
11631db9f3b2SDimitry Andric     auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
11641db9f3b2SDimitry Andric     std::initializer_list<CallLowering::ArgInfo> Args = {
11651db9f3b2SDimitry Andric         {MI.getOperand(1).getReg(), HLTy, 0},
11661db9f3b2SDimitry Andric         {MI.getOperand(2).getReg(), ITy, 1}};
11671db9f3b2SDimitry Andric     LegalizeResult Status =
11681db9f3b2SDimitry Andric         createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
11691db9f3b2SDimitry Andric                       Args, LocObserver, &MI);
11700b57cec5SDimitry Andric     if (Status != Legalized)
11710b57cec5SDimitry Andric       return Status;
11720b57cec5SDimitry Andric     break;
11730b57cec5SDimitry Andric   }
11745ffd83dbSDimitry Andric   case TargetOpcode::G_FPEXT:
11750b57cec5SDimitry Andric   case TargetOpcode::G_FPTRUNC: {
11765ffd83dbSDimitry Andric     Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
11775ffd83dbSDimitry Andric     Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
11785ffd83dbSDimitry Andric     if (!FromTy || !ToTy)
11790b57cec5SDimitry Andric       return UnableToLegalize;
11801db9f3b2SDimitry Andric     LegalizeResult Status =
11811db9f3b2SDimitry Andric         conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
11820b57cec5SDimitry Andric     if (Status != Legalized)
11830b57cec5SDimitry Andric       return Status;
11840b57cec5SDimitry Andric     break;
11850b57cec5SDimitry Andric   }
11860b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
11870b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI: {
11880b57cec5SDimitry Andric     // FIXME: Support other types
11890b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
11900b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
11910b57cec5SDimitry Andric     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
11920b57cec5SDimitry Andric       return UnableToLegalize;
11930b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
11940b57cec5SDimitry Andric         MI, MIRBuilder,
11950b57cec5SDimitry Andric         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
11961db9f3b2SDimitry Andric         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
11971db9f3b2SDimitry Andric         LocObserver);
11980b57cec5SDimitry Andric     if (Status != Legalized)
11990b57cec5SDimitry Andric       return Status;
12000b57cec5SDimitry Andric     break;
12010b57cec5SDimitry Andric   }
12020b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
12030b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP: {
12040b57cec5SDimitry Andric     // FIXME: Support other types
12050b57cec5SDimitry Andric     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
12060b57cec5SDimitry Andric     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
12070b57cec5SDimitry Andric     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
12080b57cec5SDimitry Andric       return UnableToLegalize;
12090b57cec5SDimitry Andric     LegalizeResult Status = conversionLibcall(
12100b57cec5SDimitry Andric         MI, MIRBuilder,
12110b57cec5SDimitry Andric         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
12121db9f3b2SDimitry Andric         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
12131db9f3b2SDimitry Andric         LocObserver);
12141db9f3b2SDimitry Andric     if (Status != Legalized)
12151db9f3b2SDimitry Andric       return Status;
12161db9f3b2SDimitry Andric     break;
12171db9f3b2SDimitry Andric   }
12181db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
12191db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
12201db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
12211db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
12221db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
12231db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
12241db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
12251db9f3b2SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
12261db9f3b2SDimitry Andric     auto Status = createAtomicLibcall(MIRBuilder, MI);
12270b57cec5SDimitry Andric     if (Status != Legalized)
12280b57cec5SDimitry Andric       return Status;
12290b57cec5SDimitry Andric     break;
12300b57cec5SDimitry Andric   }
1231fe6060f1SDimitry Andric   case TargetOpcode::G_BZERO:
1232e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMCPY:
1233e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMMOVE:
1234e8d8bef9SDimitry Andric   case TargetOpcode::G_MEMSET: {
1235fe6060f1SDimitry Andric     LegalizeResult Result =
1236fe6060f1SDimitry Andric         createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1237fe6060f1SDimitry Andric     if (Result != Legalized)
1238fe6060f1SDimitry Andric       return Result;
1239e8d8bef9SDimitry Andric     MI.eraseFromParent();
1240e8d8bef9SDimitry Andric     return Result;
1241e8d8bef9SDimitry Andric   }
1242*297eecfbSDimitry Andric   case TargetOpcode::G_GET_FPENV:
12435f757f3fSDimitry Andric   case TargetOpcode::G_GET_FPMODE: {
12441db9f3b2SDimitry Andric     LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
12455f757f3fSDimitry Andric     if (Result != Legalized)
12465f757f3fSDimitry Andric       return Result;
12475f757f3fSDimitry Andric     break;
12485f757f3fSDimitry Andric   }
1249*297eecfbSDimitry Andric   case TargetOpcode::G_SET_FPENV:
12505f757f3fSDimitry Andric   case TargetOpcode::G_SET_FPMODE: {
12511db9f3b2SDimitry Andric     LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
12525f757f3fSDimitry Andric     if (Result != Legalized)
12535f757f3fSDimitry Andric       return Result;
12545f757f3fSDimitry Andric     break;
12555f757f3fSDimitry Andric   }
1256*297eecfbSDimitry Andric   case TargetOpcode::G_RESET_FPENV:
12575f757f3fSDimitry Andric   case TargetOpcode::G_RESET_FPMODE: {
12581db9f3b2SDimitry Andric     LegalizeResult Result =
12591db9f3b2SDimitry Andric         createResetStateLibcall(MIRBuilder, MI, LocObserver);
12605f757f3fSDimitry Andric     if (Result != Legalized)
12615f757f3fSDimitry Andric       return Result;
12625f757f3fSDimitry Andric     break;
12635f757f3fSDimitry Andric   }
12640b57cec5SDimitry Andric   }
12650b57cec5SDimitry Andric 
12660b57cec5SDimitry Andric   MI.eraseFromParent();
12670b57cec5SDimitry Andric   return Legalized;
12680b57cec5SDimitry Andric }
12690b57cec5SDimitry Andric 
12700b57cec5SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
12710b57cec5SDimitry Andric                                                               unsigned TypeIdx,
12720b57cec5SDimitry Andric                                                               LLT NarrowTy) {
12730b57cec5SDimitry Andric   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
12740b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
12750b57cec5SDimitry Andric 
12760b57cec5SDimitry Andric   switch (MI.getOpcode()) {
12770b57cec5SDimitry Andric   default:
12780b57cec5SDimitry Andric     return UnableToLegalize;
12790b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
12805ffd83dbSDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
12815ffd83dbSDimitry Andric     LLT DstTy = MRI.getType(DstReg);
12825ffd83dbSDimitry Andric 
12835ffd83dbSDimitry Andric     // If SizeOp0 is not an exact multiple of NarrowSize, emit
12845ffd83dbSDimitry Andric     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
12855ffd83dbSDimitry Andric     // FIXME: Although this would also be legal for the general case, it causes
12865ffd83dbSDimitry Andric     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
12875ffd83dbSDimitry Andric     //  combines not being hit). This seems to be a problem related to the
12885ffd83dbSDimitry Andric     //  artifact combiner.
12895ffd83dbSDimitry Andric     if (SizeOp0 % NarrowSize != 0) {
12905ffd83dbSDimitry Andric       LLT ImplicitTy = NarrowTy;
12915ffd83dbSDimitry Andric       if (DstTy.isVector())
1292fe6060f1SDimitry Andric         ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
12935ffd83dbSDimitry Andric 
12945ffd83dbSDimitry Andric       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
12955ffd83dbSDimitry Andric       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
12965ffd83dbSDimitry Andric 
12975ffd83dbSDimitry Andric       MI.eraseFromParent();
12985ffd83dbSDimitry Andric       return Legalized;
12995ffd83dbSDimitry Andric     }
13005ffd83dbSDimitry Andric 
13010b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
13020b57cec5SDimitry Andric 
13030b57cec5SDimitry Andric     SmallVector<Register, 2> DstRegs;
13040b57cec5SDimitry Andric     for (int i = 0; i < NumParts; ++i)
13055ffd83dbSDimitry Andric       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
13060b57cec5SDimitry Andric 
13075ffd83dbSDimitry Andric     if (DstTy.isVector())
13080b57cec5SDimitry Andric       MIRBuilder.buildBuildVector(DstReg, DstRegs);
13090b57cec5SDimitry Andric     else
1310bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
13110b57cec5SDimitry Andric     MI.eraseFromParent();
13120b57cec5SDimitry Andric     return Legalized;
13130b57cec5SDimitry Andric   }
13140b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
13150b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
13160b57cec5SDimitry Andric     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
13170b57cec5SDimitry Andric     unsigned TotalSize = Ty.getSizeInBits();
13180b57cec5SDimitry Andric     unsigned NarrowSize = NarrowTy.getSizeInBits();
13190b57cec5SDimitry Andric     int NumParts = TotalSize / NarrowSize;
13200b57cec5SDimitry Andric 
13210b57cec5SDimitry Andric     SmallVector<Register, 4> PartRegs;
13220b57cec5SDimitry Andric     for (int I = 0; I != NumParts; ++I) {
13230b57cec5SDimitry Andric       unsigned Offset = I * NarrowSize;
13240b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(NarrowTy,
13250b57cec5SDimitry Andric                                         Val.lshr(Offset).trunc(NarrowSize));
13260b57cec5SDimitry Andric       PartRegs.push_back(K.getReg(0));
13270b57cec5SDimitry Andric     }
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric     LLT LeftoverTy;
13300b57cec5SDimitry Andric     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
13310b57cec5SDimitry Andric     SmallVector<Register, 1> LeftoverRegs;
13320b57cec5SDimitry Andric     if (LeftoverBits != 0) {
13330b57cec5SDimitry Andric       LeftoverTy = LLT::scalar(LeftoverBits);
13340b57cec5SDimitry Andric       auto K = MIRBuilder.buildConstant(
13350b57cec5SDimitry Andric         LeftoverTy,
13360b57cec5SDimitry Andric         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
13370b57cec5SDimitry Andric       LeftoverRegs.push_back(K.getReg(0));
13380b57cec5SDimitry Andric     }
13390b57cec5SDimitry Andric 
13400b57cec5SDimitry Andric     insertParts(MI.getOperand(0).getReg(),
13410b57cec5SDimitry Andric                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric     MI.eraseFromParent();
13440b57cec5SDimitry Andric     return Legalized;
13450b57cec5SDimitry Andric   }
13465ffd83dbSDimitry Andric   case TargetOpcode::G_SEXT:
13475ffd83dbSDimitry Andric   case TargetOpcode::G_ZEXT:
13485ffd83dbSDimitry Andric   case TargetOpcode::G_ANYEXT:
13495ffd83dbSDimitry Andric     return narrowScalarExt(MI, TypeIdx, NarrowTy);
13508bcb0991SDimitry Andric   case TargetOpcode::G_TRUNC: {
13518bcb0991SDimitry Andric     if (TypeIdx != 1)
13528bcb0991SDimitry Andric       return UnableToLegalize;
13538bcb0991SDimitry Andric 
13548bcb0991SDimitry Andric     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
13558bcb0991SDimitry Andric     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
13568bcb0991SDimitry Andric       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
13578bcb0991SDimitry Andric       return UnableToLegalize;
13588bcb0991SDimitry Andric     }
13598bcb0991SDimitry Andric 
13605ffd83dbSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
13615ffd83dbSDimitry Andric     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
13628bcb0991SDimitry Andric     MI.eraseFromParent();
13638bcb0991SDimitry Andric     return Legalized;
13648bcb0991SDimitry Andric   }
13658bcb0991SDimitry Andric 
13660eae32dcSDimitry Andric   case TargetOpcode::G_FREEZE: {
13670eae32dcSDimitry Andric     if (TypeIdx != 0)
13680eae32dcSDimitry Andric       return UnableToLegalize;
13690eae32dcSDimitry Andric 
13700eae32dcSDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
13710eae32dcSDimitry Andric     // Should widen scalar first
13720eae32dcSDimitry Andric     if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
13730eae32dcSDimitry Andric       return UnableToLegalize;
13740eae32dcSDimitry Andric 
13750eae32dcSDimitry Andric     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
13760eae32dcSDimitry Andric     SmallVector<Register, 8> Parts;
13770eae32dcSDimitry Andric     for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
13780eae32dcSDimitry Andric       Parts.push_back(
13790eae32dcSDimitry Andric           MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
13800eae32dcSDimitry Andric     }
13810eae32dcSDimitry Andric 
1382bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
13830eae32dcSDimitry Andric     MI.eraseFromParent();
13840eae32dcSDimitry Andric     return Legalized;
13850eae32dcSDimitry Andric   }
1386fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
1387fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
1388fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
1389fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
1390fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
1391fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
1392fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
1393fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
1394fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
1395fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
1396fe6060f1SDimitry Andric     return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
13970b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
13980b57cec5SDimitry Andric   case TargetOpcode::G_UMULH:
13990b57cec5SDimitry Andric     return narrowScalarMul(MI, NarrowTy);
14000b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
14010b57cec5SDimitry Andric     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
14020b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
14030b57cec5SDimitry Andric     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
14040b57cec5SDimitry Andric   case TargetOpcode::G_LOAD: {
1405fe6060f1SDimitry Andric     auto &LoadMI = cast<GLoad>(MI);
1406fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
14070b57cec5SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
14080b57cec5SDimitry Andric     if (DstTy.isVector())
14090b57cec5SDimitry Andric       return UnableToLegalize;
14100b57cec5SDimitry Andric 
1411fe6060f1SDimitry Andric     if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
14120b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1413fe6060f1SDimitry Andric       MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
14140b57cec5SDimitry Andric       MIRBuilder.buildAnyExt(DstReg, TmpReg);
1415fe6060f1SDimitry Andric       LoadMI.eraseFromParent();
14160b57cec5SDimitry Andric       return Legalized;
14170b57cec5SDimitry Andric     }
14180b57cec5SDimitry Andric 
1419fe6060f1SDimitry Andric     return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
14200b57cec5SDimitry Andric   }
14210b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
14220b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD: {
1423fe6060f1SDimitry Andric     auto &LoadMI = cast<GExtLoad>(MI);
1424fe6060f1SDimitry Andric     Register DstReg = LoadMI.getDstReg();
1425fe6060f1SDimitry Andric     Register PtrReg = LoadMI.getPointerReg();
14260b57cec5SDimitry Andric 
14270b57cec5SDimitry Andric     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1428fe6060f1SDimitry Andric     auto &MMO = LoadMI.getMMO();
1429e8d8bef9SDimitry Andric     unsigned MemSize = MMO.getSizeInBits();
1430e8d8bef9SDimitry Andric 
1431e8d8bef9SDimitry Andric     if (MemSize == NarrowSize) {
14320b57cec5SDimitry Andric       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1433e8d8bef9SDimitry Andric     } else if (MemSize < NarrowSize) {
1434fe6060f1SDimitry Andric       MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1435e8d8bef9SDimitry Andric     } else if (MemSize > NarrowSize) {
1436e8d8bef9SDimitry Andric       // FIXME: Need to split the load.
1437e8d8bef9SDimitry Andric       return UnableToLegalize;
14380b57cec5SDimitry Andric     }
14390b57cec5SDimitry Andric 
1440fe6060f1SDimitry Andric     if (isa<GZExtLoad>(LoadMI))
14410b57cec5SDimitry Andric       MIRBuilder.buildZExt(DstReg, TmpReg);
14420b57cec5SDimitry Andric     else
14430b57cec5SDimitry Andric       MIRBuilder.buildSExt(DstReg, TmpReg);
14440b57cec5SDimitry Andric 
1445fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
14460b57cec5SDimitry Andric     return Legalized;
14470b57cec5SDimitry Andric   }
14480b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
1449fe6060f1SDimitry Andric     auto &StoreMI = cast<GStore>(MI);
14500b57cec5SDimitry Andric 
1451fe6060f1SDimitry Andric     Register SrcReg = StoreMI.getValueReg();
14520b57cec5SDimitry Andric     LLT SrcTy = MRI.getType(SrcReg);
14530b57cec5SDimitry Andric     if (SrcTy.isVector())
14540b57cec5SDimitry Andric       return UnableToLegalize;
14550b57cec5SDimitry Andric 
14560b57cec5SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
14570b57cec5SDimitry Andric     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
14580b57cec5SDimitry Andric     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
14590b57cec5SDimitry Andric     if (SrcTy.isVector() && LeftoverBits != 0)
14600b57cec5SDimitry Andric       return UnableToLegalize;
14610b57cec5SDimitry Andric 
1462fe6060f1SDimitry Andric     if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
14630b57cec5SDimitry Andric       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
14640b57cec5SDimitry Andric       MIRBuilder.buildTrunc(TmpReg, SrcReg);
1465fe6060f1SDimitry Andric       MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1466fe6060f1SDimitry Andric       StoreMI.eraseFromParent();
14670b57cec5SDimitry Andric       return Legalized;
14680b57cec5SDimitry Andric     }
14690b57cec5SDimitry Andric 
1470fe6060f1SDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
14710b57cec5SDimitry Andric   }
14720b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
14730b57cec5SDimitry Andric     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
14740b57cec5SDimitry Andric   case TargetOpcode::G_AND:
14750b57cec5SDimitry Andric   case TargetOpcode::G_OR:
14760b57cec5SDimitry Andric   case TargetOpcode::G_XOR: {
14770b57cec5SDimitry Andric     // Legalize bitwise operation:
14780b57cec5SDimitry Andric     // A = BinOp<Ty> B, C
14790b57cec5SDimitry Andric     // into:
14800b57cec5SDimitry Andric     // B1, ..., BN = G_UNMERGE_VALUES B
14810b57cec5SDimitry Andric     // C1, ..., CN = G_UNMERGE_VALUES C
14820b57cec5SDimitry Andric     // A1 = BinOp<Ty/N> B1, C1
14830b57cec5SDimitry Andric     // ...
14840b57cec5SDimitry Andric     // AN = BinOp<Ty/N> BN, CN
14850b57cec5SDimitry Andric     // A = G_MERGE_VALUES A1, ..., AN
14860b57cec5SDimitry Andric     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
14870b57cec5SDimitry Andric   }
14880b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
14890b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
14900b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
14910b57cec5SDimitry Andric     return narrowScalarShift(MI, TypeIdx, NarrowTy);
14920b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
14930b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
14940b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
14950b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
14960b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
14975ffd83dbSDimitry Andric     if (TypeIdx == 1)
14985ffd83dbSDimitry Andric       switch (MI.getOpcode()) {
14995ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ:
15005ffd83dbSDimitry Andric       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
15015ffd83dbSDimitry Andric         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
15025ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ:
15035ffd83dbSDimitry Andric       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
15045ffd83dbSDimitry Andric         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
15055ffd83dbSDimitry Andric       case TargetOpcode::G_CTPOP:
15065ffd83dbSDimitry Andric         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
15075ffd83dbSDimitry Andric       default:
15085ffd83dbSDimitry Andric         return UnableToLegalize;
15095ffd83dbSDimitry Andric       }
15100b57cec5SDimitry Andric 
15110b57cec5SDimitry Andric     Observer.changingInstr(MI);
15120b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
15130b57cec5SDimitry Andric     Observer.changedInstr(MI);
15140b57cec5SDimitry Andric     return Legalized;
15150b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
15160b57cec5SDimitry Andric     if (TypeIdx != 1)
15170b57cec5SDimitry Andric       return UnableToLegalize;
15180b57cec5SDimitry Andric 
15190b57cec5SDimitry Andric     Observer.changingInstr(MI);
15200b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, 1);
15210b57cec5SDimitry Andric     Observer.changedInstr(MI);
15220b57cec5SDimitry Andric     return Legalized;
15230b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
15240b57cec5SDimitry Andric     if (TypeIdx != 0)
15250b57cec5SDimitry Andric       return UnableToLegalize;
15260b57cec5SDimitry Andric 
15270b57cec5SDimitry Andric     Observer.changingInstr(MI);
15280b57cec5SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
15290b57cec5SDimitry Andric     Observer.changedInstr(MI);
15300b57cec5SDimitry Andric     return Legalized;
15310b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
1532d409305fSDimitry Andric     // FIXME: add support for when SizeOp0 isn't an exact multiple of
1533d409305fSDimitry Andric     // NarrowSize.
1534d409305fSDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1535d409305fSDimitry Andric       return UnableToLegalize;
1536d409305fSDimitry Andric 
15370b57cec5SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
15385ffd83dbSDimitry Andric     SmallVector<Register, 2> DstRegs(NumParts);
15395ffd83dbSDimitry Andric     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
15400b57cec5SDimitry Andric     Observer.changingInstr(MI);
15410b57cec5SDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
15420b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1543bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
15440b57cec5SDimitry Andric       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
15450b57cec5SDimitry Andric                    SrcRegs[i / 2]);
15460b57cec5SDimitry Andric     }
15470b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
15480b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, MI);
15490b57cec5SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
15500b57cec5SDimitry Andric       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
15510b57cec5SDimitry Andric       MachineInstrBuilder MIB =
15520b57cec5SDimitry Andric           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
15530b57cec5SDimitry Andric       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
15540b57cec5SDimitry Andric         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
15550b57cec5SDimitry Andric     }
15568bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1557bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
15580b57cec5SDimitry Andric     Observer.changedInstr(MI);
15590b57cec5SDimitry Andric     MI.eraseFromParent();
15600b57cec5SDimitry Andric     return Legalized;
15610b57cec5SDimitry Andric   }
15620b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
15630b57cec5SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
15640b57cec5SDimitry Andric     if (TypeIdx != 2)
15650b57cec5SDimitry Andric       return UnableToLegalize;
15660b57cec5SDimitry Andric 
15670b57cec5SDimitry Andric     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
15680b57cec5SDimitry Andric     Observer.changingInstr(MI);
15690b57cec5SDimitry Andric     narrowScalarSrc(MI, NarrowTy, OpIdx);
15700b57cec5SDimitry Andric     Observer.changedInstr(MI);
15710b57cec5SDimitry Andric     return Legalized;
15720b57cec5SDimitry Andric   }
15730b57cec5SDimitry Andric   case TargetOpcode::G_ICMP: {
1574fe6060f1SDimitry Andric     Register LHS = MI.getOperand(2).getReg();
1575fe6060f1SDimitry Andric     LLT SrcTy = MRI.getType(LHS);
1576fe6060f1SDimitry Andric     uint64_t SrcSize = SrcTy.getSizeInBits();
15770b57cec5SDimitry Andric     CmpInst::Predicate Pred =
15780b57cec5SDimitry Andric         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
15790b57cec5SDimitry Andric 
1580fe6060f1SDimitry Andric     // TODO: Handle the non-equality case for weird sizes.
1581fe6060f1SDimitry Andric     if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1582fe6060f1SDimitry Andric       return UnableToLegalize;
1583fe6060f1SDimitry Andric 
1584fe6060f1SDimitry Andric     LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1585fe6060f1SDimitry Andric     SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1586fe6060f1SDimitry Andric     if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1587fe6060f1SDimitry Andric                       LHSLeftoverRegs))
1588fe6060f1SDimitry Andric       return UnableToLegalize;
1589fe6060f1SDimitry Andric 
1590fe6060f1SDimitry Andric     LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1591fe6060f1SDimitry Andric     SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1592fe6060f1SDimitry Andric     if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1593fe6060f1SDimitry Andric                       RHSPartRegs, RHSLeftoverRegs))
1594fe6060f1SDimitry Andric       return UnableToLegalize;
1595fe6060f1SDimitry Andric 
1596fe6060f1SDimitry Andric     // We now have the LHS and RHS of the compare split into narrow-type
1597fe6060f1SDimitry Andric     // registers, plus potentially some leftover type.
1598fe6060f1SDimitry Andric     Register Dst = MI.getOperand(0).getReg();
1599fe6060f1SDimitry Andric     LLT ResTy = MRI.getType(Dst);
1600fe6060f1SDimitry Andric     if (ICmpInst::isEquality(Pred)) {
1601fe6060f1SDimitry Andric       // For each part on the LHS and RHS, keep track of the result of XOR-ing
1602fe6060f1SDimitry Andric       // them together. For each equal part, the result should be all 0s. For
1603fe6060f1SDimitry Andric       // each non-equal part, we'll get at least one 1.
1604fe6060f1SDimitry Andric       auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1605fe6060f1SDimitry Andric       SmallVector<Register, 4> Xors;
1606fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1607fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1608fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1609fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1610fe6060f1SDimitry Andric         Xors.push_back(Xor);
1611fe6060f1SDimitry Andric       }
1612fe6060f1SDimitry Andric 
1613fe6060f1SDimitry Andric       // Build a G_XOR for each leftover register. Each G_XOR must be widened
1614fe6060f1SDimitry Andric       // to the desired narrow type so that we can OR them together later.
1615fe6060f1SDimitry Andric       SmallVector<Register, 4> WidenedXors;
1616fe6060f1SDimitry Andric       for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1617fe6060f1SDimitry Andric         auto LHS = std::get<0>(LHSAndRHS);
1618fe6060f1SDimitry Andric         auto RHS = std::get<1>(LHSAndRHS);
1619fe6060f1SDimitry Andric         auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1620fe6060f1SDimitry Andric         LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1621fe6060f1SDimitry Andric         buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1622fe6060f1SDimitry Andric                             /* PadStrategy = */ TargetOpcode::G_ZEXT);
1623fe6060f1SDimitry Andric         Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1624fe6060f1SDimitry Andric       }
1625fe6060f1SDimitry Andric 
1626fe6060f1SDimitry Andric       // Now, for each part we broke up, we know if they are equal/not equal
1627fe6060f1SDimitry Andric       // based off the G_XOR. We can OR these all together and compare against
1628fe6060f1SDimitry Andric       // 0 to get the result.
1629fe6060f1SDimitry Andric       assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1630fe6060f1SDimitry Andric       auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1631fe6060f1SDimitry Andric       for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1632fe6060f1SDimitry Andric         Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1633fe6060f1SDimitry Andric       MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
16340b57cec5SDimitry Andric     } else {
1635fe6060f1SDimitry Andric       // TODO: Handle non-power-of-two types.
1636fe6060f1SDimitry Andric       assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1637fe6060f1SDimitry Andric       assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1638fe6060f1SDimitry Andric       Register LHSL = LHSPartRegs[0];
1639fe6060f1SDimitry Andric       Register LHSH = LHSPartRegs[1];
1640fe6060f1SDimitry Andric       Register RHSL = RHSPartRegs[0];
1641fe6060f1SDimitry Andric       Register RHSH = RHSPartRegs[1];
16428bcb0991SDimitry Andric       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
16430b57cec5SDimitry Andric       MachineInstrBuilder CmpHEQ =
16448bcb0991SDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
16450b57cec5SDimitry Andric       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
16468bcb0991SDimitry Andric           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1647fe6060f1SDimitry Andric       MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
16480b57cec5SDimitry Andric     }
16490b57cec5SDimitry Andric     MI.eraseFromParent();
16500b57cec5SDimitry Andric     return Legalized;
16510b57cec5SDimitry Andric   }
16528bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
16538bcb0991SDimitry Andric     if (TypeIdx != 0)
16548bcb0991SDimitry Andric       return UnableToLegalize;
16558bcb0991SDimitry Andric 
16568bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
16578bcb0991SDimitry Andric 
16588bcb0991SDimitry Andric     // So long as the new type has more bits than the bits we're extending we
16598bcb0991SDimitry Andric     // don't need to break it apart.
16605f757f3fSDimitry Andric     if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
16618bcb0991SDimitry Andric       Observer.changingInstr(MI);
16628bcb0991SDimitry Andric       // We don't lose any non-extension bits by truncating the src and
16638bcb0991SDimitry Andric       // sign-extending the dst.
16648bcb0991SDimitry Andric       MachineOperand &MO1 = MI.getOperand(1);
16655ffd83dbSDimitry Andric       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
16665ffd83dbSDimitry Andric       MO1.setReg(TruncMIB.getReg(0));
16678bcb0991SDimitry Andric 
16688bcb0991SDimitry Andric       MachineOperand &MO2 = MI.getOperand(0);
16698bcb0991SDimitry Andric       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
16708bcb0991SDimitry Andric       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
16715ffd83dbSDimitry Andric       MIRBuilder.buildSExt(MO2, DstExt);
16728bcb0991SDimitry Andric       MO2.setReg(DstExt);
16738bcb0991SDimitry Andric       Observer.changedInstr(MI);
16748bcb0991SDimitry Andric       return Legalized;
16758bcb0991SDimitry Andric     }
16768bcb0991SDimitry Andric 
16778bcb0991SDimitry Andric     // Break it apart. Components below the extension point are unmodified. The
16788bcb0991SDimitry Andric     // component containing the extension point becomes a narrower SEXT_INREG.
16798bcb0991SDimitry Andric     // Components above it are ashr'd from the component containing the
16808bcb0991SDimitry Andric     // extension point.
16818bcb0991SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
16828bcb0991SDimitry Andric       return UnableToLegalize;
16838bcb0991SDimitry Andric     int NumParts = SizeOp0 / NarrowSize;
16848bcb0991SDimitry Andric 
16858bcb0991SDimitry Andric     // List the registers where the destination will be scattered.
16868bcb0991SDimitry Andric     SmallVector<Register, 2> DstRegs;
16878bcb0991SDimitry Andric     // List the registers where the source will be split.
16888bcb0991SDimitry Andric     SmallVector<Register, 2> SrcRegs;
16898bcb0991SDimitry Andric 
16908bcb0991SDimitry Andric     // Create all the temporary registers.
16918bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
16928bcb0991SDimitry Andric       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
16938bcb0991SDimitry Andric 
16948bcb0991SDimitry Andric       SrcRegs.push_back(SrcReg);
16958bcb0991SDimitry Andric     }
16968bcb0991SDimitry Andric 
16978bcb0991SDimitry Andric     // Explode the big arguments into smaller chunks.
16985ffd83dbSDimitry Andric     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
16998bcb0991SDimitry Andric 
17008bcb0991SDimitry Andric     Register AshrCstReg =
17018bcb0991SDimitry Andric         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
17025ffd83dbSDimitry Andric             .getReg(0);
17035f757f3fSDimitry Andric     Register FullExtensionReg;
17045f757f3fSDimitry Andric     Register PartialExtensionReg;
17058bcb0991SDimitry Andric 
17068bcb0991SDimitry Andric     // Do the operation on each small part.
17078bcb0991SDimitry Andric     for (int i = 0; i < NumParts; ++i) {
17085f757f3fSDimitry Andric       if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
17098bcb0991SDimitry Andric         DstRegs.push_back(SrcRegs[i]);
17105f757f3fSDimitry Andric         PartialExtensionReg = DstRegs.back();
17115f757f3fSDimitry Andric       } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
17128bcb0991SDimitry Andric         assert(PartialExtensionReg &&
17138bcb0991SDimitry Andric                "Expected to visit partial extension before full");
17148bcb0991SDimitry Andric         if (FullExtensionReg) {
17158bcb0991SDimitry Andric           DstRegs.push_back(FullExtensionReg);
17168bcb0991SDimitry Andric           continue;
17178bcb0991SDimitry Andric         }
17185ffd83dbSDimitry Andric         DstRegs.push_back(
17195ffd83dbSDimitry Andric             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
17205ffd83dbSDimitry Andric                 .getReg(0));
17218bcb0991SDimitry Andric         FullExtensionReg = DstRegs.back();
17228bcb0991SDimitry Andric       } else {
17238bcb0991SDimitry Andric         DstRegs.push_back(
17248bcb0991SDimitry Andric             MIRBuilder
17258bcb0991SDimitry Andric                 .buildInstr(
17268bcb0991SDimitry Andric                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
17278bcb0991SDimitry Andric                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
17285ffd83dbSDimitry Andric                 .getReg(0));
17298bcb0991SDimitry Andric         PartialExtensionReg = DstRegs.back();
17308bcb0991SDimitry Andric       }
17318bcb0991SDimitry Andric     }
17328bcb0991SDimitry Andric 
17338bcb0991SDimitry Andric     // Gather the destination registers into the final destination.
17348bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
1735bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
17368bcb0991SDimitry Andric     MI.eraseFromParent();
17378bcb0991SDimitry Andric     return Legalized;
17388bcb0991SDimitry Andric   }
1739480093f4SDimitry Andric   case TargetOpcode::G_BSWAP:
1740480093f4SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
1741480093f4SDimitry Andric     if (SizeOp0 % NarrowSize != 0)
1742480093f4SDimitry Andric       return UnableToLegalize;
1743480093f4SDimitry Andric 
1744480093f4SDimitry Andric     Observer.changingInstr(MI);
1745480093f4SDimitry Andric     SmallVector<Register, 2> SrcRegs, DstRegs;
1746480093f4SDimitry Andric     unsigned NumParts = SizeOp0 / NarrowSize;
1747480093f4SDimitry Andric     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1748480093f4SDimitry Andric 
1749480093f4SDimitry Andric     for (unsigned i = 0; i < NumParts; ++i) {
1750480093f4SDimitry Andric       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1751480093f4SDimitry Andric                                            {SrcRegs[NumParts - 1 - i]});
1752480093f4SDimitry Andric       DstRegs.push_back(DstPart.getReg(0));
1753480093f4SDimitry Andric     }
1754480093f4SDimitry Andric 
1755bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1756480093f4SDimitry Andric 
1757480093f4SDimitry Andric     Observer.changedInstr(MI);
1758480093f4SDimitry Andric     MI.eraseFromParent();
1759480093f4SDimitry Andric     return Legalized;
1760480093f4SDimitry Andric   }
1761e8d8bef9SDimitry Andric   case TargetOpcode::G_PTR_ADD:
17625ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
17635ffd83dbSDimitry Andric     if (TypeIdx != 1)
17645ffd83dbSDimitry Andric       return UnableToLegalize;
17655ffd83dbSDimitry Andric     Observer.changingInstr(MI);
17665ffd83dbSDimitry Andric     narrowScalarSrc(MI, NarrowTy, 2);
17675ffd83dbSDimitry Andric     Observer.changedInstr(MI);
17685ffd83dbSDimitry Andric     return Legalized;
17690b57cec5SDimitry Andric   }
177023408297SDimitry Andric   case TargetOpcode::G_FPTOUI:
177123408297SDimitry Andric   case TargetOpcode::G_FPTOSI:
177223408297SDimitry Andric     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1773e8d8bef9SDimitry Andric   case TargetOpcode::G_FPEXT:
1774e8d8bef9SDimitry Andric     if (TypeIdx != 0)
1775e8d8bef9SDimitry Andric       return UnableToLegalize;
1776e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
1777e8d8bef9SDimitry Andric     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1778e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
1779e8d8bef9SDimitry Andric     return Legalized;
178006c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
178106c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP:
178206c3fb27SDimitry Andric     return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
17830b57cec5SDimitry Andric   }
17845ffd83dbSDimitry Andric }
17855ffd83dbSDimitry Andric 
17865ffd83dbSDimitry Andric Register LegalizerHelper::coerceToScalar(Register Val) {
17875ffd83dbSDimitry Andric   LLT Ty = MRI.getType(Val);
17885ffd83dbSDimitry Andric   if (Ty.isScalar())
17895ffd83dbSDimitry Andric     return Val;
17905ffd83dbSDimitry Andric 
17915ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
17925ffd83dbSDimitry Andric   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
17935ffd83dbSDimitry Andric   if (Ty.isPointer()) {
17945ffd83dbSDimitry Andric     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
17955ffd83dbSDimitry Andric       return Register();
17965ffd83dbSDimitry Andric     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
17975ffd83dbSDimitry Andric   }
17985ffd83dbSDimitry Andric 
17995ffd83dbSDimitry Andric   Register NewVal = Val;
18005ffd83dbSDimitry Andric 
18015ffd83dbSDimitry Andric   assert(Ty.isVector());
18025ffd83dbSDimitry Andric   LLT EltTy = Ty.getElementType();
18035ffd83dbSDimitry Andric   if (EltTy.isPointer())
18045ffd83dbSDimitry Andric     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
18055ffd83dbSDimitry Andric   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
18065ffd83dbSDimitry Andric }
18070b57cec5SDimitry Andric 
18080b57cec5SDimitry Andric void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
18090b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned ExtOpcode) {
18100b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18115ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
18125ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
18130b57cec5SDimitry Andric }
18140b57cec5SDimitry Andric 
18150b57cec5SDimitry Andric void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
18160b57cec5SDimitry Andric                                       unsigned OpIdx) {
18170b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18185ffd83dbSDimitry Andric   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
18195ffd83dbSDimitry Andric   MO.setReg(ExtB.getReg(0));
18200b57cec5SDimitry Andric }
18210b57cec5SDimitry Andric 
18220b57cec5SDimitry Andric void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
18230b57cec5SDimitry Andric                                      unsigned OpIdx, unsigned TruncOpcode) {
18240b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18250b57cec5SDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
18260b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
18275ffd83dbSDimitry Andric   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
18280b57cec5SDimitry Andric   MO.setReg(DstExt);
18290b57cec5SDimitry Andric }
18300b57cec5SDimitry Andric 
18310b57cec5SDimitry Andric void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
18320b57cec5SDimitry Andric                                       unsigned OpIdx, unsigned ExtOpcode) {
18330b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18340b57cec5SDimitry Andric   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
18350b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
18365ffd83dbSDimitry Andric   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
18370b57cec5SDimitry Andric   MO.setReg(DstTrunc);
18380b57cec5SDimitry Andric }
18390b57cec5SDimitry Andric 
18400b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
18410b57cec5SDimitry Andric                                             unsigned OpIdx) {
18420b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18430b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
18440eae32dcSDimitry Andric   Register Dst = MO.getReg();
18450eae32dcSDimitry Andric   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
18460eae32dcSDimitry Andric   MO.setReg(DstExt);
18470eae32dcSDimitry Andric   MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
18480b57cec5SDimitry Andric }
18490b57cec5SDimitry Andric 
18500b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
18510b57cec5SDimitry Andric                                             unsigned OpIdx) {
18520b57cec5SDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18530eae32dcSDimitry Andric   SmallVector<Register, 8> Regs;
18540eae32dcSDimitry Andric   MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
18550b57cec5SDimitry Andric }
18560b57cec5SDimitry Andric 
18575ffd83dbSDimitry Andric void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
18585ffd83dbSDimitry Andric   MachineOperand &Op = MI.getOperand(OpIdx);
18595ffd83dbSDimitry Andric   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
18605ffd83dbSDimitry Andric }
18615ffd83dbSDimitry Andric 
18625ffd83dbSDimitry Andric void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
18635ffd83dbSDimitry Andric   MachineOperand &MO = MI.getOperand(OpIdx);
18645ffd83dbSDimitry Andric   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
18655ffd83dbSDimitry Andric   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
18665ffd83dbSDimitry Andric   MIRBuilder.buildBitcast(MO, CastDst);
18675ffd83dbSDimitry Andric   MO.setReg(CastDst);
18685ffd83dbSDimitry Andric }
18695ffd83dbSDimitry Andric 
18700b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
18710b57cec5SDimitry Andric LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
18720b57cec5SDimitry Andric                                         LLT WideTy) {
18730b57cec5SDimitry Andric   if (TypeIdx != 1)
18740b57cec5SDimitry Andric     return UnableToLegalize;
18750b57cec5SDimitry Andric 
187606c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
18770b57cec5SDimitry Andric   if (DstTy.isVector())
18780b57cec5SDimitry Andric     return UnableToLegalize;
18790b57cec5SDimitry Andric 
188006c3fb27SDimitry Andric   LLT SrcTy = MRI.getType(Src1Reg);
18810b57cec5SDimitry Andric   const int DstSize = DstTy.getSizeInBits();
18820b57cec5SDimitry Andric   const int SrcSize = SrcTy.getSizeInBits();
18830b57cec5SDimitry Andric   const int WideSize = WideTy.getSizeInBits();
18840b57cec5SDimitry Andric   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
18850b57cec5SDimitry Andric 
18860b57cec5SDimitry Andric   unsigned NumOps = MI.getNumOperands();
18870b57cec5SDimitry Andric   unsigned NumSrc = MI.getNumOperands() - 1;
18880b57cec5SDimitry Andric   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
18890b57cec5SDimitry Andric 
18900b57cec5SDimitry Andric   if (WideSize >= DstSize) {
18910b57cec5SDimitry Andric     // Directly pack the bits in the target type.
189206c3fb27SDimitry Andric     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
18930b57cec5SDimitry Andric 
18940b57cec5SDimitry Andric     for (unsigned I = 2; I != NumOps; ++I) {
18950b57cec5SDimitry Andric       const unsigned Offset = (I - 1) * PartSize;
18960b57cec5SDimitry Andric 
18970b57cec5SDimitry Andric       Register SrcReg = MI.getOperand(I).getReg();
18980b57cec5SDimitry Andric       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
18990b57cec5SDimitry Andric 
19000b57cec5SDimitry Andric       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
19010b57cec5SDimitry Andric 
19028bcb0991SDimitry Andric       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
19030b57cec5SDimitry Andric         MRI.createGenericVirtualRegister(WideTy);
19040b57cec5SDimitry Andric 
19050b57cec5SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
19060b57cec5SDimitry Andric       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
19070b57cec5SDimitry Andric       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
19080b57cec5SDimitry Andric       ResultReg = NextResult;
19090b57cec5SDimitry Andric     }
19100b57cec5SDimitry Andric 
19110b57cec5SDimitry Andric     if (WideSize > DstSize)
19120b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg, ResultReg);
19138bcb0991SDimitry Andric     else if (DstTy.isPointer())
19148bcb0991SDimitry Andric       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
19150b57cec5SDimitry Andric 
19160b57cec5SDimitry Andric     MI.eraseFromParent();
19170b57cec5SDimitry Andric     return Legalized;
19180b57cec5SDimitry Andric   }
19190b57cec5SDimitry Andric 
19200b57cec5SDimitry Andric   // Unmerge the original values to the GCD type, and recombine to the next
19210b57cec5SDimitry Andric   // multiple greater than the original type.
19220b57cec5SDimitry Andric   //
19230b57cec5SDimitry Andric   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
19240b57cec5SDimitry Andric   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
19250b57cec5SDimitry Andric   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
19260b57cec5SDimitry Andric   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
19270b57cec5SDimitry Andric   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
19280b57cec5SDimitry Andric   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
19290b57cec5SDimitry Andric   // %12:_(s12) = G_MERGE_VALUES %10, %11
19300b57cec5SDimitry Andric   //
19310b57cec5SDimitry Andric   // Padding with undef if necessary:
19320b57cec5SDimitry Andric   //
19330b57cec5SDimitry Andric   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
19340b57cec5SDimitry Andric   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
19350b57cec5SDimitry Andric   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
19360b57cec5SDimitry Andric   // %7:_(s2) = G_IMPLICIT_DEF
19370b57cec5SDimitry Andric   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
19380b57cec5SDimitry Andric   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
19390b57cec5SDimitry Andric   // %10:_(s12) = G_MERGE_VALUES %8, %9
19400b57cec5SDimitry Andric 
1941bdd1243dSDimitry Andric   const int GCD = std::gcd(SrcSize, WideSize);
19420b57cec5SDimitry Andric   LLT GCDTy = LLT::scalar(GCD);
19430b57cec5SDimitry Andric 
19440b57cec5SDimitry Andric   SmallVector<Register, 8> Parts;
19450b57cec5SDimitry Andric   SmallVector<Register, 8> NewMergeRegs;
19460b57cec5SDimitry Andric   SmallVector<Register, 8> Unmerges;
19470b57cec5SDimitry Andric   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
19480b57cec5SDimitry Andric 
19490b57cec5SDimitry Andric   // Decompose the original operands if they don't evenly divide.
19504824e7fdSDimitry Andric   for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
19514824e7fdSDimitry Andric     Register SrcReg = MO.getReg();
19520b57cec5SDimitry Andric     if (GCD == SrcSize) {
19530b57cec5SDimitry Andric       Unmerges.push_back(SrcReg);
19540b57cec5SDimitry Andric     } else {
19550b57cec5SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
19560b57cec5SDimitry Andric       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
19570b57cec5SDimitry Andric         Unmerges.push_back(Unmerge.getReg(J));
19580b57cec5SDimitry Andric     }
19590b57cec5SDimitry Andric   }
19600b57cec5SDimitry Andric 
19610b57cec5SDimitry Andric   // Pad with undef to the next size that is a multiple of the requested size.
19620b57cec5SDimitry Andric   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
19630b57cec5SDimitry Andric     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
19640b57cec5SDimitry Andric     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
19650b57cec5SDimitry Andric       Unmerges.push_back(UndefReg);
19660b57cec5SDimitry Andric   }
19670b57cec5SDimitry Andric 
19680b57cec5SDimitry Andric   const int PartsPerGCD = WideSize / GCD;
19690b57cec5SDimitry Andric 
19700b57cec5SDimitry Andric   // Build merges of each piece.
19710b57cec5SDimitry Andric   ArrayRef<Register> Slicer(Unmerges);
19720b57cec5SDimitry Andric   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1973bdd1243dSDimitry Andric     auto Merge =
1974bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
19750b57cec5SDimitry Andric     NewMergeRegs.push_back(Merge.getReg(0));
19760b57cec5SDimitry Andric   }
19770b57cec5SDimitry Andric 
19780b57cec5SDimitry Andric   // A truncate may be necessary if the requested type doesn't evenly divide the
19790b57cec5SDimitry Andric   // original result type.
19800b57cec5SDimitry Andric   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1981bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
19820b57cec5SDimitry Andric   } else {
1983bdd1243dSDimitry Andric     auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
19840b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
19850b57cec5SDimitry Andric   }
19860b57cec5SDimitry Andric 
19870b57cec5SDimitry Andric   MI.eraseFromParent();
19880b57cec5SDimitry Andric   return Legalized;
19890b57cec5SDimitry Andric }
19900b57cec5SDimitry Andric 
19910b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
19920b57cec5SDimitry Andric LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
19930b57cec5SDimitry Andric                                           LLT WideTy) {
19940b57cec5SDimitry Andric   if (TypeIdx != 0)
19950b57cec5SDimitry Andric     return UnableToLegalize;
19960b57cec5SDimitry Andric 
19975ffd83dbSDimitry Andric   int NumDst = MI.getNumOperands() - 1;
19980b57cec5SDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
19990b57cec5SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
20005ffd83dbSDimitry Andric   if (SrcTy.isVector())
20010b57cec5SDimitry Andric     return UnableToLegalize;
20020b57cec5SDimitry Andric 
20030b57cec5SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
20040b57cec5SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
20050b57cec5SDimitry Andric   if (!DstTy.isScalar())
20060b57cec5SDimitry Andric     return UnableToLegalize;
20070b57cec5SDimitry Andric 
20085ffd83dbSDimitry Andric   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
20095ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
20105ffd83dbSDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
20115ffd83dbSDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
20125ffd83dbSDimitry Andric         LLVM_DEBUG(
20135ffd83dbSDimitry Andric             dbgs() << "Not casting non-integral address space integer\n");
20145ffd83dbSDimitry Andric         return UnableToLegalize;
20150b57cec5SDimitry Andric       }
20160b57cec5SDimitry Andric 
20175ffd83dbSDimitry Andric       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
20185ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
20195ffd83dbSDimitry Andric     }
20200b57cec5SDimitry Andric 
20215ffd83dbSDimitry Andric     // Widen SrcTy to WideTy. This does not affect the result, but since the
20225ffd83dbSDimitry Andric     // user requested this size, it is probably better handled than SrcTy and
202304eeddc0SDimitry Andric     // should reduce the total number of legalization artifacts.
20245ffd83dbSDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
20255ffd83dbSDimitry Andric       SrcTy = WideTy;
20265ffd83dbSDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
20275ffd83dbSDimitry Andric     }
20280b57cec5SDimitry Andric 
20295ffd83dbSDimitry Andric     // Theres no unmerge type to target. Directly extract the bits from the
20305ffd83dbSDimitry Andric     // source type
20315ffd83dbSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
20320b57cec5SDimitry Andric 
20335ffd83dbSDimitry Andric     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
20345ffd83dbSDimitry Andric     for (int I = 1; I != NumDst; ++I) {
20355ffd83dbSDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
20365ffd83dbSDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
20375ffd83dbSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
20385ffd83dbSDimitry Andric     }
20395ffd83dbSDimitry Andric 
20405ffd83dbSDimitry Andric     MI.eraseFromParent();
20415ffd83dbSDimitry Andric     return Legalized;
20425ffd83dbSDimitry Andric   }
20435ffd83dbSDimitry Andric 
20445ffd83dbSDimitry Andric   // Extend the source to a wider type.
20455ffd83dbSDimitry Andric   LLT LCMTy = getLCMType(SrcTy, WideTy);
20465ffd83dbSDimitry Andric 
20475ffd83dbSDimitry Andric   Register WideSrc = SrcReg;
20485ffd83dbSDimitry Andric   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
20495ffd83dbSDimitry Andric     // TODO: If this is an integral address space, cast to integer and anyext.
20505ffd83dbSDimitry Andric     if (SrcTy.isPointer()) {
20515ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
20525ffd83dbSDimitry Andric       return UnableToLegalize;
20535ffd83dbSDimitry Andric     }
20545ffd83dbSDimitry Andric 
20555ffd83dbSDimitry Andric     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
20565ffd83dbSDimitry Andric   }
20575ffd83dbSDimitry Andric 
20585ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
20595ffd83dbSDimitry Andric 
2060e8d8bef9SDimitry Andric   // Create a sequence of unmerges and merges to the original results. Since we
2061e8d8bef9SDimitry Andric   // may have widened the source, we will need to pad the results with dead defs
2062e8d8bef9SDimitry Andric   // to cover the source register.
2063e8d8bef9SDimitry Andric   // e.g. widen s48 to s64:
2064e8d8bef9SDimitry Andric   // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
20655ffd83dbSDimitry Andric   //
20665ffd83dbSDimitry Andric   // =>
2067e8d8bef9SDimitry Andric   //  %4:_(s192) = G_ANYEXT %0:_(s96)
2068e8d8bef9SDimitry Andric   //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2069e8d8bef9SDimitry Andric   //  ; unpack to GCD type, with extra dead defs
2070e8d8bef9SDimitry Andric   //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2071e8d8bef9SDimitry Andric   //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2072e8d8bef9SDimitry Andric   //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2073e8d8bef9SDimitry Andric   //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
2074e8d8bef9SDimitry Andric   //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2075e8d8bef9SDimitry Andric   const LLT GCDTy = getGCDType(WideTy, DstTy);
20765ffd83dbSDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
2077e8d8bef9SDimitry Andric   const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2078e8d8bef9SDimitry Andric 
2079e8d8bef9SDimitry Andric   // Directly unmerge to the destination without going through a GCD type
2080e8d8bef9SDimitry Andric   // if possible
2081e8d8bef9SDimitry Andric   if (PartsPerRemerge == 1) {
20825ffd83dbSDimitry Andric     const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
20835ffd83dbSDimitry Andric 
20845ffd83dbSDimitry Andric     for (int I = 0; I != NumUnmerge; ++I) {
20855ffd83dbSDimitry Andric       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
20865ffd83dbSDimitry Andric 
20875ffd83dbSDimitry Andric       for (int J = 0; J != PartsPerUnmerge; ++J) {
20885ffd83dbSDimitry Andric         int Idx = I * PartsPerUnmerge + J;
20895ffd83dbSDimitry Andric         if (Idx < NumDst)
20905ffd83dbSDimitry Andric           MIB.addDef(MI.getOperand(Idx).getReg());
20915ffd83dbSDimitry Andric         else {
20925ffd83dbSDimitry Andric           // Create dead def for excess components.
20935ffd83dbSDimitry Andric           MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
20945ffd83dbSDimitry Andric         }
20955ffd83dbSDimitry Andric       }
20965ffd83dbSDimitry Andric 
20975ffd83dbSDimitry Andric       MIB.addUse(Unmerge.getReg(I));
20985ffd83dbSDimitry Andric     }
2099e8d8bef9SDimitry Andric   } else {
2100e8d8bef9SDimitry Andric     SmallVector<Register, 16> Parts;
2101e8d8bef9SDimitry Andric     for (int J = 0; J != NumUnmerge; ++J)
2102e8d8bef9SDimitry Andric       extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2103e8d8bef9SDimitry Andric 
2104e8d8bef9SDimitry Andric     SmallVector<Register, 8> RemergeParts;
2105e8d8bef9SDimitry Andric     for (int I = 0; I != NumDst; ++I) {
2106e8d8bef9SDimitry Andric       for (int J = 0; J < PartsPerRemerge; ++J) {
2107e8d8bef9SDimitry Andric         const int Idx = I * PartsPerRemerge + J;
2108e8d8bef9SDimitry Andric         RemergeParts.emplace_back(Parts[Idx]);
2109e8d8bef9SDimitry Andric       }
2110e8d8bef9SDimitry Andric 
2111bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2112e8d8bef9SDimitry Andric       RemergeParts.clear();
2113e8d8bef9SDimitry Andric     }
2114e8d8bef9SDimitry Andric   }
21155ffd83dbSDimitry Andric 
21165ffd83dbSDimitry Andric   MI.eraseFromParent();
21170b57cec5SDimitry Andric   return Legalized;
21180b57cec5SDimitry Andric }
21190b57cec5SDimitry Andric 
21200b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
21210b57cec5SDimitry Andric LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
21220b57cec5SDimitry Andric                                     LLT WideTy) {
212306c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
21240b57cec5SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
21250b57cec5SDimitry Andric 
21260b57cec5SDimitry Andric   if (TypeIdx == 0) {
21270b57cec5SDimitry Andric     if (SrcTy.isVector() || DstTy.isVector())
21280b57cec5SDimitry Andric       return UnableToLegalize;
21290b57cec5SDimitry Andric 
21300b57cec5SDimitry Andric     SrcOp Src(SrcReg);
21310b57cec5SDimitry Andric     if (SrcTy.isPointer()) {
21320b57cec5SDimitry Andric       // Extracts from pointers can be handled only if they are really just
21330b57cec5SDimitry Andric       // simple integers.
21340b57cec5SDimitry Andric       const DataLayout &DL = MIRBuilder.getDataLayout();
21350b57cec5SDimitry Andric       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
21360b57cec5SDimitry Andric         return UnableToLegalize;
21370b57cec5SDimitry Andric 
21380b57cec5SDimitry Andric       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
21390b57cec5SDimitry Andric       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
21400b57cec5SDimitry Andric       SrcTy = SrcAsIntTy;
21410b57cec5SDimitry Andric     }
21420b57cec5SDimitry Andric 
21430b57cec5SDimitry Andric     if (DstTy.isPointer())
21440b57cec5SDimitry Andric       return UnableToLegalize;
21450b57cec5SDimitry Andric 
21460b57cec5SDimitry Andric     if (Offset == 0) {
21470b57cec5SDimitry Andric       // Avoid a shift in the degenerate case.
21480b57cec5SDimitry Andric       MIRBuilder.buildTrunc(DstReg,
21490b57cec5SDimitry Andric                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
21500b57cec5SDimitry Andric       MI.eraseFromParent();
21510b57cec5SDimitry Andric       return Legalized;
21520b57cec5SDimitry Andric     }
21530b57cec5SDimitry Andric 
21540b57cec5SDimitry Andric     // Do a shift in the source type.
21550b57cec5SDimitry Andric     LLT ShiftTy = SrcTy;
21560b57cec5SDimitry Andric     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
21570b57cec5SDimitry Andric       Src = MIRBuilder.buildAnyExt(WideTy, Src);
21580b57cec5SDimitry Andric       ShiftTy = WideTy;
2159e8d8bef9SDimitry Andric     }
21600b57cec5SDimitry Andric 
21610b57cec5SDimitry Andric     auto LShr = MIRBuilder.buildLShr(
21620b57cec5SDimitry Andric       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
21630b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, LShr);
21640b57cec5SDimitry Andric     MI.eraseFromParent();
21650b57cec5SDimitry Andric     return Legalized;
21660b57cec5SDimitry Andric   }
21670b57cec5SDimitry Andric 
21680b57cec5SDimitry Andric   if (SrcTy.isScalar()) {
21690b57cec5SDimitry Andric     Observer.changingInstr(MI);
21700b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
21710b57cec5SDimitry Andric     Observer.changedInstr(MI);
21720b57cec5SDimitry Andric     return Legalized;
21730b57cec5SDimitry Andric   }
21740b57cec5SDimitry Andric 
21750b57cec5SDimitry Andric   if (!SrcTy.isVector())
21760b57cec5SDimitry Andric     return UnableToLegalize;
21770b57cec5SDimitry Andric 
21780b57cec5SDimitry Andric   if (DstTy != SrcTy.getElementType())
21790b57cec5SDimitry Andric     return UnableToLegalize;
21800b57cec5SDimitry Andric 
21810b57cec5SDimitry Andric   if (Offset % SrcTy.getScalarSizeInBits() != 0)
21820b57cec5SDimitry Andric     return UnableToLegalize;
21830b57cec5SDimitry Andric 
21840b57cec5SDimitry Andric   Observer.changingInstr(MI);
21850b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
21860b57cec5SDimitry Andric 
21870b57cec5SDimitry Andric   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
21880b57cec5SDimitry Andric                           Offset);
21890b57cec5SDimitry Andric   widenScalarDst(MI, WideTy.getScalarType(), 0);
21900b57cec5SDimitry Andric   Observer.changedInstr(MI);
21910b57cec5SDimitry Andric   return Legalized;
21920b57cec5SDimitry Andric }
21930b57cec5SDimitry Andric 
21940b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
21950b57cec5SDimitry Andric LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
21960b57cec5SDimitry Andric                                    LLT WideTy) {
2197e8d8bef9SDimitry Andric   if (TypeIdx != 0 || WideTy.isVector())
21980b57cec5SDimitry Andric     return UnableToLegalize;
21990b57cec5SDimitry Andric   Observer.changingInstr(MI);
22000b57cec5SDimitry Andric   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
22010b57cec5SDimitry Andric   widenScalarDst(MI, WideTy);
22020b57cec5SDimitry Andric   Observer.changedInstr(MI);
22030b57cec5SDimitry Andric   return Legalized;
22040b57cec5SDimitry Andric }
22050b57cec5SDimitry Andric 
22060b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
2207fe6060f1SDimitry Andric LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2208e8d8bef9SDimitry Andric                                            LLT WideTy) {
2209fe6060f1SDimitry Andric   unsigned Opcode;
2210fe6060f1SDimitry Andric   unsigned ExtOpcode;
2211bdd1243dSDimitry Andric   std::optional<Register> CarryIn;
2212fe6060f1SDimitry Andric   switch (MI.getOpcode()) {
2213fe6060f1SDimitry Andric   default:
2214fe6060f1SDimitry Andric     llvm_unreachable("Unexpected opcode!");
2215fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
2216fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2217fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2218fe6060f1SDimitry Andric     break;
2219fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
2220fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2221fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2222fe6060f1SDimitry Andric     break;
2223fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
2224fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_ADD;
2225fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2226fe6060f1SDimitry Andric     break;
2227fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
2228fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_SUB;
2229fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2230fe6060f1SDimitry Andric     break;
2231fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2232fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2233fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2234fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2235fe6060f1SDimitry Andric     break;
2236fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2237fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2238fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_SEXT;
2239fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2240fe6060f1SDimitry Andric     break;
2241fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2242fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_UADDE;
2243fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2244fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2245fe6060f1SDimitry Andric     break;
2246fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2247fe6060f1SDimitry Andric     Opcode = TargetOpcode::G_USUBE;
2248fe6060f1SDimitry Andric     ExtOpcode = TargetOpcode::G_ZEXT;
2249fe6060f1SDimitry Andric     CarryIn = MI.getOperand(4).getReg();
2250fe6060f1SDimitry Andric     break;
2251fe6060f1SDimitry Andric   }
2252fe6060f1SDimitry Andric 
225381ad6265SDimitry Andric   if (TypeIdx == 1) {
225481ad6265SDimitry Andric     unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
225581ad6265SDimitry Andric 
225681ad6265SDimitry Andric     Observer.changingInstr(MI);
225781ad6265SDimitry Andric     if (CarryIn)
225881ad6265SDimitry Andric       widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2259bdd1243dSDimitry Andric     widenScalarDst(MI, WideTy, 1);
226081ad6265SDimitry Andric 
226181ad6265SDimitry Andric     Observer.changedInstr(MI);
226281ad6265SDimitry Andric     return Legalized;
226381ad6265SDimitry Andric   }
226481ad6265SDimitry Andric 
2265e8d8bef9SDimitry Andric   auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2266e8d8bef9SDimitry Andric   auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2267e8d8bef9SDimitry Andric   // Do the arithmetic in the larger type.
2268fe6060f1SDimitry Andric   Register NewOp;
2269fe6060f1SDimitry Andric   if (CarryIn) {
2270fe6060f1SDimitry Andric     LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2271fe6060f1SDimitry Andric     NewOp = MIRBuilder
2272fe6060f1SDimitry Andric                 .buildInstr(Opcode, {WideTy, CarryOutTy},
2273fe6060f1SDimitry Andric                             {LHSExt, RHSExt, *CarryIn})
2274fe6060f1SDimitry Andric                 .getReg(0);
2275fe6060f1SDimitry Andric   } else {
2276fe6060f1SDimitry Andric     NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2277fe6060f1SDimitry Andric   }
2278e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2279e8d8bef9SDimitry Andric   auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2280e8d8bef9SDimitry Andric   auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2281e8d8bef9SDimitry Andric   // There is no overflow if the ExtOp is the same as NewOp.
2282e8d8bef9SDimitry Andric   MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2283e8d8bef9SDimitry Andric   // Now trunc the NewOp to the original result.
2284e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2285e8d8bef9SDimitry Andric   MI.eraseFromParent();
2286e8d8bef9SDimitry Andric   return Legalized;
2287e8d8bef9SDimitry Andric }
2288e8d8bef9SDimitry Andric 
2289e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
2290e8d8bef9SDimitry Andric LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
22915ffd83dbSDimitry Andric                                          LLT WideTy) {
22925ffd83dbSDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2293e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2294e8d8bef9SDimitry Andric                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2295e8d8bef9SDimitry Andric   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2296e8d8bef9SDimitry Andric                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
22975ffd83dbSDimitry Andric   // We can convert this to:
22985ffd83dbSDimitry Andric   //   1. Any extend iN to iM
22995ffd83dbSDimitry Andric   //   2. SHL by M-N
2300e8d8bef9SDimitry Andric   //   3. [US][ADD|SUB|SHL]SAT
23015ffd83dbSDimitry Andric   //   4. L/ASHR by M-N
23025ffd83dbSDimitry Andric   //
23035ffd83dbSDimitry Andric   // It may be more efficient to lower this to a min and a max operation in
23045ffd83dbSDimitry Andric   // the higher precision arithmetic if the promoted operation isn't legal,
23055ffd83dbSDimitry Andric   // but this decision is up to the target's lowering request.
23065ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
23070b57cec5SDimitry Andric 
23085ffd83dbSDimitry Andric   unsigned NewBits = WideTy.getScalarSizeInBits();
23095ffd83dbSDimitry Andric   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
23105ffd83dbSDimitry Andric 
2311e8d8bef9SDimitry Andric   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2312e8d8bef9SDimitry Andric   // must not left shift the RHS to preserve the shift amount.
23135ffd83dbSDimitry Andric   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2314e8d8bef9SDimitry Andric   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2315e8d8bef9SDimitry Andric                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
23165ffd83dbSDimitry Andric   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
23175ffd83dbSDimitry Andric   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2318e8d8bef9SDimitry Andric   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
23195ffd83dbSDimitry Andric 
23205ffd83dbSDimitry Andric   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
23215ffd83dbSDimitry Andric                                         {ShiftL, ShiftR}, MI.getFlags());
23225ffd83dbSDimitry Andric 
23235ffd83dbSDimitry Andric   // Use a shift that will preserve the number of sign bits when the trunc is
23245ffd83dbSDimitry Andric   // folded away.
23255ffd83dbSDimitry Andric   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
23265ffd83dbSDimitry Andric                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
23275ffd83dbSDimitry Andric 
23285ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(DstReg, Result);
23295ffd83dbSDimitry Andric   MI.eraseFromParent();
23305ffd83dbSDimitry Andric   return Legalized;
23315ffd83dbSDimitry Andric }
23325ffd83dbSDimitry Andric 
23335ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
2334fe6060f1SDimitry Andric LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2335fe6060f1SDimitry Andric                                  LLT WideTy) {
233681ad6265SDimitry Andric   if (TypeIdx == 1) {
233781ad6265SDimitry Andric     Observer.changingInstr(MI);
233881ad6265SDimitry Andric     widenScalarDst(MI, WideTy, 1);
233981ad6265SDimitry Andric     Observer.changedInstr(MI);
234081ad6265SDimitry Andric     return Legalized;
234181ad6265SDimitry Andric   }
2342fe6060f1SDimitry Andric 
2343fe6060f1SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
234406c3fb27SDimitry Andric   auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2345fe6060f1SDimitry Andric   LLT SrcTy = MRI.getType(LHS);
2346fe6060f1SDimitry Andric   LLT OverflowTy = MRI.getType(OriginalOverflow);
2347fe6060f1SDimitry Andric   unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2348fe6060f1SDimitry Andric 
2349fe6060f1SDimitry Andric   // To determine if the result overflowed in the larger type, we extend the
2350fe6060f1SDimitry Andric   // input to the larger type, do the multiply (checking if it overflows),
2351fe6060f1SDimitry Andric   // then also check the high bits of the result to see if overflow happened
2352fe6060f1SDimitry Andric   // there.
2353fe6060f1SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2354fe6060f1SDimitry Andric   auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2355fe6060f1SDimitry Andric   auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2356fe6060f1SDimitry Andric 
23575f757f3fSDimitry Andric   // Multiplication cannot overflow if the WideTy is >= 2 * original width,
23585f757f3fSDimitry Andric   // so we don't need to check the overflow result of larger type Mulo.
23595f757f3fSDimitry Andric   bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
23605f757f3fSDimitry Andric 
23615f757f3fSDimitry Andric   unsigned MulOpc =
23625f757f3fSDimitry Andric       WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
23635f757f3fSDimitry Andric 
23645f757f3fSDimitry Andric   MachineInstrBuilder Mulo;
23655f757f3fSDimitry Andric   if (WideMulCanOverflow)
23665f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2367fe6060f1SDimitry Andric                                  {LeftOperand, RightOperand});
23685f757f3fSDimitry Andric   else
23695f757f3fSDimitry Andric     Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
23705f757f3fSDimitry Andric 
2371fe6060f1SDimitry Andric   auto Mul = Mulo->getOperand(0);
2372fe6060f1SDimitry Andric   MIRBuilder.buildTrunc(Result, Mul);
2373fe6060f1SDimitry Andric 
2374fe6060f1SDimitry Andric   MachineInstrBuilder ExtResult;
2375fe6060f1SDimitry Andric   // Overflow occurred if it occurred in the larger type, or if the high part
2376fe6060f1SDimitry Andric   // of the result does not zero/sign-extend the low part.  Check this second
2377fe6060f1SDimitry Andric   // possibility first.
2378fe6060f1SDimitry Andric   if (IsSigned) {
2379fe6060f1SDimitry Andric     // For signed, overflow occurred when the high part does not sign-extend
2380fe6060f1SDimitry Andric     // the low part.
2381fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2382fe6060f1SDimitry Andric   } else {
2383fe6060f1SDimitry Andric     // Unsigned overflow occurred when the high part does not zero-extend the
2384fe6060f1SDimitry Andric     // low part.
2385fe6060f1SDimitry Andric     ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2386fe6060f1SDimitry Andric   }
2387fe6060f1SDimitry Andric 
23885f757f3fSDimitry Andric   if (WideMulCanOverflow) {
2389fe6060f1SDimitry Andric     auto Overflow =
2390fe6060f1SDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2391fe6060f1SDimitry Andric     // Finally check if the multiplication in the larger type itself overflowed.
2392fe6060f1SDimitry Andric     MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2393fe6060f1SDimitry Andric   } else {
2394fe6060f1SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2395fe6060f1SDimitry Andric   }
2396fe6060f1SDimitry Andric   MI.eraseFromParent();
2397fe6060f1SDimitry Andric   return Legalized;
2398fe6060f1SDimitry Andric }
2399fe6060f1SDimitry Andric 
2400fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
24015ffd83dbSDimitry Andric LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
24020b57cec5SDimitry Andric   switch (MI.getOpcode()) {
24030b57cec5SDimitry Andric   default:
24040b57cec5SDimitry Andric     return UnableToLegalize;
2405fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
2406fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
2407fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
2408fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
2409fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
2410fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
2411fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MIN:
2412fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MAX:
2413fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMIN:
2414fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMAX:
2415fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2416fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2417fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2418fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2419fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2420fe6060f1SDimitry Andric     return Legalized;
2421fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
2422fe6060f1SDimitry Andric     assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2423fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2424fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2425fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2426fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 0);
2427fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2428fe6060f1SDimitry Andric     return Legalized;
2429fe6060f1SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2430fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2431fe6060f1SDimitry Andric       Observer.changingInstr(MI);
2432fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2433fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2434fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy, 0);
2435fe6060f1SDimitry Andric       Observer.changedInstr(MI);
2436fe6060f1SDimitry Andric       return Legalized;
2437fe6060f1SDimitry Andric     }
2438fe6060f1SDimitry Andric     assert(TypeIdx == 1 &&
2439fe6060f1SDimitry Andric            "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2440fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2441fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2442fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2443fe6060f1SDimitry Andric     return Legalized;
24440b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
24450b57cec5SDimitry Andric     return widenScalarExtract(MI, TypeIdx, WideTy);
24460b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
24470b57cec5SDimitry Andric     return widenScalarInsert(MI, TypeIdx, WideTy);
24480b57cec5SDimitry Andric   case TargetOpcode::G_MERGE_VALUES:
24490b57cec5SDimitry Andric     return widenScalarMergeValues(MI, TypeIdx, WideTy);
24500b57cec5SDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
24510b57cec5SDimitry Andric     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2452e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
2453e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
24540b57cec5SDimitry Andric   case TargetOpcode::G_UADDO:
2455e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBO:
2456fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
2457fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
2458fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
2459fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
2460fe6060f1SDimitry Andric     return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2461fe6060f1SDimitry Andric   case TargetOpcode::G_UMULO:
2462fe6060f1SDimitry Andric   case TargetOpcode::G_SMULO:
2463fe6060f1SDimitry Andric     return widenScalarMulo(MI, TypeIdx, WideTy);
24645ffd83dbSDimitry Andric   case TargetOpcode::G_SADDSAT:
24655ffd83dbSDimitry Andric   case TargetOpcode::G_SSUBSAT:
2466e8d8bef9SDimitry Andric   case TargetOpcode::G_SSHLSAT:
24675ffd83dbSDimitry Andric   case TargetOpcode::G_UADDSAT:
24685ffd83dbSDimitry Andric   case TargetOpcode::G_USUBSAT:
2469e8d8bef9SDimitry Andric   case TargetOpcode::G_USHLSAT:
2470e8d8bef9SDimitry Andric     return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
24710b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
24720b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
24730b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
24740b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
24750b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP: {
24760b57cec5SDimitry Andric     if (TypeIdx == 0) {
24770b57cec5SDimitry Andric       Observer.changingInstr(MI);
24780b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
24790b57cec5SDimitry Andric       Observer.changedInstr(MI);
24800b57cec5SDimitry Andric       return Legalized;
24810b57cec5SDimitry Andric     }
24820b57cec5SDimitry Andric 
24830b57cec5SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
24840b57cec5SDimitry Andric 
2485349cc55cSDimitry Andric     // First extend the input.
2486349cc55cSDimitry Andric     unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2487349cc55cSDimitry Andric                               MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2488349cc55cSDimitry Andric                           ? TargetOpcode::G_ANYEXT
2489349cc55cSDimitry Andric                           : TargetOpcode::G_ZEXT;
2490349cc55cSDimitry Andric     auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
24910b57cec5SDimitry Andric     LLT CurTy = MRI.getType(SrcReg);
2492349cc55cSDimitry Andric     unsigned NewOpc = MI.getOpcode();
2493349cc55cSDimitry Andric     if (NewOpc == TargetOpcode::G_CTTZ) {
24940b57cec5SDimitry Andric       // The count is the same in the larger type except if the original
24950b57cec5SDimitry Andric       // value was zero.  This can be handled by setting the bit just off
24960b57cec5SDimitry Andric       // the top of the original type.
24970b57cec5SDimitry Andric       auto TopBit =
24980b57cec5SDimitry Andric           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
24990b57cec5SDimitry Andric       MIBSrc = MIRBuilder.buildOr(
25000b57cec5SDimitry Andric         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2501349cc55cSDimitry Andric       // Now we know the operand is non-zero, use the more relaxed opcode.
2502349cc55cSDimitry Andric       NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
25030b57cec5SDimitry Andric     }
25040b57cec5SDimitry Andric 
25050b57cec5SDimitry Andric     // Perform the operation at the larger size.
2506349cc55cSDimitry Andric     auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
25070b57cec5SDimitry Andric     // This is already the correct result for CTPOP and CTTZs
25080b57cec5SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
25090b57cec5SDimitry Andric         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
25100b57cec5SDimitry Andric       // The correct result is NewOp - (Difference in widety and current ty).
25110b57cec5SDimitry Andric       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
25125ffd83dbSDimitry Andric       MIBNewOp = MIRBuilder.buildSub(
25135ffd83dbSDimitry Andric           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
25140b57cec5SDimitry Andric     }
25150b57cec5SDimitry Andric 
25160b57cec5SDimitry Andric     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
25170b57cec5SDimitry Andric     MI.eraseFromParent();
25180b57cec5SDimitry Andric     return Legalized;
25190b57cec5SDimitry Andric   }
25200b57cec5SDimitry Andric   case TargetOpcode::G_BSWAP: {
25210b57cec5SDimitry Andric     Observer.changingInstr(MI);
25220b57cec5SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
25230b57cec5SDimitry Andric 
25240b57cec5SDimitry Andric     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
25250b57cec5SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
25260b57cec5SDimitry Andric     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
25270b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
25280b57cec5SDimitry Andric 
25290b57cec5SDimitry Andric     MI.getOperand(0).setReg(DstExt);
25300b57cec5SDimitry Andric 
25310b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
25320b57cec5SDimitry Andric 
25330b57cec5SDimitry Andric     LLT Ty = MRI.getType(DstReg);
25340b57cec5SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
25350b57cec5SDimitry Andric     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
25365ffd83dbSDimitry Andric     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
25370b57cec5SDimitry Andric 
25380b57cec5SDimitry Andric     MIRBuilder.buildTrunc(DstReg, ShrReg);
25390b57cec5SDimitry Andric     Observer.changedInstr(MI);
25400b57cec5SDimitry Andric     return Legalized;
25410b57cec5SDimitry Andric   }
25428bcb0991SDimitry Andric   case TargetOpcode::G_BITREVERSE: {
25438bcb0991SDimitry Andric     Observer.changingInstr(MI);
25448bcb0991SDimitry Andric 
25458bcb0991SDimitry Andric     Register DstReg = MI.getOperand(0).getReg();
25468bcb0991SDimitry Andric     LLT Ty = MRI.getType(DstReg);
25478bcb0991SDimitry Andric     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
25488bcb0991SDimitry Andric 
25498bcb0991SDimitry Andric     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
25508bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
25518bcb0991SDimitry Andric     MI.getOperand(0).setReg(DstExt);
25528bcb0991SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
25538bcb0991SDimitry Andric 
25548bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
25558bcb0991SDimitry Andric     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
25568bcb0991SDimitry Andric     MIRBuilder.buildTrunc(DstReg, Shift);
25578bcb0991SDimitry Andric     Observer.changedInstr(MI);
25588bcb0991SDimitry Andric     return Legalized;
25598bcb0991SDimitry Andric   }
25605ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
25615ffd83dbSDimitry Andric     Observer.changingInstr(MI);
25625ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
25635ffd83dbSDimitry Andric     widenScalarDst(MI, WideTy);
25645ffd83dbSDimitry Andric     Observer.changedInstr(MI);
25655ffd83dbSDimitry Andric     return Legalized;
25665ffd83dbSDimitry Andric 
2567fe6060f1SDimitry Andric   case TargetOpcode::G_ABS:
2568fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2569fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2570fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2571fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2572fe6060f1SDimitry Andric     return Legalized;
2573fe6060f1SDimitry Andric 
25740b57cec5SDimitry Andric   case TargetOpcode::G_ADD:
25750b57cec5SDimitry Andric   case TargetOpcode::G_AND:
25760b57cec5SDimitry Andric   case TargetOpcode::G_MUL:
25770b57cec5SDimitry Andric   case TargetOpcode::G_OR:
25780b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
25790b57cec5SDimitry Andric   case TargetOpcode::G_SUB:
25800b57cec5SDimitry Andric     // Perform operation at larger width (any extension is fine here, high bits
25810b57cec5SDimitry Andric     // don't affect the result) and then truncate the result back to the
25820b57cec5SDimitry Andric     // original type.
25830b57cec5SDimitry Andric     Observer.changingInstr(MI);
25840b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
25850b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
25860b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
25870b57cec5SDimitry Andric     Observer.changedInstr(MI);
25880b57cec5SDimitry Andric     return Legalized;
25890b57cec5SDimitry Andric 
2590fe6060f1SDimitry Andric   case TargetOpcode::G_SBFX:
2591fe6060f1SDimitry Andric   case TargetOpcode::G_UBFX:
2592fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2593fe6060f1SDimitry Andric 
2594fe6060f1SDimitry Andric     if (TypeIdx == 0) {
2595fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2596fe6060f1SDimitry Andric       widenScalarDst(MI, WideTy);
2597fe6060f1SDimitry Andric     } else {
2598fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2599fe6060f1SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2600fe6060f1SDimitry Andric     }
2601fe6060f1SDimitry Andric 
2602fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2603fe6060f1SDimitry Andric     return Legalized;
2604fe6060f1SDimitry Andric 
26050b57cec5SDimitry Andric   case TargetOpcode::G_SHL:
26060b57cec5SDimitry Andric     Observer.changingInstr(MI);
26070b57cec5SDimitry Andric 
26080b57cec5SDimitry Andric     if (TypeIdx == 0) {
26090b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
26100b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26110b57cec5SDimitry Andric     } else {
26120b57cec5SDimitry Andric       assert(TypeIdx == 1);
26130b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
26140b57cec5SDimitry Andric       // unsigned integer:
26150b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
26160b57cec5SDimitry Andric     }
26170b57cec5SDimitry Andric 
26180b57cec5SDimitry Andric     Observer.changedInstr(MI);
26190b57cec5SDimitry Andric     return Legalized;
26200b57cec5SDimitry Andric 
26215f757f3fSDimitry Andric   case TargetOpcode::G_ROTR:
26225f757f3fSDimitry Andric   case TargetOpcode::G_ROTL:
26235f757f3fSDimitry Andric     if (TypeIdx != 1)
26245f757f3fSDimitry Andric       return UnableToLegalize;
26255f757f3fSDimitry Andric 
26265f757f3fSDimitry Andric     Observer.changingInstr(MI);
26275f757f3fSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
26285f757f3fSDimitry Andric     Observer.changedInstr(MI);
26295f757f3fSDimitry Andric     return Legalized;
26305f757f3fSDimitry Andric 
26310b57cec5SDimitry Andric   case TargetOpcode::G_SDIV:
26320b57cec5SDimitry Andric   case TargetOpcode::G_SREM:
26330b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
26340b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
26350b57cec5SDimitry Andric     Observer.changingInstr(MI);
26360b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
26370b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
26380b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
26390b57cec5SDimitry Andric     Observer.changedInstr(MI);
26400b57cec5SDimitry Andric     return Legalized;
26410b57cec5SDimitry Andric 
2642fe6060f1SDimitry Andric   case TargetOpcode::G_SDIVREM:
2643fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2644fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2645fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2646fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2647fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2648fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2649fe6060f1SDimitry Andric     return Legalized;
2650fe6060f1SDimitry Andric 
26510b57cec5SDimitry Andric   case TargetOpcode::G_ASHR:
26520b57cec5SDimitry Andric   case TargetOpcode::G_LSHR:
26530b57cec5SDimitry Andric     Observer.changingInstr(MI);
26540b57cec5SDimitry Andric 
26550b57cec5SDimitry Andric     if (TypeIdx == 0) {
26560b57cec5SDimitry Andric       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
26570b57cec5SDimitry Andric         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
26580b57cec5SDimitry Andric 
26590b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, CvtOp);
26600b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26610b57cec5SDimitry Andric     } else {
26620b57cec5SDimitry Andric       assert(TypeIdx == 1);
26630b57cec5SDimitry Andric       // The "number of bits to shift" operand must preserve its value as an
26640b57cec5SDimitry Andric       // unsigned integer:
26650b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
26660b57cec5SDimitry Andric     }
26670b57cec5SDimitry Andric 
26680b57cec5SDimitry Andric     Observer.changedInstr(MI);
26690b57cec5SDimitry Andric     return Legalized;
26700b57cec5SDimitry Andric   case TargetOpcode::G_UDIV:
26710b57cec5SDimitry Andric   case TargetOpcode::G_UREM:
26720b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
26730b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
26740b57cec5SDimitry Andric     Observer.changingInstr(MI);
26750b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
26760b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
26770b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
26780b57cec5SDimitry Andric     Observer.changedInstr(MI);
26790b57cec5SDimitry Andric     return Legalized;
26800b57cec5SDimitry Andric 
2681fe6060f1SDimitry Andric   case TargetOpcode::G_UDIVREM:
2682fe6060f1SDimitry Andric     Observer.changingInstr(MI);
2683fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2684fe6060f1SDimitry Andric     widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2685fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy);
2686fe6060f1SDimitry Andric     widenScalarDst(MI, WideTy, 1);
2687fe6060f1SDimitry Andric     Observer.changedInstr(MI);
2688fe6060f1SDimitry Andric     return Legalized;
2689fe6060f1SDimitry Andric 
26900b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
26910b57cec5SDimitry Andric     Observer.changingInstr(MI);
26920b57cec5SDimitry Andric     if (TypeIdx == 0) {
26930b57cec5SDimitry Andric       // Perform operation at larger width (any extension is fine here, high
26940b57cec5SDimitry Andric       // bits don't affect the result) and then truncate the result back to the
26950b57cec5SDimitry Andric       // original type.
26960b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
26970b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
26980b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
26990b57cec5SDimitry Andric     } else {
27000b57cec5SDimitry Andric       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
27010b57cec5SDimitry Andric       // Explicit extension is required here since high bits affect the result.
27020b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
27030b57cec5SDimitry Andric     }
27040b57cec5SDimitry Andric     Observer.changedInstr(MI);
27050b57cec5SDimitry Andric     return Legalized;
27060b57cec5SDimitry Andric 
27070b57cec5SDimitry Andric   case TargetOpcode::G_FPTOSI:
27080b57cec5SDimitry Andric   case TargetOpcode::G_FPTOUI:
27095f757f3fSDimitry Andric   case TargetOpcode::G_IS_FPCLASS:
27100b57cec5SDimitry Andric     Observer.changingInstr(MI);
27118bcb0991SDimitry Andric 
27128bcb0991SDimitry Andric     if (TypeIdx == 0)
27130b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
27148bcb0991SDimitry Andric     else
27158bcb0991SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
27168bcb0991SDimitry Andric 
27170b57cec5SDimitry Andric     Observer.changedInstr(MI);
27180b57cec5SDimitry Andric     return Legalized;
27190b57cec5SDimitry Andric   case TargetOpcode::G_SITOFP:
27200b57cec5SDimitry Andric     Observer.changingInstr(MI);
2721e8d8bef9SDimitry Andric 
2722e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2723e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2724e8d8bef9SDimitry Andric     else
27250b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2726e8d8bef9SDimitry Andric 
27270b57cec5SDimitry Andric     Observer.changedInstr(MI);
27280b57cec5SDimitry Andric     return Legalized;
27290b57cec5SDimitry Andric   case TargetOpcode::G_UITOFP:
27300b57cec5SDimitry Andric     Observer.changingInstr(MI);
2731e8d8bef9SDimitry Andric 
2732e8d8bef9SDimitry Andric     if (TypeIdx == 0)
2733e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2734e8d8bef9SDimitry Andric     else
27350b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2736e8d8bef9SDimitry Andric 
27370b57cec5SDimitry Andric     Observer.changedInstr(MI);
27380b57cec5SDimitry Andric     return Legalized;
27390b57cec5SDimitry Andric   case TargetOpcode::G_LOAD:
27400b57cec5SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
27410b57cec5SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
27420b57cec5SDimitry Andric     Observer.changingInstr(MI);
27430b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
27440b57cec5SDimitry Andric     Observer.changedInstr(MI);
27450b57cec5SDimitry Andric     return Legalized;
27460b57cec5SDimitry Andric 
27470b57cec5SDimitry Andric   case TargetOpcode::G_STORE: {
27480b57cec5SDimitry Andric     if (TypeIdx != 0)
27490b57cec5SDimitry Andric       return UnableToLegalize;
27500b57cec5SDimitry Andric 
27510b57cec5SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2752e8d8bef9SDimitry Andric     if (!Ty.isScalar())
27530b57cec5SDimitry Andric       return UnableToLegalize;
27540b57cec5SDimitry Andric 
27550b57cec5SDimitry Andric     Observer.changingInstr(MI);
27560b57cec5SDimitry Andric 
27570b57cec5SDimitry Andric     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
27580b57cec5SDimitry Andric       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
27590b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, ExtType);
27600b57cec5SDimitry Andric 
27610b57cec5SDimitry Andric     Observer.changedInstr(MI);
27620b57cec5SDimitry Andric     return Legalized;
27630b57cec5SDimitry Andric   }
27640b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT: {
27650b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
27660b57cec5SDimitry Andric     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2767480093f4SDimitry Andric     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2768480093f4SDimitry Andric         MRI.getType(MI.getOperand(0).getReg()));
2769480093f4SDimitry Andric     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2770480093f4SDimitry Andric             ExtOpc == TargetOpcode::G_ANYEXT) &&
2771480093f4SDimitry Andric            "Illegal Extend");
2772480093f4SDimitry Andric     const APInt &SrcVal = SrcMO.getCImm()->getValue();
2773480093f4SDimitry Andric     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2774480093f4SDimitry Andric                            ? SrcVal.sext(WideTy.getSizeInBits())
2775480093f4SDimitry Andric                            : SrcVal.zext(WideTy.getSizeInBits());
27760b57cec5SDimitry Andric     Observer.changingInstr(MI);
27770b57cec5SDimitry Andric     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
27780b57cec5SDimitry Andric 
27790b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
27800b57cec5SDimitry Andric     Observer.changedInstr(MI);
27810b57cec5SDimitry Andric     return Legalized;
27820b57cec5SDimitry Andric   }
27830b57cec5SDimitry Andric   case TargetOpcode::G_FCONSTANT: {
2784fcaf7f86SDimitry Andric     // To avoid changing the bits of the constant due to extension to a larger
2785fcaf7f86SDimitry Andric     // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
27860b57cec5SDimitry Andric     MachineOperand &SrcMO = MI.getOperand(1);
2787fcaf7f86SDimitry Andric     APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2788fcaf7f86SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
2789fcaf7f86SDimitry Andric     auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2790fcaf7f86SDimitry Andric     widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2791fcaf7f86SDimitry Andric     MI.eraseFromParent();
27920b57cec5SDimitry Andric     return Legalized;
27930b57cec5SDimitry Andric   }
27940b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF: {
27950b57cec5SDimitry Andric     Observer.changingInstr(MI);
27960b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
27970b57cec5SDimitry Andric     Observer.changedInstr(MI);
27980b57cec5SDimitry Andric     return Legalized;
27990b57cec5SDimitry Andric   }
28000b57cec5SDimitry Andric   case TargetOpcode::G_BRCOND:
28010b57cec5SDimitry Andric     Observer.changingInstr(MI);
28020b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
28030b57cec5SDimitry Andric     Observer.changedInstr(MI);
28040b57cec5SDimitry Andric     return Legalized;
28050b57cec5SDimitry Andric 
28060b57cec5SDimitry Andric   case TargetOpcode::G_FCMP:
28070b57cec5SDimitry Andric     Observer.changingInstr(MI);
28080b57cec5SDimitry Andric     if (TypeIdx == 0)
28090b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
28100b57cec5SDimitry Andric     else {
28110b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
28120b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
28130b57cec5SDimitry Andric     }
28140b57cec5SDimitry Andric     Observer.changedInstr(MI);
28150b57cec5SDimitry Andric     return Legalized;
28160b57cec5SDimitry Andric 
28170b57cec5SDimitry Andric   case TargetOpcode::G_ICMP:
28180b57cec5SDimitry Andric     Observer.changingInstr(MI);
28190b57cec5SDimitry Andric     if (TypeIdx == 0)
28200b57cec5SDimitry Andric       widenScalarDst(MI, WideTy);
28210b57cec5SDimitry Andric     else {
28220b57cec5SDimitry Andric       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
28230b57cec5SDimitry Andric                                MI.getOperand(1).getPredicate()))
28240b57cec5SDimitry Andric                                ? TargetOpcode::G_SEXT
28250b57cec5SDimitry Andric                                : TargetOpcode::G_ZEXT;
28260b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
28270b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
28280b57cec5SDimitry Andric     }
28290b57cec5SDimitry Andric     Observer.changedInstr(MI);
28300b57cec5SDimitry Andric     return Legalized;
28310b57cec5SDimitry Andric 
2832480093f4SDimitry Andric   case TargetOpcode::G_PTR_ADD:
2833480093f4SDimitry Andric     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
28340b57cec5SDimitry Andric     Observer.changingInstr(MI);
28350b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
28360b57cec5SDimitry Andric     Observer.changedInstr(MI);
28370b57cec5SDimitry Andric     return Legalized;
28380b57cec5SDimitry Andric 
28390b57cec5SDimitry Andric   case TargetOpcode::G_PHI: {
28400b57cec5SDimitry Andric     assert(TypeIdx == 0 && "Expecting only Idx 0");
28410b57cec5SDimitry Andric 
28420b57cec5SDimitry Andric     Observer.changingInstr(MI);
28430b57cec5SDimitry Andric     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
28440b57cec5SDimitry Andric       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2845bdd1243dSDimitry Andric       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
28460b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
28470b57cec5SDimitry Andric     }
28480b57cec5SDimitry Andric 
28490b57cec5SDimitry Andric     MachineBasicBlock &MBB = *MI.getParent();
28500b57cec5SDimitry Andric     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
28510b57cec5SDimitry Andric     widenScalarDst(MI, WideTy);
28520b57cec5SDimitry Andric     Observer.changedInstr(MI);
28530b57cec5SDimitry Andric     return Legalized;
28540b57cec5SDimitry Andric   }
28550b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
28560b57cec5SDimitry Andric     if (TypeIdx == 0) {
28570b57cec5SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
28580b57cec5SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
28590b57cec5SDimitry Andric       Observer.changingInstr(MI);
28600b57cec5SDimitry Andric 
2861fe6060f1SDimitry Andric       widenScalarSrc(
2862fe6060f1SDimitry Andric           MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2863349cc55cSDimitry Andric           TargetOpcode::G_ANYEXT);
28640b57cec5SDimitry Andric 
28650b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
28660b57cec5SDimitry Andric       Observer.changedInstr(MI);
28670b57cec5SDimitry Andric       return Legalized;
28680b57cec5SDimitry Andric     }
28690b57cec5SDimitry Andric 
28700b57cec5SDimitry Andric     if (TypeIdx != 2)
28710b57cec5SDimitry Andric       return UnableToLegalize;
28720b57cec5SDimitry Andric     Observer.changingInstr(MI);
2873480093f4SDimitry Andric     // TODO: Probably should be zext
28740b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
28750b57cec5SDimitry Andric     Observer.changedInstr(MI);
28760b57cec5SDimitry Andric     return Legalized;
28770b57cec5SDimitry Andric   }
2878480093f4SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT: {
28795f757f3fSDimitry Andric     if (TypeIdx == 0) {
28805f757f3fSDimitry Andric       Observer.changingInstr(MI);
28815f757f3fSDimitry Andric       const LLT WideEltTy = WideTy.getElementType();
28825f757f3fSDimitry Andric 
28835f757f3fSDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
28845f757f3fSDimitry Andric       widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
28855f757f3fSDimitry Andric       widenScalarDst(MI, WideTy, 0);
28865f757f3fSDimitry Andric       Observer.changedInstr(MI);
28875f757f3fSDimitry Andric       return Legalized;
28885f757f3fSDimitry Andric     }
28895f757f3fSDimitry Andric 
2890480093f4SDimitry Andric     if (TypeIdx == 1) {
2891480093f4SDimitry Andric       Observer.changingInstr(MI);
2892480093f4SDimitry Andric 
2893480093f4SDimitry Andric       Register VecReg = MI.getOperand(1).getReg();
2894480093f4SDimitry Andric       LLT VecTy = MRI.getType(VecReg);
2895fe6060f1SDimitry Andric       LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2896480093f4SDimitry Andric 
2897480093f4SDimitry Andric       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2898480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2899480093f4SDimitry Andric       widenScalarDst(MI, WideVecTy, 0);
2900480093f4SDimitry Andric       Observer.changedInstr(MI);
2901480093f4SDimitry Andric       return Legalized;
2902480093f4SDimitry Andric     }
2903480093f4SDimitry Andric 
2904480093f4SDimitry Andric     if (TypeIdx == 2) {
2905480093f4SDimitry Andric       Observer.changingInstr(MI);
2906480093f4SDimitry Andric       // TODO: Probably should be zext
2907480093f4SDimitry Andric       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2908480093f4SDimitry Andric       Observer.changedInstr(MI);
29095ffd83dbSDimitry Andric       return Legalized;
2910480093f4SDimitry Andric     }
2911480093f4SDimitry Andric 
29125ffd83dbSDimitry Andric     return UnableToLegalize;
2913480093f4SDimitry Andric   }
29140b57cec5SDimitry Andric   case TargetOpcode::G_FADD:
29150b57cec5SDimitry Andric   case TargetOpcode::G_FMUL:
29160b57cec5SDimitry Andric   case TargetOpcode::G_FSUB:
29170b57cec5SDimitry Andric   case TargetOpcode::G_FMA:
29188bcb0991SDimitry Andric   case TargetOpcode::G_FMAD:
29190b57cec5SDimitry Andric   case TargetOpcode::G_FNEG:
29200b57cec5SDimitry Andric   case TargetOpcode::G_FABS:
29210b57cec5SDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
29220b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM:
29230b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM:
29240b57cec5SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
29250b57cec5SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
29260b57cec5SDimitry Andric   case TargetOpcode::G_FMINIMUM:
29270b57cec5SDimitry Andric   case TargetOpcode::G_FMAXIMUM:
29280b57cec5SDimitry Andric   case TargetOpcode::G_FDIV:
29290b57cec5SDimitry Andric   case TargetOpcode::G_FREM:
29300b57cec5SDimitry Andric   case TargetOpcode::G_FCEIL:
29310b57cec5SDimitry Andric   case TargetOpcode::G_FFLOOR:
29320b57cec5SDimitry Andric   case TargetOpcode::G_FCOS:
29330b57cec5SDimitry Andric   case TargetOpcode::G_FSIN:
29340b57cec5SDimitry Andric   case TargetOpcode::G_FLOG10:
29350b57cec5SDimitry Andric   case TargetOpcode::G_FLOG:
29360b57cec5SDimitry Andric   case TargetOpcode::G_FLOG2:
29370b57cec5SDimitry Andric   case TargetOpcode::G_FRINT:
29380b57cec5SDimitry Andric   case TargetOpcode::G_FNEARBYINT:
29390b57cec5SDimitry Andric   case TargetOpcode::G_FSQRT:
29400b57cec5SDimitry Andric   case TargetOpcode::G_FEXP:
29410b57cec5SDimitry Andric   case TargetOpcode::G_FEXP2:
29425f757f3fSDimitry Andric   case TargetOpcode::G_FEXP10:
29430b57cec5SDimitry Andric   case TargetOpcode::G_FPOW:
29440b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
29450b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
2946e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
29470b57cec5SDimitry Andric     assert(TypeIdx == 0);
29480b57cec5SDimitry Andric     Observer.changingInstr(MI);
29490b57cec5SDimitry Andric 
29500b57cec5SDimitry Andric     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
29510b57cec5SDimitry Andric       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
29520b57cec5SDimitry Andric 
29530b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
29540b57cec5SDimitry Andric     Observer.changedInstr(MI);
29550b57cec5SDimitry Andric     return Legalized;
295606c3fb27SDimitry Andric   case TargetOpcode::G_FPOWI:
295706c3fb27SDimitry Andric   case TargetOpcode::G_FLDEXP:
295806c3fb27SDimitry Andric   case TargetOpcode::G_STRICT_FLDEXP: {
295906c3fb27SDimitry Andric     if (TypeIdx == 0) {
296006c3fb27SDimitry Andric       if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2961e8d8bef9SDimitry Andric         return UnableToLegalize;
296206c3fb27SDimitry Andric 
2963e8d8bef9SDimitry Andric       Observer.changingInstr(MI);
2964e8d8bef9SDimitry Andric       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2965e8d8bef9SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2966e8d8bef9SDimitry Andric       Observer.changedInstr(MI);
2967e8d8bef9SDimitry Andric       return Legalized;
2968e8d8bef9SDimitry Andric     }
296906c3fb27SDimitry Andric 
297006c3fb27SDimitry Andric     if (TypeIdx == 1) {
297106c3fb27SDimitry Andric       // For some reason SelectionDAG tries to promote to a libcall without
297206c3fb27SDimitry Andric       // actually changing the integer type for promotion.
297306c3fb27SDimitry Andric       Observer.changingInstr(MI);
297406c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
297506c3fb27SDimitry Andric       Observer.changedInstr(MI);
297606c3fb27SDimitry Andric       return Legalized;
297706c3fb27SDimitry Andric     }
297806c3fb27SDimitry Andric 
297906c3fb27SDimitry Andric     return UnableToLegalize;
298006c3fb27SDimitry Andric   }
298106c3fb27SDimitry Andric   case TargetOpcode::G_FFREXP: {
298206c3fb27SDimitry Andric     Observer.changingInstr(MI);
298306c3fb27SDimitry Andric 
298406c3fb27SDimitry Andric     if (TypeIdx == 0) {
298506c3fb27SDimitry Andric       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
298606c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
298706c3fb27SDimitry Andric     } else {
298806c3fb27SDimitry Andric       widenScalarDst(MI, WideTy, 1);
298906c3fb27SDimitry Andric     }
299006c3fb27SDimitry Andric 
299106c3fb27SDimitry Andric     Observer.changedInstr(MI);
299206c3fb27SDimitry Andric     return Legalized;
299306c3fb27SDimitry Andric   }
29940b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
29950b57cec5SDimitry Andric     if (TypeIdx != 1)
29960b57cec5SDimitry Andric       return UnableToLegalize;
29970b57cec5SDimitry Andric 
29980b57cec5SDimitry Andric     Observer.changingInstr(MI);
29990b57cec5SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
30000b57cec5SDimitry Andric     Observer.changedInstr(MI);
30010b57cec5SDimitry Andric     return Legalized;
30020b57cec5SDimitry Andric   case TargetOpcode::G_PTRTOINT:
30030b57cec5SDimitry Andric     if (TypeIdx != 0)
30040b57cec5SDimitry Andric       return UnableToLegalize;
30050b57cec5SDimitry Andric 
30060b57cec5SDimitry Andric     Observer.changingInstr(MI);
30070b57cec5SDimitry Andric     widenScalarDst(MI, WideTy, 0);
30080b57cec5SDimitry Andric     Observer.changedInstr(MI);
30090b57cec5SDimitry Andric     return Legalized;
30100b57cec5SDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
30110b57cec5SDimitry Andric     Observer.changingInstr(MI);
30120b57cec5SDimitry Andric 
30130b57cec5SDimitry Andric     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
30140b57cec5SDimitry Andric     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
30150b57cec5SDimitry Andric       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
30160b57cec5SDimitry Andric 
30170b57cec5SDimitry Andric     // Avoid changing the result vector type if the source element type was
30180b57cec5SDimitry Andric     // requested.
30190b57cec5SDimitry Andric     if (TypeIdx == 1) {
3020e8d8bef9SDimitry Andric       MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
30210b57cec5SDimitry Andric     } else {
30220b57cec5SDimitry Andric       widenScalarDst(MI, WideTy, 0);
30230b57cec5SDimitry Andric     }
30240b57cec5SDimitry Andric 
30250b57cec5SDimitry Andric     Observer.changedInstr(MI);
30260b57cec5SDimitry Andric     return Legalized;
30270b57cec5SDimitry Andric   }
30288bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG:
30298bcb0991SDimitry Andric     if (TypeIdx != 0)
30308bcb0991SDimitry Andric       return UnableToLegalize;
30318bcb0991SDimitry Andric 
30328bcb0991SDimitry Andric     Observer.changingInstr(MI);
30338bcb0991SDimitry Andric     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
30348bcb0991SDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
30358bcb0991SDimitry Andric     Observer.changedInstr(MI);
30368bcb0991SDimitry Andric     return Legalized;
30375ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK: {
30385ffd83dbSDimitry Andric     if (TypeIdx != 1)
30395ffd83dbSDimitry Andric       return UnableToLegalize;
30405ffd83dbSDimitry Andric     Observer.changingInstr(MI);
30415ffd83dbSDimitry Andric     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
30425ffd83dbSDimitry Andric     Observer.changedInstr(MI);
30435ffd83dbSDimitry Andric     return Legalized;
30445ffd83dbSDimitry Andric   }
30455f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FADD:
30461db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_FMUL:
30475f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMIN:
30485f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAX:
30495f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMINIMUM:
30505f757f3fSDimitry Andric   case TargetOpcode::G_VECREDUCE_FMAXIMUM:
30515f757f3fSDimitry Andric     if (TypeIdx != 0)
30525f757f3fSDimitry Andric       return UnableToLegalize;
30535f757f3fSDimitry Andric     Observer.changingInstr(MI);
30545f757f3fSDimitry Andric     Register VecReg = MI.getOperand(1).getReg();
30555f757f3fSDimitry Andric     LLT VecTy = MRI.getType(VecReg);
30565f757f3fSDimitry Andric     LLT WideVecTy = VecTy.isVector()
30575f757f3fSDimitry Andric                         ? LLT::vector(VecTy.getElementCount(), WideTy)
30585f757f3fSDimitry Andric                         : WideTy;
30595f757f3fSDimitry Andric     widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
30605f757f3fSDimitry Andric     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
30615f757f3fSDimitry Andric     Observer.changedInstr(MI);
30625f757f3fSDimitry Andric     return Legalized;
30635ffd83dbSDimitry Andric   }
30645ffd83dbSDimitry Andric }
30655ffd83dbSDimitry Andric 
30665ffd83dbSDimitry Andric static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
30675ffd83dbSDimitry Andric                              MachineIRBuilder &B, Register Src, LLT Ty) {
30685ffd83dbSDimitry Andric   auto Unmerge = B.buildUnmerge(Ty, Src);
30695ffd83dbSDimitry Andric   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
30705ffd83dbSDimitry Andric     Pieces.push_back(Unmerge.getReg(I));
30715ffd83dbSDimitry Andric }
30725ffd83dbSDimitry Andric 
30735ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
307406c3fb27SDimitry Andric LegalizerHelper::lowerFConstant(MachineInstr &MI) {
30755ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
30765ffd83dbSDimitry Andric 
307706c3fb27SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
307806c3fb27SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
307906c3fb27SDimitry Andric 
308006c3fb27SDimitry Andric   unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
308106c3fb27SDimitry Andric   LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
308206c3fb27SDimitry Andric   Align Alignment = Align(DL.getABITypeAlign(
308306c3fb27SDimitry Andric       getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
308406c3fb27SDimitry Andric 
308506c3fb27SDimitry Andric   auto Addr = MIRBuilder.buildConstantPool(
308606c3fb27SDimitry Andric       AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
308706c3fb27SDimitry Andric                      MI.getOperand(1).getFPImm(), Alignment));
308806c3fb27SDimitry Andric 
308906c3fb27SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
309006c3fb27SDimitry Andric       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
309106c3fb27SDimitry Andric       MRI.getType(Dst), Alignment);
309206c3fb27SDimitry Andric 
309306c3fb27SDimitry Andric   MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
309406c3fb27SDimitry Andric   MI.eraseFromParent();
309506c3fb27SDimitry Andric 
309606c3fb27SDimitry Andric   return Legalized;
309706c3fb27SDimitry Andric }
309806c3fb27SDimitry Andric 
309906c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
310006c3fb27SDimitry Andric LegalizerHelper::lowerBitcast(MachineInstr &MI) {
310106c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
31025ffd83dbSDimitry Andric   if (SrcTy.isVector()) {
31035ffd83dbSDimitry Andric     LLT SrcEltTy = SrcTy.getElementType();
31045ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
31055ffd83dbSDimitry Andric 
31065ffd83dbSDimitry Andric     if (DstTy.isVector()) {
31075ffd83dbSDimitry Andric       int NumDstElt = DstTy.getNumElements();
31085ffd83dbSDimitry Andric       int NumSrcElt = SrcTy.getNumElements();
31095ffd83dbSDimitry Andric 
31105ffd83dbSDimitry Andric       LLT DstEltTy = DstTy.getElementType();
31115ffd83dbSDimitry Andric       LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
31125ffd83dbSDimitry Andric       LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
31135ffd83dbSDimitry Andric 
31145ffd83dbSDimitry Andric       // If there's an element size mismatch, insert intermediate casts to match
31155ffd83dbSDimitry Andric       // the result element type.
31165ffd83dbSDimitry Andric       if (NumSrcElt < NumDstElt) { // Source element type is larger.
31175ffd83dbSDimitry Andric         // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
31185ffd83dbSDimitry Andric         //
31195ffd83dbSDimitry Andric         // =>
31205ffd83dbSDimitry Andric         //
31215ffd83dbSDimitry Andric         // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
31225ffd83dbSDimitry Andric         // %3:_(<2 x s8>) = G_BITCAST %2
31235ffd83dbSDimitry Andric         // %4:_(<2 x s8>) = G_BITCAST %3
31245ffd83dbSDimitry Andric         // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
3125fe6060f1SDimitry Andric         DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
31265ffd83dbSDimitry Andric         SrcPartTy = SrcEltTy;
31275ffd83dbSDimitry Andric       } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
31285ffd83dbSDimitry Andric         //
31295ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
31305ffd83dbSDimitry Andric         //
31315ffd83dbSDimitry Andric         // =>
31325ffd83dbSDimitry Andric         //
31335ffd83dbSDimitry Andric         // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
31345ffd83dbSDimitry Andric         // %3:_(s16) = G_BITCAST %2
31355ffd83dbSDimitry Andric         // %4:_(s16) = G_BITCAST %3
31365ffd83dbSDimitry Andric         // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
3137fe6060f1SDimitry Andric         SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
31385ffd83dbSDimitry Andric         DstCastTy = DstEltTy;
31395ffd83dbSDimitry Andric       }
31405ffd83dbSDimitry Andric 
31415ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
31425ffd83dbSDimitry Andric       for (Register &SrcReg : SrcRegs)
31435ffd83dbSDimitry Andric         SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
31445ffd83dbSDimitry Andric     } else
31455ffd83dbSDimitry Andric       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
31465ffd83dbSDimitry Andric 
3147bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
31485ffd83dbSDimitry Andric     MI.eraseFromParent();
31495ffd83dbSDimitry Andric     return Legalized;
31505ffd83dbSDimitry Andric   }
31515ffd83dbSDimitry Andric 
31525ffd83dbSDimitry Andric   if (DstTy.isVector()) {
31535ffd83dbSDimitry Andric     SmallVector<Register, 8> SrcRegs;
31545ffd83dbSDimitry Andric     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3155bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
31565ffd83dbSDimitry Andric     MI.eraseFromParent();
31575ffd83dbSDimitry Andric     return Legalized;
31585ffd83dbSDimitry Andric   }
31595ffd83dbSDimitry Andric 
31605ffd83dbSDimitry Andric   return UnableToLegalize;
31615ffd83dbSDimitry Andric }
31625ffd83dbSDimitry Andric 
3163e8d8bef9SDimitry Andric /// Figure out the bit offset into a register when coercing a vector index for
3164e8d8bef9SDimitry Andric /// the wide element type. This is only for the case when promoting vector to
3165e8d8bef9SDimitry Andric /// one with larger elements.
3166e8d8bef9SDimitry Andric //
3167e8d8bef9SDimitry Andric ///
3168e8d8bef9SDimitry Andric /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3169e8d8bef9SDimitry Andric /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3170e8d8bef9SDimitry Andric static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3171e8d8bef9SDimitry Andric                                                    Register Idx,
3172e8d8bef9SDimitry Andric                                                    unsigned NewEltSize,
3173e8d8bef9SDimitry Andric                                                    unsigned OldEltSize) {
3174e8d8bef9SDimitry Andric   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3175e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(Idx);
3176e8d8bef9SDimitry Andric 
3177e8d8bef9SDimitry Andric   // Now figure out the amount we need to shift to get the target bits.
3178e8d8bef9SDimitry Andric   auto OffsetMask = B.buildConstant(
3179349cc55cSDimitry Andric       IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3180e8d8bef9SDimitry Andric   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3181e8d8bef9SDimitry Andric   return B.buildShl(IdxTy, OffsetIdx,
3182e8d8bef9SDimitry Andric                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3183e8d8bef9SDimitry Andric }
3184e8d8bef9SDimitry Andric 
3185e8d8bef9SDimitry Andric /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3186e8d8bef9SDimitry Andric /// is casting to a vector with a smaller element size, perform multiple element
3187e8d8bef9SDimitry Andric /// extracts and merge the results. If this is coercing to a vector with larger
3188e8d8bef9SDimitry Andric /// elements, index the bitcasted vector and extract the target element with bit
3189e8d8bef9SDimitry Andric /// operations. This is intended to force the indexing in the native register
3190e8d8bef9SDimitry Andric /// size for architectures that can dynamically index the register file.
31915ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
3192e8d8bef9SDimitry Andric LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3193e8d8bef9SDimitry Andric                                          LLT CastTy) {
3194e8d8bef9SDimitry Andric   if (TypeIdx != 1)
3195e8d8bef9SDimitry Andric     return UnableToLegalize;
3196e8d8bef9SDimitry Andric 
319706c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3198e8d8bef9SDimitry Andric 
3199e8d8bef9SDimitry Andric   LLT SrcEltTy = SrcVecTy.getElementType();
3200e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3201e8d8bef9SDimitry Andric   unsigned OldNumElts = SrcVecTy.getNumElements();
3202e8d8bef9SDimitry Andric 
3203e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3204e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3205e8d8bef9SDimitry Andric 
3206e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
3207e8d8bef9SDimitry Andric   const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3208e8d8bef9SDimitry Andric   if (NewNumElts > OldNumElts) {
3209e8d8bef9SDimitry Andric     // Decreasing the vector element size
3210e8d8bef9SDimitry Andric     //
3211e8d8bef9SDimitry Andric     // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3212e8d8bef9SDimitry Andric     //  =>
3213e8d8bef9SDimitry Andric     //  v4i32:castx = bitcast x:v2i64
3214e8d8bef9SDimitry Andric     //
3215e8d8bef9SDimitry Andric     // i64 = bitcast
3216e8d8bef9SDimitry Andric     //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3217e8d8bef9SDimitry Andric     //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
3218e8d8bef9SDimitry Andric     //
3219e8d8bef9SDimitry Andric     if (NewNumElts % OldNumElts != 0)
3220e8d8bef9SDimitry Andric       return UnableToLegalize;
3221e8d8bef9SDimitry Andric 
3222e8d8bef9SDimitry Andric     // Type of the intermediate result vector.
3223e8d8bef9SDimitry Andric     const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3224fe6060f1SDimitry Andric     LLT MidTy =
3225fe6060f1SDimitry Andric         LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3226e8d8bef9SDimitry Andric 
3227e8d8bef9SDimitry Andric     auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3228e8d8bef9SDimitry Andric 
3229e8d8bef9SDimitry Andric     SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3230e8d8bef9SDimitry Andric     auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3231e8d8bef9SDimitry Andric 
3232e8d8bef9SDimitry Andric     for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3233e8d8bef9SDimitry Andric       auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3234e8d8bef9SDimitry Andric       auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3235e8d8bef9SDimitry Andric       auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3236e8d8bef9SDimitry Andric       NewOps[I] = Elt.getReg(0);
3237e8d8bef9SDimitry Andric     }
3238e8d8bef9SDimitry Andric 
3239e8d8bef9SDimitry Andric     auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3240e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, NewVec);
3241e8d8bef9SDimitry Andric     MI.eraseFromParent();
3242e8d8bef9SDimitry Andric     return Legalized;
3243e8d8bef9SDimitry Andric   }
3244e8d8bef9SDimitry Andric 
3245e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3246e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
3247e8d8bef9SDimitry Andric       return UnableToLegalize;
3248e8d8bef9SDimitry Andric 
3249e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3250e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3251e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3252e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
3253e8d8bef9SDimitry Andric       return UnableToLegalize;
3254e8d8bef9SDimitry Andric 
3255e8d8bef9SDimitry Andric     // Increasing the vector element size.
3256e8d8bef9SDimitry Andric     // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3257e8d8bef9SDimitry Andric     //
3258e8d8bef9SDimitry Andric     //   =>
3259e8d8bef9SDimitry Andric     //
3260e8d8bef9SDimitry Andric     // %cast = G_BITCAST %vec
3261e8d8bef9SDimitry Andric     // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3262e8d8bef9SDimitry Andric     // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3263e8d8bef9SDimitry Andric     // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3264e8d8bef9SDimitry Andric     // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3265e8d8bef9SDimitry Andric     // %elt_bits = G_LSHR %wide_elt, %offset_bits
3266e8d8bef9SDimitry Andric     // %elt = G_TRUNC %elt_bits
3267e8d8bef9SDimitry Andric 
3268e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3269e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3270e8d8bef9SDimitry Andric 
3271e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3272e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3273e8d8bef9SDimitry Andric 
3274e8d8bef9SDimitry Andric     Register WideElt = CastVec;
3275e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3276e8d8bef9SDimitry Andric       WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3277e8d8bef9SDimitry Andric                                                      ScaledIdx).getReg(0);
3278e8d8bef9SDimitry Andric     }
3279e8d8bef9SDimitry Andric 
3280e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3281e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3282e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3283e8d8bef9SDimitry Andric 
3284e8d8bef9SDimitry Andric     // Shift the wide element to get the target element.
3285e8d8bef9SDimitry Andric     auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3286e8d8bef9SDimitry Andric     MIRBuilder.buildTrunc(Dst, ExtractedBits);
3287e8d8bef9SDimitry Andric     MI.eraseFromParent();
3288e8d8bef9SDimitry Andric     return Legalized;
3289e8d8bef9SDimitry Andric   }
3290e8d8bef9SDimitry Andric 
3291e8d8bef9SDimitry Andric   return UnableToLegalize;
3292e8d8bef9SDimitry Andric }
3293e8d8bef9SDimitry Andric 
3294e8d8bef9SDimitry Andric /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3295e8d8bef9SDimitry Andric /// TargetReg, while preserving other bits in \p TargetReg.
3296e8d8bef9SDimitry Andric ///
3297e8d8bef9SDimitry Andric /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3298e8d8bef9SDimitry Andric static Register buildBitFieldInsert(MachineIRBuilder &B,
3299e8d8bef9SDimitry Andric                                     Register TargetReg, Register InsertReg,
3300e8d8bef9SDimitry Andric                                     Register OffsetBits) {
3301e8d8bef9SDimitry Andric   LLT TargetTy = B.getMRI()->getType(TargetReg);
3302e8d8bef9SDimitry Andric   LLT InsertTy = B.getMRI()->getType(InsertReg);
3303e8d8bef9SDimitry Andric   auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3304e8d8bef9SDimitry Andric   auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3305e8d8bef9SDimitry Andric 
3306e8d8bef9SDimitry Andric   // Produce a bitmask of the value to insert
3307e8d8bef9SDimitry Andric   auto EltMask = B.buildConstant(
3308e8d8bef9SDimitry Andric     TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3309e8d8bef9SDimitry Andric                                    InsertTy.getSizeInBits()));
3310e8d8bef9SDimitry Andric   // Shift it into position
3311e8d8bef9SDimitry Andric   auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3312e8d8bef9SDimitry Andric   auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3313e8d8bef9SDimitry Andric 
3314e8d8bef9SDimitry Andric   // Clear out the bits in the wide element
3315e8d8bef9SDimitry Andric   auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3316e8d8bef9SDimitry Andric 
3317e8d8bef9SDimitry Andric   // The value to insert has all zeros already, so stick it into the masked
3318e8d8bef9SDimitry Andric   // wide element.
3319e8d8bef9SDimitry Andric   return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3320e8d8bef9SDimitry Andric }
3321e8d8bef9SDimitry Andric 
3322e8d8bef9SDimitry Andric /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3323e8d8bef9SDimitry Andric /// is increasing the element size, perform the indexing in the target element
3324e8d8bef9SDimitry Andric /// type, and use bit operations to insert at the element position. This is
3325e8d8bef9SDimitry Andric /// intended for architectures that can dynamically index the register file and
3326e8d8bef9SDimitry Andric /// want to force indexing in the native register size.
3327e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3328e8d8bef9SDimitry Andric LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3329e8d8bef9SDimitry Andric                                         LLT CastTy) {
33305ffd83dbSDimitry Andric   if (TypeIdx != 0)
33315ffd83dbSDimitry Andric     return UnableToLegalize;
33325ffd83dbSDimitry Andric 
333306c3fb27SDimitry Andric   auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
333406c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
333506c3fb27SDimitry Andric   LLT VecTy = DstTy;
3336e8d8bef9SDimitry Andric 
3337e8d8bef9SDimitry Andric   LLT VecEltTy = VecTy.getElementType();
3338e8d8bef9SDimitry Andric   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3339e8d8bef9SDimitry Andric   const unsigned NewEltSize = NewEltTy.getSizeInBits();
3340e8d8bef9SDimitry Andric   const unsigned OldEltSize = VecEltTy.getSizeInBits();
3341e8d8bef9SDimitry Andric 
3342e8d8bef9SDimitry Andric   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3343e8d8bef9SDimitry Andric   unsigned OldNumElts = VecTy.getNumElements();
3344e8d8bef9SDimitry Andric 
3345e8d8bef9SDimitry Andric   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3346e8d8bef9SDimitry Andric   if (NewNumElts < OldNumElts) {
3347e8d8bef9SDimitry Andric     if (NewEltSize % OldEltSize != 0)
33485ffd83dbSDimitry Andric       return UnableToLegalize;
33495ffd83dbSDimitry Andric 
3350e8d8bef9SDimitry Andric     // This only depends on powers of 2 because we use bit tricks to figure out
3351e8d8bef9SDimitry Andric     // the bit offset we need to shift to get the target element. A general
3352e8d8bef9SDimitry Andric     // expansion could emit division/multiply.
3353e8d8bef9SDimitry Andric     if (!isPowerOf2_32(NewEltSize / OldEltSize))
33545ffd83dbSDimitry Andric       return UnableToLegalize;
33555ffd83dbSDimitry Andric 
3356e8d8bef9SDimitry Andric     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3357e8d8bef9SDimitry Andric     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3358e8d8bef9SDimitry Andric 
3359e8d8bef9SDimitry Andric     // Divide to get the index in the wider element type.
3360e8d8bef9SDimitry Andric     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3361e8d8bef9SDimitry Andric 
3362e8d8bef9SDimitry Andric     Register ExtractedElt = CastVec;
3363e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3364e8d8bef9SDimitry Andric       ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3365e8d8bef9SDimitry Andric                                                           ScaledIdx).getReg(0);
33665ffd83dbSDimitry Andric     }
33675ffd83dbSDimitry Andric 
3368e8d8bef9SDimitry Andric     // Compute the bit offset into the register of the target element.
3369e8d8bef9SDimitry Andric     Register OffsetBits = getBitcastWiderVectorElementOffset(
3370e8d8bef9SDimitry Andric       MIRBuilder, Idx, NewEltSize, OldEltSize);
3371e8d8bef9SDimitry Andric 
3372e8d8bef9SDimitry Andric     Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3373e8d8bef9SDimitry Andric                                                Val, OffsetBits);
3374e8d8bef9SDimitry Andric     if (CastTy.isVector()) {
3375e8d8bef9SDimitry Andric       InsertedElt = MIRBuilder.buildInsertVectorElement(
3376e8d8bef9SDimitry Andric         CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3377e8d8bef9SDimitry Andric     }
3378e8d8bef9SDimitry Andric 
3379e8d8bef9SDimitry Andric     MIRBuilder.buildBitcast(Dst, InsertedElt);
3380e8d8bef9SDimitry Andric     MI.eraseFromParent();
33815ffd83dbSDimitry Andric     return Legalized;
33825ffd83dbSDimitry Andric   }
3383e8d8bef9SDimitry Andric 
33845ffd83dbSDimitry Andric   return UnableToLegalize;
33850b57cec5SDimitry Andric }
33860b57cec5SDimitry Andric 
3387fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
33880b57cec5SDimitry Andric   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3389fe6060f1SDimitry Andric   Register DstReg = LoadMI.getDstReg();
3390fe6060f1SDimitry Andric   Register PtrReg = LoadMI.getPointerReg();
33910b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
3392fe6060f1SDimitry Andric   MachineMemOperand &MMO = LoadMI.getMMO();
3393fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3394fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
33950b57cec5SDimitry Andric 
3396fe6060f1SDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3397fe6060f1SDimitry Andric   unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3398fe6060f1SDimitry Andric 
3399fe6060f1SDimitry Andric   if (MemSizeInBits != MemStoreSizeInBits) {
3400349cc55cSDimitry Andric     if (MemTy.isVector())
3401349cc55cSDimitry Andric       return UnableToLegalize;
3402349cc55cSDimitry Andric 
3403fe6060f1SDimitry Andric     // Promote to a byte-sized load if not loading an integral number of
3404fe6060f1SDimitry Andric     // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3405fe6060f1SDimitry Andric     LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3406fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3407fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3408fe6060f1SDimitry Andric 
3409fe6060f1SDimitry Andric     Register LoadReg = DstReg;
3410fe6060f1SDimitry Andric     LLT LoadTy = DstTy;
3411fe6060f1SDimitry Andric 
3412fe6060f1SDimitry Andric     // If this wasn't already an extending load, we need to widen the result
3413fe6060f1SDimitry Andric     // register to avoid creating a load with a narrower result than the source.
3414fe6060f1SDimitry Andric     if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3415fe6060f1SDimitry Andric       LoadTy = WideMemTy;
3416fe6060f1SDimitry Andric       LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3417fe6060f1SDimitry Andric     }
3418fe6060f1SDimitry Andric 
3419fe6060f1SDimitry Andric     if (isa<GSExtLoad>(LoadMI)) {
3420fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3421fe6060f1SDimitry Andric       MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
342281ad6265SDimitry Andric     } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3423fe6060f1SDimitry Andric       auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3424fe6060f1SDimitry Andric       // The extra bits are guaranteed to be zero, since we stored them that
3425fe6060f1SDimitry Andric       // way.  A zext load from Wide thus automatically gives zext from MemVT.
3426fe6060f1SDimitry Andric       MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3427fe6060f1SDimitry Andric     } else {
3428fe6060f1SDimitry Andric       MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3429fe6060f1SDimitry Andric     }
3430fe6060f1SDimitry Andric 
3431fe6060f1SDimitry Andric     if (DstTy != LoadTy)
3432fe6060f1SDimitry Andric       MIRBuilder.buildTrunc(DstReg, LoadReg);
3433fe6060f1SDimitry Andric 
3434fe6060f1SDimitry Andric     LoadMI.eraseFromParent();
3435fe6060f1SDimitry Andric     return Legalized;
3436fe6060f1SDimitry Andric   }
3437fe6060f1SDimitry Andric 
3438fe6060f1SDimitry Andric   // Big endian lowering not implemented.
3439fe6060f1SDimitry Andric   if (MIRBuilder.getDataLayout().isBigEndian())
3440fe6060f1SDimitry Andric     return UnableToLegalize;
3441fe6060f1SDimitry Andric 
3442349cc55cSDimitry Andric   // This load needs splitting into power of 2 sized loads.
3443349cc55cSDimitry Andric   //
34448bcb0991SDimitry Andric   // Our strategy here is to generate anyextending loads for the smaller
34458bcb0991SDimitry Andric   // types up to next power-2 result type, and then combine the two larger
34468bcb0991SDimitry Andric   // result values together, before truncating back down to the non-pow-2
34478bcb0991SDimitry Andric   // type.
34488bcb0991SDimitry Andric   // E.g. v1 = i24 load =>
34495ffd83dbSDimitry Andric   // v2 = i32 zextload (2 byte)
34508bcb0991SDimitry Andric   // v3 = i32 load (1 byte)
34518bcb0991SDimitry Andric   // v4 = i32 shl v3, 16
34528bcb0991SDimitry Andric   // v5 = i32 or v4, v2
34538bcb0991SDimitry Andric   // v1 = i24 trunc v5
34548bcb0991SDimitry Andric   // By doing this we generate the correct truncate which should get
34558bcb0991SDimitry Andric   // combined away as an artifact with a matching extend.
3456349cc55cSDimitry Andric 
3457349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3458349cc55cSDimitry Andric 
3459349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
3460349cc55cSDimitry Andric     // This load needs splitting into power of 2 sized loads.
346106c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3462349cc55cSDimitry Andric     SmallSplitSize = MemSizeInBits - LargeSplitSize;
3463349cc55cSDimitry Andric   } else {
3464349cc55cSDimitry Andric     // This is already a power of 2, but we still need to split this in half.
3465349cc55cSDimitry Andric     //
3466349cc55cSDimitry Andric     // Assume we're being asked to decompose an unaligned load.
3467349cc55cSDimitry Andric     // TODO: If this requires multiple splits, handle them all at once.
3468349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3469349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3470349cc55cSDimitry Andric       return UnableToLegalize;
3471349cc55cSDimitry Andric 
3472349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3473349cc55cSDimitry Andric   }
3474349cc55cSDimitry Andric 
3475349cc55cSDimitry Andric   if (MemTy.isVector()) {
3476349cc55cSDimitry Andric     // TODO: Handle vector extloads
3477349cc55cSDimitry Andric     if (MemTy != DstTy)
3478349cc55cSDimitry Andric       return UnableToLegalize;
3479349cc55cSDimitry Andric 
3480349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3481349cc55cSDimitry Andric     // in half.
3482349cc55cSDimitry Andric     return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3483349cc55cSDimitry Andric   }
34848bcb0991SDimitry Andric 
34858bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
34868bcb0991SDimitry Andric       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3487fe6060f1SDimitry Andric   MachineMemOperand *SmallMMO =
3488fe6060f1SDimitry Andric       MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
34898bcb0991SDimitry Andric 
34908bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
3491fe6060f1SDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
34928bcb0991SDimitry Andric   LLT AnyExtTy = LLT::scalar(AnyExtSize);
3493fe6060f1SDimitry Andric   auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3494fe6060f1SDimitry Andric                                              PtrReg, *LargeMMO);
34958bcb0991SDimitry Andric 
3496fe6060f1SDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3497fe6060f1SDimitry Andric                                             LargeSplitSize / 8);
3498480093f4SDimitry Andric   Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3499fe6060f1SDimitry Andric   auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3500fe6060f1SDimitry Andric   auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3501fe6060f1SDimitry Andric                                              SmallPtr, *SmallMMO);
35028bcb0991SDimitry Andric 
35038bcb0991SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
35048bcb0991SDimitry Andric   auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3505fe6060f1SDimitry Andric 
3506fe6060f1SDimitry Andric   if (AnyExtTy == DstTy)
3507fe6060f1SDimitry Andric     MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3508349cc55cSDimitry Andric   else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
35098bcb0991SDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3510fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, {Or});
3511349cc55cSDimitry Andric   } else {
3512349cc55cSDimitry Andric     assert(DstTy.isPointer() && "expected pointer");
3513349cc55cSDimitry Andric     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3514349cc55cSDimitry Andric 
3515349cc55cSDimitry Andric     // FIXME: We currently consider this to be illegal for non-integral address
3516349cc55cSDimitry Andric     // spaces, but we still need a way to reinterpret the bits.
3517349cc55cSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
3518fe6060f1SDimitry Andric   }
3519fe6060f1SDimitry Andric 
3520fe6060f1SDimitry Andric   LoadMI.eraseFromParent();
35218bcb0991SDimitry Andric   return Legalized;
35228bcb0991SDimitry Andric }
3523e8d8bef9SDimitry Andric 
3524fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
35258bcb0991SDimitry Andric   // Lower a non-power of 2 store into multiple pow-2 stores.
35268bcb0991SDimitry Andric   // E.g. split an i24 store into an i16 store + i8 store.
35278bcb0991SDimitry Andric   // We do this by first extending the stored value to the next largest power
35288bcb0991SDimitry Andric   // of 2 type, and then using truncating stores to store the components.
35298bcb0991SDimitry Andric   // By doing this, likewise with G_LOAD, generate an extend that can be
35308bcb0991SDimitry Andric   // artifact-combined away instead of leaving behind extracts.
3531fe6060f1SDimitry Andric   Register SrcReg = StoreMI.getValueReg();
3532fe6060f1SDimitry Andric   Register PtrReg = StoreMI.getPointerReg();
35338bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
3534fe6060f1SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
3535fe6060f1SDimitry Andric   MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3536fe6060f1SDimitry Andric   LLT MemTy = MMO.getMemoryType();
3537fe6060f1SDimitry Andric 
3538fe6060f1SDimitry Andric   unsigned StoreWidth = MemTy.getSizeInBits();
3539fe6060f1SDimitry Andric   unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3540fe6060f1SDimitry Andric 
3541fe6060f1SDimitry Andric   if (StoreWidth != StoreSizeInBits) {
3542349cc55cSDimitry Andric     if (SrcTy.isVector())
3543349cc55cSDimitry Andric       return UnableToLegalize;
3544349cc55cSDimitry Andric 
3545fe6060f1SDimitry Andric     // Promote to a byte-sized store with upper bits zero if not
3546fe6060f1SDimitry Andric     // storing an integral number of bytes.  For example, promote
3547fe6060f1SDimitry Andric     // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3548fe6060f1SDimitry Andric     LLT WideTy = LLT::scalar(StoreSizeInBits);
3549fe6060f1SDimitry Andric 
3550fe6060f1SDimitry Andric     if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3551fe6060f1SDimitry Andric       // Avoid creating a store with a narrower source than result.
3552fe6060f1SDimitry Andric       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3553fe6060f1SDimitry Andric       SrcTy = WideTy;
3554fe6060f1SDimitry Andric     }
3555fe6060f1SDimitry Andric 
3556fe6060f1SDimitry Andric     auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3557fe6060f1SDimitry Andric 
3558fe6060f1SDimitry Andric     MachineMemOperand *NewMMO =
3559fe6060f1SDimitry Andric         MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3560fe6060f1SDimitry Andric     MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3561fe6060f1SDimitry Andric     StoreMI.eraseFromParent();
3562fe6060f1SDimitry Andric     return Legalized;
3563fe6060f1SDimitry Andric   }
3564fe6060f1SDimitry Andric 
3565349cc55cSDimitry Andric   if (MemTy.isVector()) {
3566349cc55cSDimitry Andric     // TODO: Handle vector trunc stores
3567349cc55cSDimitry Andric     if (MemTy != SrcTy)
3568349cc55cSDimitry Andric       return UnableToLegalize;
3569349cc55cSDimitry Andric 
3570349cc55cSDimitry Andric     // TODO: We can do better than scalarizing the vector and at least split it
3571349cc55cSDimitry Andric     // in half.
3572349cc55cSDimitry Andric     return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3573349cc55cSDimitry Andric   }
3574349cc55cSDimitry Andric 
3575349cc55cSDimitry Andric   unsigned MemSizeInBits = MemTy.getSizeInBits();
3576349cc55cSDimitry Andric   uint64_t LargeSplitSize, SmallSplitSize;
3577349cc55cSDimitry Andric 
3578349cc55cSDimitry Andric   if (!isPowerOf2_32(MemSizeInBits)) {
357906c3fb27SDimitry Andric     LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3580349cc55cSDimitry Andric     SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3581349cc55cSDimitry Andric   } else {
3582349cc55cSDimitry Andric     auto &Ctx = MF.getFunction().getContext();
3583349cc55cSDimitry Andric     if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
35848bcb0991SDimitry Andric       return UnableToLegalize; // Don't know what we're being asked to do.
35858bcb0991SDimitry Andric 
3586349cc55cSDimitry Andric     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3587349cc55cSDimitry Andric   }
3588349cc55cSDimitry Andric 
3589fe6060f1SDimitry Andric   // Extend to the next pow-2. If this store was itself the result of lowering,
3590fe6060f1SDimitry Andric   // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3591349cc55cSDimitry Andric   // that's wider than the stored size.
3592349cc55cSDimitry Andric   unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3593349cc55cSDimitry Andric   const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3594349cc55cSDimitry Andric 
3595349cc55cSDimitry Andric   if (SrcTy.isPointer()) {
3596349cc55cSDimitry Andric     const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3597349cc55cSDimitry Andric     SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3598349cc55cSDimitry Andric   }
3599349cc55cSDimitry Andric 
3600fe6060f1SDimitry Andric   auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
36018bcb0991SDimitry Andric 
36028bcb0991SDimitry Andric   // Obtain the smaller value by shifting away the larger value.
3603fe6060f1SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3604fe6060f1SDimitry Andric   auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
36058bcb0991SDimitry Andric 
3606480093f4SDimitry Andric   // Generate the PtrAdd and truncating stores.
36078bcb0991SDimitry Andric   LLT PtrTy = MRI.getType(PtrReg);
36085ffd83dbSDimitry Andric   auto OffsetCst = MIRBuilder.buildConstant(
36095ffd83dbSDimitry Andric     LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3610480093f4SDimitry Andric   auto SmallPtr =
3611349cc55cSDimitry Andric     MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
36128bcb0991SDimitry Andric 
36138bcb0991SDimitry Andric   MachineMemOperand *LargeMMO =
36148bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
36158bcb0991SDimitry Andric   MachineMemOperand *SmallMMO =
36168bcb0991SDimitry Andric     MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3617fe6060f1SDimitry Andric   MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3618fe6060f1SDimitry Andric   MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3619fe6060f1SDimitry Andric   StoreMI.eraseFromParent();
36208bcb0991SDimitry Andric   return Legalized;
36218bcb0991SDimitry Andric }
3622e8d8bef9SDimitry Andric 
3623e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3624e8d8bef9SDimitry Andric LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3625e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
3626e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD: {
3627e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3628e8d8bef9SDimitry Andric       return UnableToLegalize;
3629fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3630fe6060f1SDimitry Andric 
3631fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of an extending load.
3632fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3633fe6060f1SDimitry Andric       return UnableToLegalize;
3634e8d8bef9SDimitry Andric 
3635e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3636e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3637fe6060f1SDimitry Andric     MMO.setType(CastTy);
3638e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3639e8d8bef9SDimitry Andric     return Legalized;
3640e8d8bef9SDimitry Andric   }
3641e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE: {
3642e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3643e8d8bef9SDimitry Andric       return UnableToLegalize;
3644e8d8bef9SDimitry Andric 
3645fe6060f1SDimitry Andric     MachineMemOperand &MMO = **MI.memoperands_begin();
3646fe6060f1SDimitry Andric 
3647fe6060f1SDimitry Andric     // Not sure how to interpret a bitcast of a truncating store.
3648fe6060f1SDimitry Andric     if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3649fe6060f1SDimitry Andric       return UnableToLegalize;
3650fe6060f1SDimitry Andric 
3651e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3652e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 0);
3653fe6060f1SDimitry Andric     MMO.setType(CastTy);
3654e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3655e8d8bef9SDimitry Andric     return Legalized;
3656e8d8bef9SDimitry Andric   }
3657e8d8bef9SDimitry Andric   case TargetOpcode::G_SELECT: {
3658e8d8bef9SDimitry Andric     if (TypeIdx != 0)
3659e8d8bef9SDimitry Andric       return UnableToLegalize;
3660e8d8bef9SDimitry Andric 
3661e8d8bef9SDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3662e8d8bef9SDimitry Andric       LLVM_DEBUG(
3663e8d8bef9SDimitry Andric           dbgs() << "bitcast action not implemented for vector select\n");
3664e8d8bef9SDimitry Andric       return UnableToLegalize;
3665e8d8bef9SDimitry Andric     }
3666e8d8bef9SDimitry Andric 
3667e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3668e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3669e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 3);
3670e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3671e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3672e8d8bef9SDimitry Andric     return Legalized;
3673e8d8bef9SDimitry Andric   }
3674e8d8bef9SDimitry Andric   case TargetOpcode::G_AND:
3675e8d8bef9SDimitry Andric   case TargetOpcode::G_OR:
3676e8d8bef9SDimitry Andric   case TargetOpcode::G_XOR: {
3677e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3678e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 1);
3679e8d8bef9SDimitry Andric     bitcastSrc(MI, CastTy, 2);
3680e8d8bef9SDimitry Andric     bitcastDst(MI, CastTy, 0);
3681e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3682e8d8bef9SDimitry Andric     return Legalized;
3683e8d8bef9SDimitry Andric   }
3684e8d8bef9SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3685e8d8bef9SDimitry Andric     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3686e8d8bef9SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
3687e8d8bef9SDimitry Andric     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3688e8d8bef9SDimitry Andric   default:
3689e8d8bef9SDimitry Andric     return UnableToLegalize;
3690e8d8bef9SDimitry Andric   }
3691e8d8bef9SDimitry Andric }
3692e8d8bef9SDimitry Andric 
3693e8d8bef9SDimitry Andric // Legalize an instruction by changing the opcode in place.
3694e8d8bef9SDimitry Andric void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3695e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3696e8d8bef9SDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3697e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3698e8d8bef9SDimitry Andric }
3699e8d8bef9SDimitry Andric 
3700e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
3701e8d8bef9SDimitry Andric LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3702e8d8bef9SDimitry Andric   using namespace TargetOpcode;
3703e8d8bef9SDimitry Andric 
3704e8d8bef9SDimitry Andric   switch(MI.getOpcode()) {
3705e8d8bef9SDimitry Andric   default:
3706e8d8bef9SDimitry Andric     return UnableToLegalize;
370706c3fb27SDimitry Andric   case TargetOpcode::G_FCONSTANT:
370806c3fb27SDimitry Andric     return lowerFConstant(MI);
3709e8d8bef9SDimitry Andric   case TargetOpcode::G_BITCAST:
3710e8d8bef9SDimitry Andric     return lowerBitcast(MI);
3711e8d8bef9SDimitry Andric   case TargetOpcode::G_SREM:
3712e8d8bef9SDimitry Andric   case TargetOpcode::G_UREM: {
3713e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3714e8d8bef9SDimitry Andric     auto Quot =
3715e8d8bef9SDimitry Andric         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3716e8d8bef9SDimitry Andric                               {MI.getOperand(1), MI.getOperand(2)});
3717e8d8bef9SDimitry Andric 
3718e8d8bef9SDimitry Andric     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3719e8d8bef9SDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3720e8d8bef9SDimitry Andric     MI.eraseFromParent();
3721e8d8bef9SDimitry Andric     return Legalized;
3722e8d8bef9SDimitry Andric   }
3723e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDO:
3724e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBO:
3725e8d8bef9SDimitry Andric     return lowerSADDO_SSUBO(MI);
3726e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULH:
3727e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULH:
3728e8d8bef9SDimitry Andric     return lowerSMULH_UMULH(MI);
3729e8d8bef9SDimitry Andric   case TargetOpcode::G_SMULO:
3730e8d8bef9SDimitry Andric   case TargetOpcode::G_UMULO: {
3731e8d8bef9SDimitry Andric     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3732e8d8bef9SDimitry Andric     // result.
373306c3fb27SDimitry Andric     auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3734e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3735e8d8bef9SDimitry Andric 
3736e8d8bef9SDimitry Andric     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3737e8d8bef9SDimitry Andric                           ? TargetOpcode::G_SMULH
3738e8d8bef9SDimitry Andric                           : TargetOpcode::G_UMULH;
3739e8d8bef9SDimitry Andric 
3740e8d8bef9SDimitry Andric     Observer.changingInstr(MI);
3741e8d8bef9SDimitry Andric     const auto &TII = MIRBuilder.getTII();
3742e8d8bef9SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_MUL));
374381ad6265SDimitry Andric     MI.removeOperand(1);
3744e8d8bef9SDimitry Andric     Observer.changedInstr(MI);
3745e8d8bef9SDimitry Andric 
3746e8d8bef9SDimitry Andric     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3747e8d8bef9SDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
3748e8d8bef9SDimitry Andric 
3749e8d8bef9SDimitry Andric     // Move insert point forward so we can use the Res register if needed.
3750e8d8bef9SDimitry Andric     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3751e8d8bef9SDimitry Andric 
3752e8d8bef9SDimitry Andric     // For *signed* multiply, overflow is detected by checking:
3753e8d8bef9SDimitry Andric     // (hi != (lo >> bitwidth-1))
3754e8d8bef9SDimitry Andric     if (Opcode == TargetOpcode::G_SMULH) {
3755e8d8bef9SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3756e8d8bef9SDimitry Andric       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3757e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3758e8d8bef9SDimitry Andric     } else {
3759e8d8bef9SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3760e8d8bef9SDimitry Andric     }
3761e8d8bef9SDimitry Andric     return Legalized;
3762e8d8bef9SDimitry Andric   }
3763e8d8bef9SDimitry Andric   case TargetOpcode::G_FNEG: {
376406c3fb27SDimitry Andric     auto [Res, SubByReg] = MI.getFirst2Regs();
3765e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3766e8d8bef9SDimitry Andric 
3767e8d8bef9SDimitry Andric     // TODO: Handle vector types once we are able to
3768e8d8bef9SDimitry Andric     // represent them.
3769e8d8bef9SDimitry Andric     if (Ty.isVector())
3770e8d8bef9SDimitry Andric       return UnableToLegalize;
3771e8d8bef9SDimitry Andric     auto SignMask =
3772e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3773e8d8bef9SDimitry Andric     MIRBuilder.buildXor(Res, SubByReg, SignMask);
3774e8d8bef9SDimitry Andric     MI.eraseFromParent();
3775e8d8bef9SDimitry Andric     return Legalized;
3776e8d8bef9SDimitry Andric   }
3777bdd1243dSDimitry Andric   case TargetOpcode::G_FSUB:
3778bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB: {
377906c3fb27SDimitry Andric     auto [Res, LHS, RHS] = MI.getFirst3Regs();
3780e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(Res);
3781e8d8bef9SDimitry Andric 
3782e8d8bef9SDimitry Andric     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3783bdd1243dSDimitry Andric     auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3784bdd1243dSDimitry Andric 
3785bdd1243dSDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3786bdd1243dSDimitry Andric       MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3787bdd1243dSDimitry Andric     else
3788e8d8bef9SDimitry Andric       MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3789bdd1243dSDimitry Andric 
3790e8d8bef9SDimitry Andric     MI.eraseFromParent();
3791e8d8bef9SDimitry Andric     return Legalized;
3792e8d8bef9SDimitry Andric   }
3793e8d8bef9SDimitry Andric   case TargetOpcode::G_FMAD:
3794e8d8bef9SDimitry Andric     return lowerFMad(MI);
3795e8d8bef9SDimitry Andric   case TargetOpcode::G_FFLOOR:
3796e8d8bef9SDimitry Andric     return lowerFFloor(MI);
3797e8d8bef9SDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
3798e8d8bef9SDimitry Andric     return lowerIntrinsicRound(MI);
37995f757f3fSDimitry Andric   case TargetOpcode::G_FRINT: {
3800e8d8bef9SDimitry Andric     // Since round even is the assumed rounding mode for unconstrained FP
3801e8d8bef9SDimitry Andric     // operations, rint and roundeven are the same operation.
38025f757f3fSDimitry Andric     changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3803e8d8bef9SDimitry Andric     return Legalized;
3804e8d8bef9SDimitry Andric   }
3805e8d8bef9SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
380606c3fb27SDimitry Andric     auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3807e8d8bef9SDimitry Andric     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3808e8d8bef9SDimitry Andric                                   **MI.memoperands_begin());
3809e8d8bef9SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3810e8d8bef9SDimitry Andric     MI.eraseFromParent();
3811e8d8bef9SDimitry Andric     return Legalized;
3812e8d8bef9SDimitry Andric   }
3813e8d8bef9SDimitry Andric   case TargetOpcode::G_LOAD:
3814e8d8bef9SDimitry Andric   case TargetOpcode::G_SEXTLOAD:
3815e8d8bef9SDimitry Andric   case TargetOpcode::G_ZEXTLOAD:
3816fe6060f1SDimitry Andric     return lowerLoad(cast<GAnyLoad>(MI));
3817e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE:
3818fe6060f1SDimitry Andric     return lowerStore(cast<GStore>(MI));
38190b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
38200b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
38210b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ:
38220b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ:
38230b57cec5SDimitry Andric   case TargetOpcode::G_CTPOP:
3824e8d8bef9SDimitry Andric     return lowerBitCount(MI);
38250b57cec5SDimitry Andric   case G_UADDO: {
382606c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
38270b57cec5SDimitry Andric 
38280b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, LHS, RHS);
38290b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
38300b57cec5SDimitry Andric 
38310b57cec5SDimitry Andric     MI.eraseFromParent();
38320b57cec5SDimitry Andric     return Legalized;
38330b57cec5SDimitry Andric   }
38340b57cec5SDimitry Andric   case G_UADDE: {
383506c3fb27SDimitry Andric     auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
38365f757f3fSDimitry Andric     const LLT CondTy = MRI.getType(CarryOut);
38375f757f3fSDimitry Andric     const LLT Ty = MRI.getType(Res);
38380b57cec5SDimitry Andric 
38395f757f3fSDimitry Andric     // Initial add of the two operands.
38405ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
38415f757f3fSDimitry Andric 
38425f757f3fSDimitry Andric     // Initial check for carry.
38435f757f3fSDimitry Andric     auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
38445f757f3fSDimitry Andric 
38455f757f3fSDimitry Andric     // Add the sum and the carry.
38465ffd83dbSDimitry Andric     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
38470b57cec5SDimitry Andric     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
38485f757f3fSDimitry Andric 
38495f757f3fSDimitry Andric     // Second check for carry. We can only carry if the initial sum is all 1s
38505f757f3fSDimitry Andric     // and the carry is set, resulting in a new sum of 0.
38515f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
38525f757f3fSDimitry Andric     auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
38535f757f3fSDimitry Andric     auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
38545f757f3fSDimitry Andric     MIRBuilder.buildOr(CarryOut, Carry, Carry2);
38550b57cec5SDimitry Andric 
38560b57cec5SDimitry Andric     MI.eraseFromParent();
38570b57cec5SDimitry Andric     return Legalized;
38580b57cec5SDimitry Andric   }
38590b57cec5SDimitry Andric   case G_USUBO: {
386006c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
38610b57cec5SDimitry Andric 
38620b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, LHS, RHS);
38630b57cec5SDimitry Andric     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
38640b57cec5SDimitry Andric 
38650b57cec5SDimitry Andric     MI.eraseFromParent();
38660b57cec5SDimitry Andric     return Legalized;
38670b57cec5SDimitry Andric   }
38680b57cec5SDimitry Andric   case G_USUBE: {
386906c3fb27SDimitry Andric     auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
38705ffd83dbSDimitry Andric     const LLT CondTy = MRI.getType(BorrowOut);
38715ffd83dbSDimitry Andric     const LLT Ty = MRI.getType(Res);
38720b57cec5SDimitry Andric 
38735f757f3fSDimitry Andric     // Initial subtract of the two operands.
38745ffd83dbSDimitry Andric     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
38755f757f3fSDimitry Andric 
38765f757f3fSDimitry Andric     // Initial check for borrow.
38775f757f3fSDimitry Andric     auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
38785f757f3fSDimitry Andric 
38795f757f3fSDimitry Andric     // Subtract the borrow from the first subtract.
38805ffd83dbSDimitry Andric     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
38810b57cec5SDimitry Andric     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
38825ffd83dbSDimitry Andric 
38835f757f3fSDimitry Andric     // Second check for borrow. We can only borrow if the initial difference is
38845f757f3fSDimitry Andric     // 0 and the borrow is set, resulting in a new difference of all 1s.
38855f757f3fSDimitry Andric     auto Zero = MIRBuilder.buildConstant(Ty, 0);
38865f757f3fSDimitry Andric     auto TmpResEqZero =
38875f757f3fSDimitry Andric         MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
38885f757f3fSDimitry Andric     auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
38895f757f3fSDimitry Andric     MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
38900b57cec5SDimitry Andric 
38910b57cec5SDimitry Andric     MI.eraseFromParent();
38920b57cec5SDimitry Andric     return Legalized;
38930b57cec5SDimitry Andric   }
38940b57cec5SDimitry Andric   case G_UITOFP:
3895e8d8bef9SDimitry Andric     return lowerUITOFP(MI);
38960b57cec5SDimitry Andric   case G_SITOFP:
3897e8d8bef9SDimitry Andric     return lowerSITOFP(MI);
38988bcb0991SDimitry Andric   case G_FPTOUI:
3899e8d8bef9SDimitry Andric     return lowerFPTOUI(MI);
39005ffd83dbSDimitry Andric   case G_FPTOSI:
39015ffd83dbSDimitry Andric     return lowerFPTOSI(MI);
39025ffd83dbSDimitry Andric   case G_FPTRUNC:
3903e8d8bef9SDimitry Andric     return lowerFPTRUNC(MI);
3904e8d8bef9SDimitry Andric   case G_FPOWI:
3905e8d8bef9SDimitry Andric     return lowerFPOWI(MI);
39060b57cec5SDimitry Andric   case G_SMIN:
39070b57cec5SDimitry Andric   case G_SMAX:
39080b57cec5SDimitry Andric   case G_UMIN:
39090b57cec5SDimitry Andric   case G_UMAX:
3910e8d8bef9SDimitry Andric     return lowerMinMax(MI);
39110b57cec5SDimitry Andric   case G_FCOPYSIGN:
3912e8d8bef9SDimitry Andric     return lowerFCopySign(MI);
39130b57cec5SDimitry Andric   case G_FMINNUM:
39140b57cec5SDimitry Andric   case G_FMAXNUM:
39150b57cec5SDimitry Andric     return lowerFMinNumMaxNum(MI);
39165ffd83dbSDimitry Andric   case G_MERGE_VALUES:
39175ffd83dbSDimitry Andric     return lowerMergeValues(MI);
39188bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
39198bcb0991SDimitry Andric     return lowerUnmergeValues(MI);
39208bcb0991SDimitry Andric   case TargetOpcode::G_SEXT_INREG: {
39218bcb0991SDimitry Andric     assert(MI.getOperand(2).isImm() && "Expected immediate");
39228bcb0991SDimitry Andric     int64_t SizeInBits = MI.getOperand(2).getImm();
39238bcb0991SDimitry Andric 
392406c3fb27SDimitry Andric     auto [DstReg, SrcReg] = MI.getFirst2Regs();
39258bcb0991SDimitry Andric     LLT DstTy = MRI.getType(DstReg);
39268bcb0991SDimitry Andric     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
39278bcb0991SDimitry Andric 
39288bcb0991SDimitry Andric     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
39295ffd83dbSDimitry Andric     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
39305ffd83dbSDimitry Andric     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
39318bcb0991SDimitry Andric     MI.eraseFromParent();
39328bcb0991SDimitry Andric     return Legalized;
39338bcb0991SDimitry Andric   }
3934e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
3935e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
3936e8d8bef9SDimitry Andric     return lowerExtractInsertVectorElt(MI);
39378bcb0991SDimitry Andric   case G_SHUFFLE_VECTOR:
39388bcb0991SDimitry Andric     return lowerShuffleVector(MI);
39398bcb0991SDimitry Andric   case G_DYN_STACKALLOC:
39408bcb0991SDimitry Andric     return lowerDynStackAlloc(MI);
39415f757f3fSDimitry Andric   case G_STACKSAVE:
39425f757f3fSDimitry Andric     return lowerStackSave(MI);
39435f757f3fSDimitry Andric   case G_STACKRESTORE:
39445f757f3fSDimitry Andric     return lowerStackRestore(MI);
39458bcb0991SDimitry Andric   case G_EXTRACT:
39468bcb0991SDimitry Andric     return lowerExtract(MI);
39478bcb0991SDimitry Andric   case G_INSERT:
39488bcb0991SDimitry Andric     return lowerInsert(MI);
3949480093f4SDimitry Andric   case G_BSWAP:
3950480093f4SDimitry Andric     return lowerBswap(MI);
3951480093f4SDimitry Andric   case G_BITREVERSE:
3952480093f4SDimitry Andric     return lowerBitreverse(MI);
3953480093f4SDimitry Andric   case G_READ_REGISTER:
39545ffd83dbSDimitry Andric   case G_WRITE_REGISTER:
39555ffd83dbSDimitry Andric     return lowerReadWriteRegister(MI);
3956e8d8bef9SDimitry Andric   case G_UADDSAT:
3957e8d8bef9SDimitry Andric   case G_USUBSAT: {
3958e8d8bef9SDimitry Andric     // Try to make a reasonable guess about which lowering strategy to use. The
3959e8d8bef9SDimitry Andric     // target can override this with custom lowering and calling the
3960e8d8bef9SDimitry Andric     // implementation functions.
3961e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3962e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_UMIN, Ty}))
3963e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3964e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
39650b57cec5SDimitry Andric   }
3966e8d8bef9SDimitry Andric   case G_SADDSAT:
3967e8d8bef9SDimitry Andric   case G_SSUBSAT: {
3968e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3969e8d8bef9SDimitry Andric 
3970e8d8bef9SDimitry Andric     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3971e8d8bef9SDimitry Andric     // since it's a shorter expansion. However, we would need to figure out the
3972e8d8bef9SDimitry Andric     // preferred boolean type for the carry out for the query.
3973e8d8bef9SDimitry Andric     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3974e8d8bef9SDimitry Andric       return lowerAddSubSatToMinMax(MI);
3975e8d8bef9SDimitry Andric     return lowerAddSubSatToAddoSubo(MI);
3976e8d8bef9SDimitry Andric   }
3977e8d8bef9SDimitry Andric   case G_SSHLSAT:
3978e8d8bef9SDimitry Andric   case G_USHLSAT:
3979e8d8bef9SDimitry Andric     return lowerShlSat(MI);
3980fe6060f1SDimitry Andric   case G_ABS:
3981fe6060f1SDimitry Andric     return lowerAbsToAddXor(MI);
3982e8d8bef9SDimitry Andric   case G_SELECT:
3983e8d8bef9SDimitry Andric     return lowerSelect(MI);
3984bdd1243dSDimitry Andric   case G_IS_FPCLASS:
3985bdd1243dSDimitry Andric     return lowerISFPCLASS(MI);
3986fe6060f1SDimitry Andric   case G_SDIVREM:
3987fe6060f1SDimitry Andric   case G_UDIVREM:
3988fe6060f1SDimitry Andric     return lowerDIVREM(MI);
3989fe6060f1SDimitry Andric   case G_FSHL:
3990fe6060f1SDimitry Andric   case G_FSHR:
3991fe6060f1SDimitry Andric     return lowerFunnelShift(MI);
3992fe6060f1SDimitry Andric   case G_ROTL:
3993fe6060f1SDimitry Andric   case G_ROTR:
3994fe6060f1SDimitry Andric     return lowerRotate(MI);
3995349cc55cSDimitry Andric   case G_MEMSET:
3996349cc55cSDimitry Andric   case G_MEMCPY:
3997349cc55cSDimitry Andric   case G_MEMMOVE:
3998349cc55cSDimitry Andric     return lowerMemCpyFamily(MI);
3999349cc55cSDimitry Andric   case G_MEMCPY_INLINE:
4000349cc55cSDimitry Andric     return lowerMemcpyInline(MI);
40015f757f3fSDimitry Andric   case G_ZEXT:
40025f757f3fSDimitry Andric   case G_SEXT:
40035f757f3fSDimitry Andric   case G_ANYEXT:
40045f757f3fSDimitry Andric     return lowerEXT(MI);
40055f757f3fSDimitry Andric   case G_TRUNC:
40065f757f3fSDimitry Andric     return lowerTRUNC(MI);
4007349cc55cSDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
4008349cc55cSDimitry Andric     return lowerVectorReduction(MI);
40095f757f3fSDimitry Andric   case G_VAARG:
40105f757f3fSDimitry Andric     return lowerVAArg(MI);
4011e8d8bef9SDimitry Andric   }
4012e8d8bef9SDimitry Andric }
4013e8d8bef9SDimitry Andric 
4014e8d8bef9SDimitry Andric Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4015e8d8bef9SDimitry Andric                                                   Align MinAlign) const {
4016e8d8bef9SDimitry Andric   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4017e8d8bef9SDimitry Andric   // datalayout for the preferred alignment. Also there should be a target hook
4018e8d8bef9SDimitry Andric   // for this to allow targets to reduce the alignment and ignore the
4019e8d8bef9SDimitry Andric   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4020e8d8bef9SDimitry Andric   // the type.
4021e8d8bef9SDimitry Andric   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4022e8d8bef9SDimitry Andric }
4023e8d8bef9SDimitry Andric 
4024e8d8bef9SDimitry Andric MachineInstrBuilder
4025e8d8bef9SDimitry Andric LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4026e8d8bef9SDimitry Andric                                       MachinePointerInfo &PtrInfo) {
4027e8d8bef9SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
4028e8d8bef9SDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
4029e8d8bef9SDimitry Andric   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4030e8d8bef9SDimitry Andric 
4031e8d8bef9SDimitry Andric   unsigned AddrSpace = DL.getAllocaAddrSpace();
4032e8d8bef9SDimitry Andric   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4033e8d8bef9SDimitry Andric 
4034e8d8bef9SDimitry Andric   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4035e8d8bef9SDimitry Andric   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4036e8d8bef9SDimitry Andric }
4037e8d8bef9SDimitry Andric 
4038e8d8bef9SDimitry Andric static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
4039e8d8bef9SDimitry Andric                                         LLT VecTy) {
4040e8d8bef9SDimitry Andric   int64_t IdxVal;
4041e8d8bef9SDimitry Andric   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
4042e8d8bef9SDimitry Andric     return IdxReg;
4043e8d8bef9SDimitry Andric 
4044e8d8bef9SDimitry Andric   LLT IdxTy = B.getMRI()->getType(IdxReg);
4045e8d8bef9SDimitry Andric   unsigned NElts = VecTy.getNumElements();
4046e8d8bef9SDimitry Andric   if (isPowerOf2_32(NElts)) {
4047e8d8bef9SDimitry Andric     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4048e8d8bef9SDimitry Andric     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4049e8d8bef9SDimitry Andric   }
4050e8d8bef9SDimitry Andric 
4051e8d8bef9SDimitry Andric   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4052e8d8bef9SDimitry Andric       .getReg(0);
4053e8d8bef9SDimitry Andric }
4054e8d8bef9SDimitry Andric 
4055e8d8bef9SDimitry Andric Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4056e8d8bef9SDimitry Andric                                                   Register Index) {
4057e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
4058e8d8bef9SDimitry Andric 
4059e8d8bef9SDimitry Andric   // Calculate the element offset and add it to the pointer.
4060e8d8bef9SDimitry Andric   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4061e8d8bef9SDimitry Andric   assert(EltSize * 8 == EltTy.getSizeInBits() &&
4062e8d8bef9SDimitry Andric          "Converting bits to bytes lost precision");
4063e8d8bef9SDimitry Andric 
4064e8d8bef9SDimitry Andric   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
4065e8d8bef9SDimitry Andric 
4066e8d8bef9SDimitry Andric   LLT IdxTy = MRI.getType(Index);
4067e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4068e8d8bef9SDimitry Andric                                  MIRBuilder.buildConstant(IdxTy, EltSize));
4069e8d8bef9SDimitry Andric 
4070e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(VecPtr);
4071e8d8bef9SDimitry Andric   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
40720b57cec5SDimitry Andric }
40730b57cec5SDimitry Andric 
40740eae32dcSDimitry Andric #ifndef NDEBUG
40750eae32dcSDimitry Andric /// Check that all vector operands have same number of elements. Other operands
40760eae32dcSDimitry Andric /// should be listed in NonVecOp.
40770eae32dcSDimitry Andric static bool hasSameNumEltsOnAllVectorOperands(
40780eae32dcSDimitry Andric     GenericMachineInstr &MI, MachineRegisterInfo &MRI,
40790eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
40800eae32dcSDimitry Andric   if (MI.getNumMemOperands() != 0)
40810eae32dcSDimitry Andric     return false;
40820b57cec5SDimitry Andric 
40830eae32dcSDimitry Andric   LLT VecTy = MRI.getType(MI.getReg(0));
40840eae32dcSDimitry Andric   if (!VecTy.isVector())
40850eae32dcSDimitry Andric     return false;
40860eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
40870b57cec5SDimitry Andric 
40880eae32dcSDimitry Andric   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
40890eae32dcSDimitry Andric     MachineOperand &Op = MI.getOperand(OpIdx);
40900eae32dcSDimitry Andric     if (!Op.isReg()) {
40910eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
40920eae32dcSDimitry Andric         return false;
40930eae32dcSDimitry Andric       continue;
40940eae32dcSDimitry Andric     }
40950b57cec5SDimitry Andric 
40960eae32dcSDimitry Andric     LLT Ty = MRI.getType(Op.getReg());
40970eae32dcSDimitry Andric     if (!Ty.isVector()) {
40980eae32dcSDimitry Andric       if (!is_contained(NonVecOpIndices, OpIdx))
40990eae32dcSDimitry Andric         return false;
41000eae32dcSDimitry Andric       continue;
41010eae32dcSDimitry Andric     }
41020eae32dcSDimitry Andric 
41030eae32dcSDimitry Andric     if (Ty.getNumElements() != NumElts)
41040eae32dcSDimitry Andric       return false;
41050eae32dcSDimitry Andric   }
41060eae32dcSDimitry Andric 
41070eae32dcSDimitry Andric   return true;
41080eae32dcSDimitry Andric }
41090eae32dcSDimitry Andric #endif
41100eae32dcSDimitry Andric 
41110eae32dcSDimitry Andric /// Fill \p DstOps with DstOps that have same number of elements combined as
41120eae32dcSDimitry Andric /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
41130eae32dcSDimitry Andric /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
41140eae32dcSDimitry Andric /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
41150eae32dcSDimitry Andric static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
41160eae32dcSDimitry Andric                        unsigned NumElts) {
41170eae32dcSDimitry Andric   LLT LeftoverTy;
41180eae32dcSDimitry Andric   assert(Ty.isVector() && "Expected vector type");
41190eae32dcSDimitry Andric   LLT EltTy = Ty.getElementType();
41200eae32dcSDimitry Andric   LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
41210eae32dcSDimitry Andric   int NumParts, NumLeftover;
41220eae32dcSDimitry Andric   std::tie(NumParts, NumLeftover) =
41230eae32dcSDimitry Andric       getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
41240eae32dcSDimitry Andric 
41250eae32dcSDimitry Andric   assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
41260eae32dcSDimitry Andric   for (int i = 0; i < NumParts; ++i) {
41270eae32dcSDimitry Andric     DstOps.push_back(NarrowTy);
41280eae32dcSDimitry Andric   }
41290eae32dcSDimitry Andric 
41300eae32dcSDimitry Andric   if (LeftoverTy.isValid()) {
41310eae32dcSDimitry Andric     assert(NumLeftover == 1 && "expected exactly one leftover");
41320eae32dcSDimitry Andric     DstOps.push_back(LeftoverTy);
41330eae32dcSDimitry Andric   }
41340eae32dcSDimitry Andric }
41350eae32dcSDimitry Andric 
41360eae32dcSDimitry Andric /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
41370eae32dcSDimitry Andric /// made from \p Op depending on operand type.
41380eae32dcSDimitry Andric static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
41390eae32dcSDimitry Andric                            MachineOperand &Op) {
41400eae32dcSDimitry Andric   for (unsigned i = 0; i < N; ++i) {
41410eae32dcSDimitry Andric     if (Op.isReg())
41420eae32dcSDimitry Andric       Ops.push_back(Op.getReg());
41430eae32dcSDimitry Andric     else if (Op.isImm())
41440eae32dcSDimitry Andric       Ops.push_back(Op.getImm());
41450eae32dcSDimitry Andric     else if (Op.isPredicate())
41460eae32dcSDimitry Andric       Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
41470eae32dcSDimitry Andric     else
41480eae32dcSDimitry Andric       llvm_unreachable("Unsupported type");
41490eae32dcSDimitry Andric   }
41500b57cec5SDimitry Andric }
41510b57cec5SDimitry Andric 
41520b57cec5SDimitry Andric // Handle splitting vector operations which need to have the same number of
41530b57cec5SDimitry Andric // elements in each type index, but each type index may have a different element
41540b57cec5SDimitry Andric // type.
41550b57cec5SDimitry Andric //
41560b57cec5SDimitry Andric // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
41570b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
41580b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
41590b57cec5SDimitry Andric //
41600b57cec5SDimitry Andric // Also handles some irregular breakdown cases, e.g.
41610b57cec5SDimitry Andric // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
41620b57cec5SDimitry Andric //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
41630b57cec5SDimitry Andric //             s64 = G_SHL s64, s32
41640b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
41650b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorMultiEltType(
41660eae32dcSDimitry Andric     GenericMachineInstr &MI, unsigned NumElts,
41670eae32dcSDimitry Andric     std::initializer_list<unsigned> NonVecOpIndices) {
41680eae32dcSDimitry Andric   assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
41690eae32dcSDimitry Andric          "Non-compatible opcode or not specified non-vector operands");
41700eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
41710b57cec5SDimitry Andric 
41720eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
41730eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
41740b57cec5SDimitry Andric 
41750eae32dcSDimitry Andric   // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
41760eae32dcSDimitry Andric   // Build instructions with DstOps to use instruction found by CSE directly.
41770eae32dcSDimitry Andric   // CSE copies found instruction into given vreg when building with vreg dest.
41780eae32dcSDimitry Andric   SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
41790eae32dcSDimitry Andric   // Output registers will be taken from created instructions.
41800eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
41810eae32dcSDimitry Andric   for (unsigned i = 0; i < NumDefs; ++i) {
41820eae32dcSDimitry Andric     makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
41830b57cec5SDimitry Andric   }
41840b57cec5SDimitry Andric 
41850eae32dcSDimitry Andric   // Split vector input operands into sub-vectors with NumElts elts + Leftover.
41860eae32dcSDimitry Andric   // Operands listed in NonVecOpIndices will be used as is without splitting;
41870eae32dcSDimitry Andric   // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
41880eae32dcSDimitry Andric   // scalar condition (op 1), immediate in sext_inreg (op 2).
41890eae32dcSDimitry Andric   SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
41900eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
41910eae32dcSDimitry Andric        ++UseIdx, ++UseNo) {
41920eae32dcSDimitry Andric     if (is_contained(NonVecOpIndices, UseIdx)) {
41930eae32dcSDimitry Andric       broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
41940eae32dcSDimitry Andric                      MI.getOperand(UseIdx));
41950b57cec5SDimitry Andric     } else {
41960eae32dcSDimitry Andric       SmallVector<Register, 8> SplitPieces;
41970eae32dcSDimitry Andric       extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
41980eae32dcSDimitry Andric       for (auto Reg : SplitPieces)
41990eae32dcSDimitry Andric         InputOpsPieces[UseNo].push_back(Reg);
42000eae32dcSDimitry Andric     }
42010b57cec5SDimitry Andric   }
42020b57cec5SDimitry Andric 
42030eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
42040eae32dcSDimitry Andric 
42050eae32dcSDimitry Andric   // Take i-th piece of each input operand split and build sub-vector/scalar
42060eae32dcSDimitry Andric   // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
42070eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
42080eae32dcSDimitry Andric     SmallVector<DstOp, 2> Defs;
42090eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
42100eae32dcSDimitry Andric       Defs.push_back(OutputOpsPieces[DstNo][i]);
42110eae32dcSDimitry Andric 
42120eae32dcSDimitry Andric     SmallVector<SrcOp, 3> Uses;
42130eae32dcSDimitry Andric     for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
42140eae32dcSDimitry Andric       Uses.push_back(InputOpsPieces[InputNo][i]);
42150eae32dcSDimitry Andric 
42160eae32dcSDimitry Andric     auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
42170eae32dcSDimitry Andric     for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
42180eae32dcSDimitry Andric       OutputRegs[DstNo].push_back(I.getReg(DstNo));
42190b57cec5SDimitry Andric   }
42200b57cec5SDimitry Andric 
42210eae32dcSDimitry Andric   // Merge small outputs into MI's output for each def operand.
42220eae32dcSDimitry Andric   if (NumLeftovers) {
42230eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
42240eae32dcSDimitry Andric       mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
42250eae32dcSDimitry Andric   } else {
42260eae32dcSDimitry Andric     for (unsigned i = 0; i < NumDefs; ++i)
4227bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
42280eae32dcSDimitry Andric   }
42290b57cec5SDimitry Andric 
42300b57cec5SDimitry Andric   MI.eraseFromParent();
42310b57cec5SDimitry Andric   return Legalized;
42320b57cec5SDimitry Andric }
42330b57cec5SDimitry Andric 
42340b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
42350eae32dcSDimitry Andric LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
42360eae32dcSDimitry Andric                                         unsigned NumElts) {
42370eae32dcSDimitry Andric   unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
42380b57cec5SDimitry Andric 
42390eae32dcSDimitry Andric   unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
42400eae32dcSDimitry Andric   unsigned NumDefs = MI.getNumDefs();
42410b57cec5SDimitry Andric 
42420eae32dcSDimitry Andric   SmallVector<DstOp, 8> OutputOpsPieces;
42430eae32dcSDimitry Andric   SmallVector<Register, 8> OutputRegs;
42440eae32dcSDimitry Andric   makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
42450b57cec5SDimitry Andric 
42460eae32dcSDimitry Andric   // Instructions that perform register split will be inserted in basic block
42470eae32dcSDimitry Andric   // where register is defined (basic block is in the next operand).
42480eae32dcSDimitry Andric   SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
42490eae32dcSDimitry Andric   for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
42500eae32dcSDimitry Andric        UseIdx += 2, ++UseNo) {
42510eae32dcSDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4252bdd1243dSDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
42530eae32dcSDimitry Andric     extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
42540b57cec5SDimitry Andric   }
42550eae32dcSDimitry Andric 
42560eae32dcSDimitry Andric   // Build PHIs with fewer elements.
42570eae32dcSDimitry Andric   unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
42580eae32dcSDimitry Andric   MIRBuilder.setInsertPt(*MI.getParent(), MI);
42590eae32dcSDimitry Andric   for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
42600eae32dcSDimitry Andric     auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
42610eae32dcSDimitry Andric     Phi.addDef(
42620eae32dcSDimitry Andric         MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
42630eae32dcSDimitry Andric     OutputRegs.push_back(Phi.getReg(0));
42640eae32dcSDimitry Andric 
42650eae32dcSDimitry Andric     for (unsigned j = 0; j < NumInputs / 2; ++j) {
42660eae32dcSDimitry Andric       Phi.addUse(InputOpsPieces[j][i]);
42670eae32dcSDimitry Andric       Phi.add(MI.getOperand(1 + j * 2 + 1));
42680eae32dcSDimitry Andric     }
42690eae32dcSDimitry Andric   }
42700eae32dcSDimitry Andric 
42710eae32dcSDimitry Andric   // Merge small outputs into MI's def.
42720eae32dcSDimitry Andric   if (NumLeftovers) {
42730eae32dcSDimitry Andric     mergeMixedSubvectors(MI.getReg(0), OutputRegs);
42740eae32dcSDimitry Andric   } else {
4275bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
42760b57cec5SDimitry Andric   }
42770b57cec5SDimitry Andric 
42780b57cec5SDimitry Andric   MI.eraseFromParent();
42790b57cec5SDimitry Andric   return Legalized;
42800b57cec5SDimitry Andric }
42810b57cec5SDimitry Andric 
42820b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
42838bcb0991SDimitry Andric LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
42848bcb0991SDimitry Andric                                                   unsigned TypeIdx,
42858bcb0991SDimitry Andric                                                   LLT NarrowTy) {
42868bcb0991SDimitry Andric   const int NumDst = MI.getNumOperands() - 1;
42878bcb0991SDimitry Andric   const Register SrcReg = MI.getOperand(NumDst).getReg();
42880eae32dcSDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
42898bcb0991SDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
42908bcb0991SDimitry Andric 
42910eae32dcSDimitry Andric   if (TypeIdx != 1 || NarrowTy == DstTy)
42928bcb0991SDimitry Andric     return UnableToLegalize;
42938bcb0991SDimitry Andric 
42940eae32dcSDimitry Andric   // Requires compatible types. Otherwise SrcReg should have been defined by
42950eae32dcSDimitry Andric   // merge-like instruction that would get artifact combined. Most likely
42960eae32dcSDimitry Andric   // instruction that defines SrcReg has to perform more/fewer elements
42970eae32dcSDimitry Andric   // legalization compatible with NarrowTy.
42980eae32dcSDimitry Andric   assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
42990eae32dcSDimitry Andric   assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
43008bcb0991SDimitry Andric 
43010eae32dcSDimitry Andric   if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
43020eae32dcSDimitry Andric       (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
43030eae32dcSDimitry Andric     return UnableToLegalize;
43040eae32dcSDimitry Andric 
43050eae32dcSDimitry Andric   // This is most likely DstTy (smaller then register size) packed in SrcTy
43060eae32dcSDimitry Andric   // (larger then register size) and since unmerge was not combined it will be
43070eae32dcSDimitry Andric   // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
43080eae32dcSDimitry Andric   // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
43090eae32dcSDimitry Andric 
43100eae32dcSDimitry Andric   // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
43110eae32dcSDimitry Andric   //
43120eae32dcSDimitry Andric   // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
43130eae32dcSDimitry Andric   // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
43140eae32dcSDimitry Andric   // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
43150eae32dcSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
43168bcb0991SDimitry Andric   const int NumUnmerge = Unmerge->getNumOperands() - 1;
43178bcb0991SDimitry Andric   const int PartsPerUnmerge = NumDst / NumUnmerge;
43188bcb0991SDimitry Andric 
43198bcb0991SDimitry Andric   for (int I = 0; I != NumUnmerge; ++I) {
43208bcb0991SDimitry Andric     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
43218bcb0991SDimitry Andric 
43228bcb0991SDimitry Andric     for (int J = 0; J != PartsPerUnmerge; ++J)
43238bcb0991SDimitry Andric       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
43248bcb0991SDimitry Andric     MIB.addUse(Unmerge.getReg(I));
43258bcb0991SDimitry Andric   }
43268bcb0991SDimitry Andric 
43278bcb0991SDimitry Andric   MI.eraseFromParent();
43288bcb0991SDimitry Andric   return Legalized;
43298bcb0991SDimitry Andric }
43308bcb0991SDimitry Andric 
4331fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
4332e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4333e8d8bef9SDimitry Andric                                           LLT NarrowTy) {
433406c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
43350eae32dcSDimitry Andric   // Requires compatible types. Otherwise user of DstReg did not perform unmerge
43360eae32dcSDimitry Andric   // that should have been artifact combined. Most likely instruction that uses
43370eae32dcSDimitry Andric   // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
43380eae32dcSDimitry Andric   assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
43390eae32dcSDimitry Andric   assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
43400eae32dcSDimitry Andric   if (NarrowTy == SrcTy)
43410eae32dcSDimitry Andric     return UnableToLegalize;
43428bcb0991SDimitry Andric 
43430eae32dcSDimitry Andric   // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
43440eae32dcSDimitry Andric   // is for old mir tests. Since the changes to more/fewer elements it should no
43450eae32dcSDimitry Andric   // longer be possible to generate MIR like this when starting from llvm-ir
43460eae32dcSDimitry Andric   // because LCMTy approach was replaced with merge/unmerge to vector elements.
43470eae32dcSDimitry Andric   if (TypeIdx == 1) {
43480eae32dcSDimitry Andric     assert(SrcTy.isVector() && "Expected vector types");
43490eae32dcSDimitry Andric     assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
43500eae32dcSDimitry Andric     if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
43510eae32dcSDimitry Andric         (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
43520eae32dcSDimitry Andric       return UnableToLegalize;
43530eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
43540eae32dcSDimitry Andric     //
43550eae32dcSDimitry Andric     // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
43560eae32dcSDimitry Andric     // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
43570eae32dcSDimitry Andric     // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
43580eae32dcSDimitry Andric     // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
43590eae32dcSDimitry Andric     // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
43600eae32dcSDimitry Andric     // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4361e8d8bef9SDimitry Andric 
43620eae32dcSDimitry Andric     SmallVector<Register, 8> Elts;
43630eae32dcSDimitry Andric     LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
43640eae32dcSDimitry Andric     for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
43650eae32dcSDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
43660eae32dcSDimitry Andric       for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
43670eae32dcSDimitry Andric         Elts.push_back(Unmerge.getReg(j));
43680eae32dcSDimitry Andric     }
4369e8d8bef9SDimitry Andric 
43700eae32dcSDimitry Andric     SmallVector<Register, 8> NarrowTyElts;
43710eae32dcSDimitry Andric     unsigned NumNarrowTyElts = NarrowTy.getNumElements();
43720eae32dcSDimitry Andric     unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
43730eae32dcSDimitry Andric     for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
43740eae32dcSDimitry Andric          ++i, Offset += NumNarrowTyElts) {
43750eae32dcSDimitry Andric       ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4376bdd1243dSDimitry Andric       NarrowTyElts.push_back(
4377bdd1243dSDimitry Andric           MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
43780eae32dcSDimitry Andric     }
4379e8d8bef9SDimitry Andric 
4380bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
43810eae32dcSDimitry Andric     MI.eraseFromParent();
43820eae32dcSDimitry Andric     return Legalized;
43830eae32dcSDimitry Andric   }
43840eae32dcSDimitry Andric 
43850eae32dcSDimitry Andric   assert(TypeIdx == 0 && "Bad type index");
43860eae32dcSDimitry Andric   if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
43870eae32dcSDimitry Andric       (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
43880eae32dcSDimitry Andric     return UnableToLegalize;
43890eae32dcSDimitry Andric 
43900eae32dcSDimitry Andric   // This is most likely SrcTy (smaller then register size) packed in DstTy
43910eae32dcSDimitry Andric   // (larger then register size) and since merge was not combined it will be
43920eae32dcSDimitry Andric   // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
43930eae32dcSDimitry Andric   // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
43940eae32dcSDimitry Andric 
43950eae32dcSDimitry Andric   // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
43960eae32dcSDimitry Andric   //
43970eae32dcSDimitry Andric   // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
43980eae32dcSDimitry Andric   // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
43990eae32dcSDimitry Andric   // %0:_(DstTy)  = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
44000eae32dcSDimitry Andric   SmallVector<Register, 8> NarrowTyElts;
44010eae32dcSDimitry Andric   unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
44020eae32dcSDimitry Andric   unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
44030eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
44040eae32dcSDimitry Andric   for (unsigned i = 0; i < NumParts; ++i) {
44050eae32dcSDimitry Andric     SmallVector<Register, 8> Sources;
44060eae32dcSDimitry Andric     for (unsigned j = 0; j < NumElts; ++j)
44070eae32dcSDimitry Andric       Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4408bdd1243dSDimitry Andric     NarrowTyElts.push_back(
4409bdd1243dSDimitry Andric         MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
44100eae32dcSDimitry Andric   }
44110eae32dcSDimitry Andric 
4412bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4413e8d8bef9SDimitry Andric   MI.eraseFromParent();
4414e8d8bef9SDimitry Andric   return Legalized;
44158bcb0991SDimitry Andric }
44168bcb0991SDimitry Andric 
4417e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
4418e8d8bef9SDimitry Andric LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4419e8d8bef9SDimitry Andric                                                            unsigned TypeIdx,
4420e8d8bef9SDimitry Andric                                                            LLT NarrowVecTy) {
442106c3fb27SDimitry Andric   auto [DstReg, SrcVec] = MI.getFirst2Regs();
4422e8d8bef9SDimitry Andric   Register InsertVal;
4423e8d8bef9SDimitry Andric   bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4424e8d8bef9SDimitry Andric 
4425e8d8bef9SDimitry Andric   assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4426e8d8bef9SDimitry Andric   if (IsInsert)
4427e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
4428e8d8bef9SDimitry Andric 
4429e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4430e8d8bef9SDimitry Andric 
4431e8d8bef9SDimitry Andric   // TODO: Handle total scalarization case.
4432e8d8bef9SDimitry Andric   if (!NarrowVecTy.isVector())
4433e8d8bef9SDimitry Andric     return UnableToLegalize;
4434e8d8bef9SDimitry Andric 
4435e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
4436e8d8bef9SDimitry Andric 
4437e8d8bef9SDimitry Andric   // If the index is a constant, we can really break this down as you would
4438e8d8bef9SDimitry Andric   // expect, and index into the target size pieces.
4439e8d8bef9SDimitry Andric   int64_t IdxVal;
4440349cc55cSDimitry Andric   auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4441fe6060f1SDimitry Andric   if (MaybeCst) {
4442fe6060f1SDimitry Andric     IdxVal = MaybeCst->Value.getSExtValue();
4443e8d8bef9SDimitry Andric     // Avoid out of bounds indexing the pieces.
4444e8d8bef9SDimitry Andric     if (IdxVal >= VecTy.getNumElements()) {
4445e8d8bef9SDimitry Andric       MIRBuilder.buildUndef(DstReg);
4446e8d8bef9SDimitry Andric       MI.eraseFromParent();
4447e8d8bef9SDimitry Andric       return Legalized;
44488bcb0991SDimitry Andric     }
44498bcb0991SDimitry Andric 
4450e8d8bef9SDimitry Andric     SmallVector<Register, 8> VecParts;
4451e8d8bef9SDimitry Andric     LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4452e8d8bef9SDimitry Andric 
4453e8d8bef9SDimitry Andric     // Build a sequence of NarrowTy pieces in VecParts for this operand.
4454e8d8bef9SDimitry Andric     LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4455e8d8bef9SDimitry Andric                                     TargetOpcode::G_ANYEXT);
4456e8d8bef9SDimitry Andric 
4457e8d8bef9SDimitry Andric     unsigned NewNumElts = NarrowVecTy.getNumElements();
4458e8d8bef9SDimitry Andric 
4459e8d8bef9SDimitry Andric     LLT IdxTy = MRI.getType(Idx);
4460e8d8bef9SDimitry Andric     int64_t PartIdx = IdxVal / NewNumElts;
4461e8d8bef9SDimitry Andric     auto NewIdx =
4462e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4463e8d8bef9SDimitry Andric 
4464e8d8bef9SDimitry Andric     if (IsInsert) {
4465e8d8bef9SDimitry Andric       LLT PartTy = MRI.getType(VecParts[PartIdx]);
4466e8d8bef9SDimitry Andric 
4467e8d8bef9SDimitry Andric       // Use the adjusted index to insert into one of the subvectors.
4468e8d8bef9SDimitry Andric       auto InsertPart = MIRBuilder.buildInsertVectorElement(
4469e8d8bef9SDimitry Andric           PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4470e8d8bef9SDimitry Andric       VecParts[PartIdx] = InsertPart.getReg(0);
4471e8d8bef9SDimitry Andric 
4472e8d8bef9SDimitry Andric       // Recombine the inserted subvector with the others to reform the result
4473e8d8bef9SDimitry Andric       // vector.
4474e8d8bef9SDimitry Andric       buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4475e8d8bef9SDimitry Andric     } else {
4476e8d8bef9SDimitry Andric       MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
44778bcb0991SDimitry Andric     }
44788bcb0991SDimitry Andric 
44798bcb0991SDimitry Andric     MI.eraseFromParent();
44808bcb0991SDimitry Andric     return Legalized;
44818bcb0991SDimitry Andric   }
44828bcb0991SDimitry Andric 
4483e8d8bef9SDimitry Andric   // With a variable index, we can't perform the operation in a smaller type, so
4484e8d8bef9SDimitry Andric   // we're forced to expand this.
4485e8d8bef9SDimitry Andric   //
4486e8d8bef9SDimitry Andric   // TODO: We could emit a chain of compare/select to figure out which piece to
4487e8d8bef9SDimitry Andric   // index.
4488e8d8bef9SDimitry Andric   return lowerExtractInsertVectorElt(MI);
4489e8d8bef9SDimitry Andric }
4490e8d8bef9SDimitry Andric 
44918bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
4492fe6060f1SDimitry Andric LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
44930b57cec5SDimitry Andric                                       LLT NarrowTy) {
44940b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
44950b57cec5SDimitry Andric   if (TypeIdx != 0)
44960b57cec5SDimitry Andric     return UnableToLegalize;
44970b57cec5SDimitry Andric 
44980b57cec5SDimitry Andric   // This implementation doesn't work for atomics. Give up instead of doing
44990b57cec5SDimitry Andric   // something invalid.
4500fe6060f1SDimitry Andric   if (LdStMI.isAtomic())
45010b57cec5SDimitry Andric     return UnableToLegalize;
45020b57cec5SDimitry Andric 
4503fe6060f1SDimitry Andric   bool IsLoad = isa<GLoad>(LdStMI);
4504fe6060f1SDimitry Andric   Register ValReg = LdStMI.getReg(0);
4505fe6060f1SDimitry Andric   Register AddrReg = LdStMI.getPointerReg();
45060b57cec5SDimitry Andric   LLT ValTy = MRI.getType(ValReg);
45070b57cec5SDimitry Andric 
45085ffd83dbSDimitry Andric   // FIXME: Do we need a distinct NarrowMemory legalize action?
4509fe6060f1SDimitry Andric   if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
45105ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
45115ffd83dbSDimitry Andric     return UnableToLegalize;
45125ffd83dbSDimitry Andric   }
45135ffd83dbSDimitry Andric 
45140b57cec5SDimitry Andric   int NumParts = -1;
45150b57cec5SDimitry Andric   int NumLeftover = -1;
45160b57cec5SDimitry Andric   LLT LeftoverTy;
45170b57cec5SDimitry Andric   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
45180b57cec5SDimitry Andric   if (IsLoad) {
45190b57cec5SDimitry Andric     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
45200b57cec5SDimitry Andric   } else {
45210b57cec5SDimitry Andric     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
45220b57cec5SDimitry Andric                      NarrowLeftoverRegs)) {
45230b57cec5SDimitry Andric       NumParts = NarrowRegs.size();
45240b57cec5SDimitry Andric       NumLeftover = NarrowLeftoverRegs.size();
45250b57cec5SDimitry Andric     }
45260b57cec5SDimitry Andric   }
45270b57cec5SDimitry Andric 
45280b57cec5SDimitry Andric   if (NumParts == -1)
45290b57cec5SDimitry Andric     return UnableToLegalize;
45300b57cec5SDimitry Andric 
4531e8d8bef9SDimitry Andric   LLT PtrTy = MRI.getType(AddrReg);
4532e8d8bef9SDimitry Andric   const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
45330b57cec5SDimitry Andric 
45340b57cec5SDimitry Andric   unsigned TotalSize = ValTy.getSizeInBits();
45350b57cec5SDimitry Andric 
45360b57cec5SDimitry Andric   // Split the load/store into PartTy sized pieces starting at Offset. If this
45370b57cec5SDimitry Andric   // is a load, return the new registers in ValRegs. For a store, each elements
45380b57cec5SDimitry Andric   // of ValRegs should be PartTy. Returns the next offset that needs to be
45390b57cec5SDimitry Andric   // handled.
454081ad6265SDimitry Andric   bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4541fe6060f1SDimitry Andric   auto MMO = LdStMI.getMMO();
45420b57cec5SDimitry Andric   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
454381ad6265SDimitry Andric                              unsigned NumParts, unsigned Offset) -> unsigned {
45440b57cec5SDimitry Andric     MachineFunction &MF = MIRBuilder.getMF();
45450b57cec5SDimitry Andric     unsigned PartSize = PartTy.getSizeInBits();
45460b57cec5SDimitry Andric     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
454781ad6265SDimitry Andric          ++Idx) {
45480b57cec5SDimitry Andric       unsigned ByteOffset = Offset / 8;
45490b57cec5SDimitry Andric       Register NewAddrReg;
45500b57cec5SDimitry Andric 
4551480093f4SDimitry Andric       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
45520b57cec5SDimitry Andric 
45530b57cec5SDimitry Andric       MachineMemOperand *NewMMO =
4554fe6060f1SDimitry Andric           MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
45550b57cec5SDimitry Andric 
45560b57cec5SDimitry Andric       if (IsLoad) {
45570b57cec5SDimitry Andric         Register Dst = MRI.createGenericVirtualRegister(PartTy);
45580b57cec5SDimitry Andric         ValRegs.push_back(Dst);
45590b57cec5SDimitry Andric         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
45600b57cec5SDimitry Andric       } else {
45610b57cec5SDimitry Andric         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
45620b57cec5SDimitry Andric       }
456381ad6265SDimitry Andric       Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
45640b57cec5SDimitry Andric     }
45650b57cec5SDimitry Andric 
45660b57cec5SDimitry Andric     return Offset;
45670b57cec5SDimitry Andric   };
45680b57cec5SDimitry Andric 
456981ad6265SDimitry Andric   unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
457081ad6265SDimitry Andric   unsigned HandledOffset =
457181ad6265SDimitry Andric       splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
45720b57cec5SDimitry Andric 
45730b57cec5SDimitry Andric   // Handle the rest of the register if this isn't an even type breakdown.
45740b57cec5SDimitry Andric   if (LeftoverTy.isValid())
457581ad6265SDimitry Andric     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
45760b57cec5SDimitry Andric 
45770b57cec5SDimitry Andric   if (IsLoad) {
45780b57cec5SDimitry Andric     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
45790b57cec5SDimitry Andric                 LeftoverTy, NarrowLeftoverRegs);
45800b57cec5SDimitry Andric   }
45810b57cec5SDimitry Andric 
4582fe6060f1SDimitry Andric   LdStMI.eraseFromParent();
45830b57cec5SDimitry Andric   return Legalized;
45840b57cec5SDimitry Andric }
45850b57cec5SDimitry Andric 
45860b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
45870b57cec5SDimitry Andric LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
45880b57cec5SDimitry Andric                                      LLT NarrowTy) {
45890b57cec5SDimitry Andric   using namespace TargetOpcode;
45900eae32dcSDimitry Andric   GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
45910eae32dcSDimitry Andric   unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
45920b57cec5SDimitry Andric 
45930b57cec5SDimitry Andric   switch (MI.getOpcode()) {
45940b57cec5SDimitry Andric   case G_IMPLICIT_DEF:
45955ffd83dbSDimitry Andric   case G_TRUNC:
45960b57cec5SDimitry Andric   case G_AND:
45970b57cec5SDimitry Andric   case G_OR:
45980b57cec5SDimitry Andric   case G_XOR:
45990b57cec5SDimitry Andric   case G_ADD:
46000b57cec5SDimitry Andric   case G_SUB:
46010b57cec5SDimitry Andric   case G_MUL:
4602e8d8bef9SDimitry Andric   case G_PTR_ADD:
46030b57cec5SDimitry Andric   case G_SMULH:
46040b57cec5SDimitry Andric   case G_UMULH:
46050b57cec5SDimitry Andric   case G_FADD:
46060b57cec5SDimitry Andric   case G_FMUL:
46070b57cec5SDimitry Andric   case G_FSUB:
46080b57cec5SDimitry Andric   case G_FNEG:
46090b57cec5SDimitry Andric   case G_FABS:
46100b57cec5SDimitry Andric   case G_FCANONICALIZE:
46110b57cec5SDimitry Andric   case G_FDIV:
46120b57cec5SDimitry Andric   case G_FREM:
46130b57cec5SDimitry Andric   case G_FMA:
46148bcb0991SDimitry Andric   case G_FMAD:
46150b57cec5SDimitry Andric   case G_FPOW:
46160b57cec5SDimitry Andric   case G_FEXP:
46170b57cec5SDimitry Andric   case G_FEXP2:
46185f757f3fSDimitry Andric   case G_FEXP10:
46190b57cec5SDimitry Andric   case G_FLOG:
46200b57cec5SDimitry Andric   case G_FLOG2:
46210b57cec5SDimitry Andric   case G_FLOG10:
462206c3fb27SDimitry Andric   case G_FLDEXP:
46230b57cec5SDimitry Andric   case G_FNEARBYINT:
46240b57cec5SDimitry Andric   case G_FCEIL:
46250b57cec5SDimitry Andric   case G_FFLOOR:
46260b57cec5SDimitry Andric   case G_FRINT:
46270b57cec5SDimitry Andric   case G_INTRINSIC_ROUND:
4628e8d8bef9SDimitry Andric   case G_INTRINSIC_ROUNDEVEN:
46290b57cec5SDimitry Andric   case G_INTRINSIC_TRUNC:
46300b57cec5SDimitry Andric   case G_FCOS:
46310b57cec5SDimitry Andric   case G_FSIN:
46320b57cec5SDimitry Andric   case G_FSQRT:
46330b57cec5SDimitry Andric   case G_BSWAP:
46348bcb0991SDimitry Andric   case G_BITREVERSE:
46350b57cec5SDimitry Andric   case G_SDIV:
4636480093f4SDimitry Andric   case G_UDIV:
4637480093f4SDimitry Andric   case G_SREM:
4638480093f4SDimitry Andric   case G_UREM:
4639fe6060f1SDimitry Andric   case G_SDIVREM:
4640fe6060f1SDimitry Andric   case G_UDIVREM:
46410b57cec5SDimitry Andric   case G_SMIN:
46420b57cec5SDimitry Andric   case G_SMAX:
46430b57cec5SDimitry Andric   case G_UMIN:
46440b57cec5SDimitry Andric   case G_UMAX:
4645fe6060f1SDimitry Andric   case G_ABS:
46460b57cec5SDimitry Andric   case G_FMINNUM:
46470b57cec5SDimitry Andric   case G_FMAXNUM:
46480b57cec5SDimitry Andric   case G_FMINNUM_IEEE:
46490b57cec5SDimitry Andric   case G_FMAXNUM_IEEE:
46500b57cec5SDimitry Andric   case G_FMINIMUM:
46510b57cec5SDimitry Andric   case G_FMAXIMUM:
46525ffd83dbSDimitry Andric   case G_FSHL:
46535ffd83dbSDimitry Andric   case G_FSHR:
4654349cc55cSDimitry Andric   case G_ROTL:
4655349cc55cSDimitry Andric   case G_ROTR:
46565ffd83dbSDimitry Andric   case G_FREEZE:
46575ffd83dbSDimitry Andric   case G_SADDSAT:
46585ffd83dbSDimitry Andric   case G_SSUBSAT:
46595ffd83dbSDimitry Andric   case G_UADDSAT:
46605ffd83dbSDimitry Andric   case G_USUBSAT:
4661fe6060f1SDimitry Andric   case G_UMULO:
4662fe6060f1SDimitry Andric   case G_SMULO:
46630b57cec5SDimitry Andric   case G_SHL:
46640b57cec5SDimitry Andric   case G_LSHR:
46650b57cec5SDimitry Andric   case G_ASHR:
4666e8d8bef9SDimitry Andric   case G_SSHLSAT:
4667e8d8bef9SDimitry Andric   case G_USHLSAT:
46680b57cec5SDimitry Andric   case G_CTLZ:
46690b57cec5SDimitry Andric   case G_CTLZ_ZERO_UNDEF:
46700b57cec5SDimitry Andric   case G_CTTZ:
46710b57cec5SDimitry Andric   case G_CTTZ_ZERO_UNDEF:
46720b57cec5SDimitry Andric   case G_CTPOP:
46730b57cec5SDimitry Andric   case G_FCOPYSIGN:
46740b57cec5SDimitry Andric   case G_ZEXT:
46750b57cec5SDimitry Andric   case G_SEXT:
46760b57cec5SDimitry Andric   case G_ANYEXT:
46770b57cec5SDimitry Andric   case G_FPEXT:
46780b57cec5SDimitry Andric   case G_FPTRUNC:
46790b57cec5SDimitry Andric   case G_SITOFP:
46800b57cec5SDimitry Andric   case G_UITOFP:
46810b57cec5SDimitry Andric   case G_FPTOSI:
46820b57cec5SDimitry Andric   case G_FPTOUI:
46830b57cec5SDimitry Andric   case G_INTTOPTR:
46840b57cec5SDimitry Andric   case G_PTRTOINT:
46850b57cec5SDimitry Andric   case G_ADDRSPACE_CAST:
468681ad6265SDimitry Andric   case G_UADDO:
468781ad6265SDimitry Andric   case G_USUBO:
468881ad6265SDimitry Andric   case G_UADDE:
468981ad6265SDimitry Andric   case G_USUBE:
469081ad6265SDimitry Andric   case G_SADDO:
469181ad6265SDimitry Andric   case G_SSUBO:
469281ad6265SDimitry Andric   case G_SADDE:
469381ad6265SDimitry Andric   case G_SSUBE:
4694bdd1243dSDimitry Andric   case G_STRICT_FADD:
4695bdd1243dSDimitry Andric   case G_STRICT_FSUB:
4696bdd1243dSDimitry Andric   case G_STRICT_FMUL:
4697bdd1243dSDimitry Andric   case G_STRICT_FMA:
469806c3fb27SDimitry Andric   case G_STRICT_FLDEXP:
469906c3fb27SDimitry Andric   case G_FFREXP:
47000eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts);
47010b57cec5SDimitry Andric   case G_ICMP:
47020b57cec5SDimitry Andric   case G_FCMP:
47030eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
4704bdd1243dSDimitry Andric   case G_IS_FPCLASS:
4705bdd1243dSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
47060b57cec5SDimitry Andric   case G_SELECT:
47070eae32dcSDimitry Andric     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
47080eae32dcSDimitry Andric       return fewerElementsVectorMultiEltType(GMI, NumElts);
47090eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
47100b57cec5SDimitry Andric   case G_PHI:
47110eae32dcSDimitry Andric     return fewerElementsVectorPhi(GMI, NumElts);
47128bcb0991SDimitry Andric   case G_UNMERGE_VALUES:
47138bcb0991SDimitry Andric     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
47148bcb0991SDimitry Andric   case G_BUILD_VECTOR:
4715e8d8bef9SDimitry Andric     assert(TypeIdx == 0 && "not a vector type index");
4716e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4717e8d8bef9SDimitry Andric   case G_CONCAT_VECTORS:
4718e8d8bef9SDimitry Andric     if (TypeIdx != 1) // TODO: This probably does work as expected already.
4719e8d8bef9SDimitry Andric       return UnableToLegalize;
4720e8d8bef9SDimitry Andric     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4721e8d8bef9SDimitry Andric   case G_EXTRACT_VECTOR_ELT:
4722e8d8bef9SDimitry Andric   case G_INSERT_VECTOR_ELT:
4723e8d8bef9SDimitry Andric     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
47240b57cec5SDimitry Andric   case G_LOAD:
47250b57cec5SDimitry Andric   case G_STORE:
4726fe6060f1SDimitry Andric     return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
47275ffd83dbSDimitry Andric   case G_SEXT_INREG:
47280eae32dcSDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4729fe6060f1SDimitry Andric   GISEL_VECREDUCE_CASES_NONSEQ
4730fe6060f1SDimitry Andric     return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
47311db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
47321db9f3b2SDimitry Andric   case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
47331db9f3b2SDimitry Andric     return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4734fe6060f1SDimitry Andric   case G_SHUFFLE_VECTOR:
4735fe6060f1SDimitry Andric     return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
47361db9f3b2SDimitry Andric   case G_FPOWI:
47371db9f3b2SDimitry Andric     return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
47380b57cec5SDimitry Andric   default:
47390b57cec5SDimitry Andric     return UnableToLegalize;
47400b57cec5SDimitry Andric   }
47410b57cec5SDimitry Andric }
47420b57cec5SDimitry Andric 
4743fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4744fe6060f1SDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4745fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4746fe6060f1SDimitry Andric   if (TypeIdx != 0)
4747fe6060f1SDimitry Andric     return UnableToLegalize;
4748fe6060f1SDimitry Andric 
474906c3fb27SDimitry Andric   auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
475006c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
4751fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4752fe6060f1SDimitry Andric   // The shuffle should be canonicalized by now.
4753fe6060f1SDimitry Andric   if (DstTy != Src1Ty)
4754fe6060f1SDimitry Andric     return UnableToLegalize;
4755fe6060f1SDimitry Andric   if (DstTy != Src2Ty)
4756fe6060f1SDimitry Andric     return UnableToLegalize;
4757fe6060f1SDimitry Andric 
4758fe6060f1SDimitry Andric   if (!isPowerOf2_32(DstTy.getNumElements()))
4759fe6060f1SDimitry Andric     return UnableToLegalize;
4760fe6060f1SDimitry Andric 
4761fe6060f1SDimitry Andric   // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4762fe6060f1SDimitry Andric   // Further legalization attempts will be needed to do split further.
4763fe6060f1SDimitry Andric   NarrowTy =
4764fe6060f1SDimitry Andric       DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4765fe6060f1SDimitry Andric   unsigned NewElts = NarrowTy.getNumElements();
4766fe6060f1SDimitry Andric 
4767fe6060f1SDimitry Andric   SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4768fe6060f1SDimitry Andric   extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4769fe6060f1SDimitry Andric   extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4770fe6060f1SDimitry Andric   Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4771fe6060f1SDimitry Andric                         SplitSrc2Regs[1]};
4772fe6060f1SDimitry Andric 
4773fe6060f1SDimitry Andric   Register Hi, Lo;
4774fe6060f1SDimitry Andric 
4775fe6060f1SDimitry Andric   // If Lo or Hi uses elements from at most two of the four input vectors, then
4776fe6060f1SDimitry Andric   // express it as a vector shuffle of those two inputs.  Otherwise extract the
4777fe6060f1SDimitry Andric   // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4778fe6060f1SDimitry Andric   SmallVector<int, 16> Ops;
4779fe6060f1SDimitry Andric   for (unsigned High = 0; High < 2; ++High) {
4780fe6060f1SDimitry Andric     Register &Output = High ? Hi : Lo;
4781fe6060f1SDimitry Andric 
4782fe6060f1SDimitry Andric     // Build a shuffle mask for the output, discovering on the fly which
4783fe6060f1SDimitry Andric     // input vectors to use as shuffle operands (recorded in InputUsed).
4784fe6060f1SDimitry Andric     // If building a suitable shuffle vector proves too hard, then bail
4785fe6060f1SDimitry Andric     // out with useBuildVector set.
4786fe6060f1SDimitry Andric     unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4787fe6060f1SDimitry Andric     unsigned FirstMaskIdx = High * NewElts;
4788fe6060f1SDimitry Andric     bool UseBuildVector = false;
4789fe6060f1SDimitry Andric     for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4790fe6060f1SDimitry Andric       // The mask element.  This indexes into the input.
4791fe6060f1SDimitry Andric       int Idx = Mask[FirstMaskIdx + MaskOffset];
4792fe6060f1SDimitry Andric 
4793fe6060f1SDimitry Andric       // The input vector this mask element indexes into.
4794fe6060f1SDimitry Andric       unsigned Input = (unsigned)Idx / NewElts;
4795fe6060f1SDimitry Andric 
4796bdd1243dSDimitry Andric       if (Input >= std::size(Inputs)) {
4797fe6060f1SDimitry Andric         // The mask element does not index into any input vector.
4798fe6060f1SDimitry Andric         Ops.push_back(-1);
4799fe6060f1SDimitry Andric         continue;
4800fe6060f1SDimitry Andric       }
4801fe6060f1SDimitry Andric 
4802fe6060f1SDimitry Andric       // Turn the index into an offset from the start of the input vector.
4803fe6060f1SDimitry Andric       Idx -= Input * NewElts;
4804fe6060f1SDimitry Andric 
4805fe6060f1SDimitry Andric       // Find or create a shuffle vector operand to hold this input.
4806fe6060f1SDimitry Andric       unsigned OpNo;
4807bdd1243dSDimitry Andric       for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4808fe6060f1SDimitry Andric         if (InputUsed[OpNo] == Input) {
4809fe6060f1SDimitry Andric           // This input vector is already an operand.
4810fe6060f1SDimitry Andric           break;
4811fe6060f1SDimitry Andric         } else if (InputUsed[OpNo] == -1U) {
4812fe6060f1SDimitry Andric           // Create a new operand for this input vector.
4813fe6060f1SDimitry Andric           InputUsed[OpNo] = Input;
4814fe6060f1SDimitry Andric           break;
4815fe6060f1SDimitry Andric         }
4816fe6060f1SDimitry Andric       }
4817fe6060f1SDimitry Andric 
4818bdd1243dSDimitry Andric       if (OpNo >= std::size(InputUsed)) {
4819fe6060f1SDimitry Andric         // More than two input vectors used!  Give up on trying to create a
4820fe6060f1SDimitry Andric         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
4821fe6060f1SDimitry Andric         UseBuildVector = true;
4822fe6060f1SDimitry Andric         break;
4823fe6060f1SDimitry Andric       }
4824fe6060f1SDimitry Andric 
4825fe6060f1SDimitry Andric       // Add the mask index for the new shuffle vector.
4826fe6060f1SDimitry Andric       Ops.push_back(Idx + OpNo * NewElts);
4827fe6060f1SDimitry Andric     }
4828fe6060f1SDimitry Andric 
4829fe6060f1SDimitry Andric     if (UseBuildVector) {
4830fe6060f1SDimitry Andric       LLT EltTy = NarrowTy.getElementType();
4831fe6060f1SDimitry Andric       SmallVector<Register, 16> SVOps;
4832fe6060f1SDimitry Andric 
4833fe6060f1SDimitry Andric       // Extract the input elements by hand.
4834fe6060f1SDimitry Andric       for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4835fe6060f1SDimitry Andric         // The mask element.  This indexes into the input.
4836fe6060f1SDimitry Andric         int Idx = Mask[FirstMaskIdx + MaskOffset];
4837fe6060f1SDimitry Andric 
4838fe6060f1SDimitry Andric         // The input vector this mask element indexes into.
4839fe6060f1SDimitry Andric         unsigned Input = (unsigned)Idx / NewElts;
4840fe6060f1SDimitry Andric 
4841bdd1243dSDimitry Andric         if (Input >= std::size(Inputs)) {
4842fe6060f1SDimitry Andric           // The mask element is "undef" or indexes off the end of the input.
4843fe6060f1SDimitry Andric           SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4844fe6060f1SDimitry Andric           continue;
4845fe6060f1SDimitry Andric         }
4846fe6060f1SDimitry Andric 
4847fe6060f1SDimitry Andric         // Turn the index into an offset from the start of the input vector.
4848fe6060f1SDimitry Andric         Idx -= Input * NewElts;
4849fe6060f1SDimitry Andric 
4850fe6060f1SDimitry Andric         // Extract the vector element by hand.
4851fe6060f1SDimitry Andric         SVOps.push_back(MIRBuilder
4852fe6060f1SDimitry Andric                             .buildExtractVectorElement(
4853fe6060f1SDimitry Andric                                 EltTy, Inputs[Input],
4854fe6060f1SDimitry Andric                                 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4855fe6060f1SDimitry Andric                             .getReg(0));
4856fe6060f1SDimitry Andric       }
4857fe6060f1SDimitry Andric 
4858fe6060f1SDimitry Andric       // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4859fe6060f1SDimitry Andric       Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4860fe6060f1SDimitry Andric     } else if (InputUsed[0] == -1U) {
4861fe6060f1SDimitry Andric       // No input vectors were used! The result is undefined.
4862fe6060f1SDimitry Andric       Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4863fe6060f1SDimitry Andric     } else {
4864fe6060f1SDimitry Andric       Register Op0 = Inputs[InputUsed[0]];
4865fe6060f1SDimitry Andric       // If only one input was used, use an undefined vector for the other.
4866fe6060f1SDimitry Andric       Register Op1 = InputUsed[1] == -1U
4867fe6060f1SDimitry Andric                          ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4868fe6060f1SDimitry Andric                          : Inputs[InputUsed[1]];
4869fe6060f1SDimitry Andric       // At least one input vector was used. Create a new shuffle vector.
4870fe6060f1SDimitry Andric       Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4871fe6060f1SDimitry Andric     }
4872fe6060f1SDimitry Andric 
4873fe6060f1SDimitry Andric     Ops.clear();
4874fe6060f1SDimitry Andric   }
4875fe6060f1SDimitry Andric 
4876fe6060f1SDimitry Andric   MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4877fe6060f1SDimitry Andric   MI.eraseFromParent();
4878fe6060f1SDimitry Andric   return Legalized;
4879fe6060f1SDimitry Andric }
4880fe6060f1SDimitry Andric 
4881349cc55cSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4882349cc55cSDimitry Andric     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
48835f757f3fSDimitry Andric   auto &RdxMI = cast<GVecReduce>(MI);
4884349cc55cSDimitry Andric 
4885349cc55cSDimitry Andric   if (TypeIdx != 1)
4886349cc55cSDimitry Andric     return UnableToLegalize;
4887349cc55cSDimitry Andric 
4888349cc55cSDimitry Andric   // The semantics of the normal non-sequential reductions allow us to freely
4889349cc55cSDimitry Andric   // re-associate the operation.
48905f757f3fSDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4891349cc55cSDimitry Andric 
4892349cc55cSDimitry Andric   if (NarrowTy.isVector() &&
4893349cc55cSDimitry Andric       (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4894349cc55cSDimitry Andric     return UnableToLegalize;
4895349cc55cSDimitry Andric 
48965f757f3fSDimitry Andric   unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4897349cc55cSDimitry Andric   SmallVector<Register> SplitSrcs;
4898349cc55cSDimitry Andric   // If NarrowTy is a scalar then we're being asked to scalarize.
4899349cc55cSDimitry Andric   const unsigned NumParts =
4900349cc55cSDimitry Andric       NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4901349cc55cSDimitry Andric                           : SrcTy.getNumElements();
4902349cc55cSDimitry Andric 
4903349cc55cSDimitry Andric   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4904349cc55cSDimitry Andric   if (NarrowTy.isScalar()) {
4905349cc55cSDimitry Andric     if (DstTy != NarrowTy)
4906349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle implicit extensions.
4907349cc55cSDimitry Andric 
4908349cc55cSDimitry Andric     if (isPowerOf2_32(NumParts)) {
4909349cc55cSDimitry Andric       // Generate a tree of scalar operations to reduce the critical path.
4910349cc55cSDimitry Andric       SmallVector<Register> PartialResults;
4911349cc55cSDimitry Andric       unsigned NumPartsLeft = NumParts;
4912349cc55cSDimitry Andric       while (NumPartsLeft > 1) {
4913349cc55cSDimitry Andric         for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4914349cc55cSDimitry Andric           PartialResults.emplace_back(
4915349cc55cSDimitry Andric               MIRBuilder
4916349cc55cSDimitry Andric                   .buildInstr(ScalarOpc, {NarrowTy},
4917349cc55cSDimitry Andric                               {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4918349cc55cSDimitry Andric                   .getReg(0));
4919349cc55cSDimitry Andric         }
4920349cc55cSDimitry Andric         SplitSrcs = PartialResults;
4921349cc55cSDimitry Andric         PartialResults.clear();
4922349cc55cSDimitry Andric         NumPartsLeft = SplitSrcs.size();
4923349cc55cSDimitry Andric       }
4924349cc55cSDimitry Andric       assert(SplitSrcs.size() == 1);
4925349cc55cSDimitry Andric       MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4926349cc55cSDimitry Andric       MI.eraseFromParent();
4927349cc55cSDimitry Andric       return Legalized;
4928349cc55cSDimitry Andric     }
4929349cc55cSDimitry Andric     // If we can't generate a tree, then just do sequential operations.
4930349cc55cSDimitry Andric     Register Acc = SplitSrcs[0];
4931349cc55cSDimitry Andric     for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4932349cc55cSDimitry Andric       Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4933349cc55cSDimitry Andric                 .getReg(0);
4934349cc55cSDimitry Andric     MIRBuilder.buildCopy(DstReg, Acc);
4935349cc55cSDimitry Andric     MI.eraseFromParent();
4936349cc55cSDimitry Andric     return Legalized;
4937349cc55cSDimitry Andric   }
4938349cc55cSDimitry Andric   SmallVector<Register> PartialReductions;
4939349cc55cSDimitry Andric   for (unsigned Part = 0; Part < NumParts; ++Part) {
4940349cc55cSDimitry Andric     PartialReductions.push_back(
49415f757f3fSDimitry Andric         MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
49425f757f3fSDimitry Andric             .getReg(0));
4943349cc55cSDimitry Andric   }
4944349cc55cSDimitry Andric 
4945fe6060f1SDimitry Andric   // If the types involved are powers of 2, we can generate intermediate vector
4946fe6060f1SDimitry Andric   // ops, before generating a final reduction operation.
4947fe6060f1SDimitry Andric   if (isPowerOf2_32(SrcTy.getNumElements()) &&
4948fe6060f1SDimitry Andric       isPowerOf2_32(NarrowTy.getNumElements())) {
4949fe6060f1SDimitry Andric     return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4950fe6060f1SDimitry Andric   }
4951fe6060f1SDimitry Andric 
4952fe6060f1SDimitry Andric   Register Acc = PartialReductions[0];
4953fe6060f1SDimitry Andric   for (unsigned Part = 1; Part < NumParts; ++Part) {
4954fe6060f1SDimitry Andric     if (Part == NumParts - 1) {
4955fe6060f1SDimitry Andric       MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4956fe6060f1SDimitry Andric                             {Acc, PartialReductions[Part]});
4957fe6060f1SDimitry Andric     } else {
4958fe6060f1SDimitry Andric       Acc = MIRBuilder
4959fe6060f1SDimitry Andric                 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4960fe6060f1SDimitry Andric                 .getReg(0);
4961fe6060f1SDimitry Andric     }
4962fe6060f1SDimitry Andric   }
4963fe6060f1SDimitry Andric   MI.eraseFromParent();
4964fe6060f1SDimitry Andric   return Legalized;
4965fe6060f1SDimitry Andric }
4966fe6060f1SDimitry Andric 
4967fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
49681db9f3b2SDimitry Andric LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
49691db9f3b2SDimitry Andric                                                   unsigned int TypeIdx,
49701db9f3b2SDimitry Andric                                                   LLT NarrowTy) {
49711db9f3b2SDimitry Andric   auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
49721db9f3b2SDimitry Andric       MI.getFirst3RegLLTs();
49731db9f3b2SDimitry Andric   if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
49741db9f3b2SDimitry Andric       DstTy != NarrowTy)
49751db9f3b2SDimitry Andric     return UnableToLegalize;
49761db9f3b2SDimitry Andric 
49771db9f3b2SDimitry Andric   assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
49781db9f3b2SDimitry Andric           MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
49791db9f3b2SDimitry Andric          "Unexpected vecreduce opcode");
49801db9f3b2SDimitry Andric   unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
49811db9f3b2SDimitry Andric                            ? TargetOpcode::G_FADD
49821db9f3b2SDimitry Andric                            : TargetOpcode::G_FMUL;
49831db9f3b2SDimitry Andric 
49841db9f3b2SDimitry Andric   SmallVector<Register> SplitSrcs;
49851db9f3b2SDimitry Andric   unsigned NumParts = SrcTy.getNumElements();
49861db9f3b2SDimitry Andric   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
49871db9f3b2SDimitry Andric   Register Acc = ScalarReg;
49881db9f3b2SDimitry Andric   for (unsigned i = 0; i < NumParts; i++)
49891db9f3b2SDimitry Andric     Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
49901db9f3b2SDimitry Andric               .getReg(0);
49911db9f3b2SDimitry Andric 
49921db9f3b2SDimitry Andric   MIRBuilder.buildCopy(DstReg, Acc);
49931db9f3b2SDimitry Andric   MI.eraseFromParent();
49941db9f3b2SDimitry Andric   return Legalized;
49951db9f3b2SDimitry Andric }
49961db9f3b2SDimitry Andric 
49971db9f3b2SDimitry Andric LegalizerHelper::LegalizeResult
4998fe6060f1SDimitry Andric LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4999fe6060f1SDimitry Andric                                         LLT SrcTy, LLT NarrowTy,
5000fe6060f1SDimitry Andric                                         unsigned ScalarOpc) {
5001fe6060f1SDimitry Andric   SmallVector<Register> SplitSrcs;
5002fe6060f1SDimitry Andric   // Split the sources into NarrowTy size pieces.
5003fe6060f1SDimitry Andric   extractParts(SrcReg, NarrowTy,
5004fe6060f1SDimitry Andric                SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
5005fe6060f1SDimitry Andric   // We're going to do a tree reduction using vector operations until we have
5006fe6060f1SDimitry Andric   // one NarrowTy size value left.
5007fe6060f1SDimitry Andric   while (SplitSrcs.size() > 1) {
5008fe6060f1SDimitry Andric     SmallVector<Register> PartialRdxs;
5009fe6060f1SDimitry Andric     for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5010fe6060f1SDimitry Andric       Register LHS = SplitSrcs[Idx];
5011fe6060f1SDimitry Andric       Register RHS = SplitSrcs[Idx + 1];
5012fe6060f1SDimitry Andric       // Create the intermediate vector op.
5013fe6060f1SDimitry Andric       Register Res =
5014fe6060f1SDimitry Andric           MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5015fe6060f1SDimitry Andric       PartialRdxs.push_back(Res);
5016fe6060f1SDimitry Andric     }
5017fe6060f1SDimitry Andric     SplitSrcs = std::move(PartialRdxs);
5018fe6060f1SDimitry Andric   }
5019fe6060f1SDimitry Andric   // Finally generate the requested NarrowTy based reduction.
5020fe6060f1SDimitry Andric   Observer.changingInstr(MI);
5021fe6060f1SDimitry Andric   MI.getOperand(1).setReg(SplitSrcs[0]);
5022fe6060f1SDimitry Andric   Observer.changedInstr(MI);
5023fe6060f1SDimitry Andric   return Legalized;
5024fe6060f1SDimitry Andric }
5025fe6060f1SDimitry Andric 
50260b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
50270b57cec5SDimitry Andric LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
50280b57cec5SDimitry Andric                                              const LLT HalfTy, const LLT AmtTy) {
50290b57cec5SDimitry Andric 
50300b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
50310b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
50325ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
50330b57cec5SDimitry Andric 
5034349cc55cSDimitry Andric   if (Amt.isZero()) {
5035bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
50360b57cec5SDimitry Andric     MI.eraseFromParent();
50370b57cec5SDimitry Andric     return Legalized;
50380b57cec5SDimitry Andric   }
50390b57cec5SDimitry Andric 
50400b57cec5SDimitry Andric   LLT NVT = HalfTy;
50410b57cec5SDimitry Andric   unsigned NVTBits = HalfTy.getSizeInBits();
50420b57cec5SDimitry Andric   unsigned VTBits = 2 * NVTBits;
50430b57cec5SDimitry Andric 
50440b57cec5SDimitry Andric   SrcOp Lo(Register(0)), Hi(Register(0));
50450b57cec5SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_SHL) {
50460b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
50470b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
50480b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
50490b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
50500b57cec5SDimitry Andric       Hi = MIRBuilder.buildShl(NVT, InL,
50510b57cec5SDimitry Andric                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
50520b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
50530b57cec5SDimitry Andric       Lo = MIRBuilder.buildConstant(NVT, 0);
50540b57cec5SDimitry Andric       Hi = InL;
50550b57cec5SDimitry Andric     } else {
50560b57cec5SDimitry Andric       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
50570b57cec5SDimitry Andric       auto OrLHS =
50580b57cec5SDimitry Andric           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
50590b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildLShr(
50600b57cec5SDimitry Andric           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
50610b57cec5SDimitry Andric       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
50620b57cec5SDimitry Andric     }
50630b57cec5SDimitry Andric   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
50640b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
50650b57cec5SDimitry Andric       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
50660b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
50670b57cec5SDimitry Andric       Lo = MIRBuilder.buildLShr(NVT, InH,
50680b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
50690b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
50700b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
50710b57cec5SDimitry Andric       Lo = InH;
50720b57cec5SDimitry Andric       Hi = MIRBuilder.buildConstant(NVT, 0);
50730b57cec5SDimitry Andric     } else {
50740b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
50750b57cec5SDimitry Andric 
50760b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
50770b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
50780b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
50790b57cec5SDimitry Andric 
50800b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
50810b57cec5SDimitry Andric       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
50820b57cec5SDimitry Andric     }
50830b57cec5SDimitry Andric   } else {
50840b57cec5SDimitry Andric     if (Amt.ugt(VTBits)) {
50850b57cec5SDimitry Andric       Hi = Lo = MIRBuilder.buildAShr(
50860b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50870b57cec5SDimitry Andric     } else if (Amt.ugt(NVTBits)) {
50880b57cec5SDimitry Andric       Lo = MIRBuilder.buildAShr(NVT, InH,
50890b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
50900b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
50910b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50920b57cec5SDimitry Andric     } else if (Amt == NVTBits) {
50930b57cec5SDimitry Andric       Lo = InH;
50940b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH,
50950b57cec5SDimitry Andric                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
50960b57cec5SDimitry Andric     } else {
50970b57cec5SDimitry Andric       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
50980b57cec5SDimitry Andric 
50990b57cec5SDimitry Andric       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
51000b57cec5SDimitry Andric       auto OrRHS = MIRBuilder.buildShl(
51010b57cec5SDimitry Andric           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
51020b57cec5SDimitry Andric 
51030b57cec5SDimitry Andric       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
51040b57cec5SDimitry Andric       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
51050b57cec5SDimitry Andric     }
51060b57cec5SDimitry Andric   }
51070b57cec5SDimitry Andric 
5108bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
51090b57cec5SDimitry Andric   MI.eraseFromParent();
51100b57cec5SDimitry Andric 
51110b57cec5SDimitry Andric   return Legalized;
51120b57cec5SDimitry Andric }
51130b57cec5SDimitry Andric 
51140b57cec5SDimitry Andric // TODO: Optimize if constant shift amount.
51150b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
51160b57cec5SDimitry Andric LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
51170b57cec5SDimitry Andric                                    LLT RequestedTy) {
51180b57cec5SDimitry Andric   if (TypeIdx == 1) {
51190b57cec5SDimitry Andric     Observer.changingInstr(MI);
51200b57cec5SDimitry Andric     narrowScalarSrc(MI, RequestedTy, 2);
51210b57cec5SDimitry Andric     Observer.changedInstr(MI);
51220b57cec5SDimitry Andric     return Legalized;
51230b57cec5SDimitry Andric   }
51240b57cec5SDimitry Andric 
51250b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
51260b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
51270b57cec5SDimitry Andric   if (DstTy.isVector())
51280b57cec5SDimitry Andric     return UnableToLegalize;
51290b57cec5SDimitry Andric 
51300b57cec5SDimitry Andric   Register Amt = MI.getOperand(2).getReg();
51310b57cec5SDimitry Andric   LLT ShiftAmtTy = MRI.getType(Amt);
51320b57cec5SDimitry Andric   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
51330b57cec5SDimitry Andric   if (DstEltSize % 2 != 0)
51340b57cec5SDimitry Andric     return UnableToLegalize;
51350b57cec5SDimitry Andric 
51360b57cec5SDimitry Andric   // Ignore the input type. We can only go to exactly half the size of the
51370b57cec5SDimitry Andric   // input. If that isn't small enough, the resulting pieces will be further
51380b57cec5SDimitry Andric   // legalized.
51390b57cec5SDimitry Andric   const unsigned NewBitSize = DstEltSize / 2;
51400b57cec5SDimitry Andric   const LLT HalfTy = LLT::scalar(NewBitSize);
51410b57cec5SDimitry Andric   const LLT CondTy = LLT::scalar(1);
51420b57cec5SDimitry Andric 
5143349cc55cSDimitry Andric   if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5144349cc55cSDimitry Andric     return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5145349cc55cSDimitry Andric                                        ShiftAmtTy);
51460b57cec5SDimitry Andric   }
51470b57cec5SDimitry Andric 
51480b57cec5SDimitry Andric   // TODO: Expand with known bits.
51490b57cec5SDimitry Andric 
51500b57cec5SDimitry Andric   // Handle the fully general expansion by an unknown amount.
51510b57cec5SDimitry Andric   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
51520b57cec5SDimitry Andric 
51530b57cec5SDimitry Andric   Register InL = MRI.createGenericVirtualRegister(HalfTy);
51540b57cec5SDimitry Andric   Register InH = MRI.createGenericVirtualRegister(HalfTy);
51555ffd83dbSDimitry Andric   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
51560b57cec5SDimitry Andric 
51570b57cec5SDimitry Andric   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
51580b57cec5SDimitry Andric   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
51590b57cec5SDimitry Andric 
51600b57cec5SDimitry Andric   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
51610b57cec5SDimitry Andric   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
51620b57cec5SDimitry Andric   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
51630b57cec5SDimitry Andric 
51640b57cec5SDimitry Andric   Register ResultRegs[2];
51650b57cec5SDimitry Andric   switch (MI.getOpcode()) {
51660b57cec5SDimitry Andric   case TargetOpcode::G_SHL: {
51670b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
51688bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
51690b57cec5SDimitry Andric 
51708bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
51718bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
51728bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
51730b57cec5SDimitry Andric 
51740b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
51750b57cec5SDimitry Andric     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
51760b57cec5SDimitry Andric     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
51770b57cec5SDimitry Andric 
51780b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
51790b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(
51800b57cec5SDimitry Andric         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
51810b57cec5SDimitry Andric 
51820b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
51830b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
51840b57cec5SDimitry Andric     break;
51850b57cec5SDimitry Andric   }
51868bcb0991SDimitry Andric   case TargetOpcode::G_LSHR:
51870b57cec5SDimitry Andric   case TargetOpcode::G_ASHR: {
51880b57cec5SDimitry Andric     // Short: ShAmt < NewBitSize
51898bcb0991SDimitry Andric     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
51900b57cec5SDimitry Andric 
51918bcb0991SDimitry Andric     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
51928bcb0991SDimitry Andric     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
51938bcb0991SDimitry Andric     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
51940b57cec5SDimitry Andric 
51950b57cec5SDimitry Andric     // Long: ShAmt >= NewBitSize
51968bcb0991SDimitry Andric     MachineInstrBuilder HiL;
51978bcb0991SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
51988bcb0991SDimitry Andric       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
51998bcb0991SDimitry Andric     } else {
52008bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
52018bcb0991SDimitry Andric       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
52028bcb0991SDimitry Andric     }
52038bcb0991SDimitry Andric     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
52048bcb0991SDimitry Andric                                      {InH, AmtExcess});     // Lo from Hi part.
52050b57cec5SDimitry Andric 
52060b57cec5SDimitry Andric     auto Lo = MIRBuilder.buildSelect(
52070b57cec5SDimitry Andric         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
52080b57cec5SDimitry Andric 
52090b57cec5SDimitry Andric     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
52100b57cec5SDimitry Andric 
52110b57cec5SDimitry Andric     ResultRegs[0] = Lo.getReg(0);
52120b57cec5SDimitry Andric     ResultRegs[1] = Hi.getReg(0);
52130b57cec5SDimitry Andric     break;
52140b57cec5SDimitry Andric   }
52150b57cec5SDimitry Andric   default:
52160b57cec5SDimitry Andric     llvm_unreachable("not a shift");
52170b57cec5SDimitry Andric   }
52180b57cec5SDimitry Andric 
5219bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
52200b57cec5SDimitry Andric   MI.eraseFromParent();
52210b57cec5SDimitry Andric   return Legalized;
52220b57cec5SDimitry Andric }
52230b57cec5SDimitry Andric 
52240b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
52250b57cec5SDimitry Andric LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
52260b57cec5SDimitry Andric                                        LLT MoreTy) {
52270b57cec5SDimitry Andric   assert(TypeIdx == 0 && "Expecting only Idx 0");
52280b57cec5SDimitry Andric 
52290b57cec5SDimitry Andric   Observer.changingInstr(MI);
52300b57cec5SDimitry Andric   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
52310b57cec5SDimitry Andric     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
52320b57cec5SDimitry Andric     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
52330b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, I);
52340b57cec5SDimitry Andric   }
52350b57cec5SDimitry Andric 
52360b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
52370b57cec5SDimitry Andric   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
52380b57cec5SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
52390b57cec5SDimitry Andric   Observer.changedInstr(MI);
52400b57cec5SDimitry Andric   return Legalized;
52410b57cec5SDimitry Andric }
52420b57cec5SDimitry Andric 
52430b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
52440b57cec5SDimitry Andric LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
52450b57cec5SDimitry Andric                                     LLT MoreTy) {
52460b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
52470b57cec5SDimitry Andric   switch (Opc) {
52488bcb0991SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF:
52498bcb0991SDimitry Andric   case TargetOpcode::G_LOAD: {
52508bcb0991SDimitry Andric     if (TypeIdx != 0)
52518bcb0991SDimitry Andric       return UnableToLegalize;
52520b57cec5SDimitry Andric     Observer.changingInstr(MI);
52530b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52540b57cec5SDimitry Andric     Observer.changedInstr(MI);
52550b57cec5SDimitry Andric     return Legalized;
52560b57cec5SDimitry Andric   }
52578bcb0991SDimitry Andric   case TargetOpcode::G_STORE:
52588bcb0991SDimitry Andric     if (TypeIdx != 0)
52598bcb0991SDimitry Andric       return UnableToLegalize;
52608bcb0991SDimitry Andric     Observer.changingInstr(MI);
52618bcb0991SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 0);
52628bcb0991SDimitry Andric     Observer.changedInstr(MI);
52638bcb0991SDimitry Andric     return Legalized;
52640b57cec5SDimitry Andric   case TargetOpcode::G_AND:
52650b57cec5SDimitry Andric   case TargetOpcode::G_OR:
52660b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
52670eae32dcSDimitry Andric   case TargetOpcode::G_ADD:
52680eae32dcSDimitry Andric   case TargetOpcode::G_SUB:
52690eae32dcSDimitry Andric   case TargetOpcode::G_MUL:
52700eae32dcSDimitry Andric   case TargetOpcode::G_FADD:
52715f757f3fSDimitry Andric   case TargetOpcode::G_FSUB:
52720eae32dcSDimitry Andric   case TargetOpcode::G_FMUL:
52735f757f3fSDimitry Andric   case TargetOpcode::G_FDIV:
52740eae32dcSDimitry Andric   case TargetOpcode::G_UADDSAT:
52750eae32dcSDimitry Andric   case TargetOpcode::G_USUBSAT:
52760eae32dcSDimitry Andric   case TargetOpcode::G_SADDSAT:
52770eae32dcSDimitry Andric   case TargetOpcode::G_SSUBSAT:
52780b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
52790b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
52800b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
5281480093f4SDimitry Andric   case TargetOpcode::G_UMAX:
5282480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM:
5283480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM:
5284480093f4SDimitry Andric   case TargetOpcode::G_FMINNUM_IEEE:
5285480093f4SDimitry Andric   case TargetOpcode::G_FMAXNUM_IEEE:
5286480093f4SDimitry Andric   case TargetOpcode::G_FMINIMUM:
5287bdd1243dSDimitry Andric   case TargetOpcode::G_FMAXIMUM:
5288bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FADD:
5289bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FSUB:
5290bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FMUL: {
52910b57cec5SDimitry Andric     Observer.changingInstr(MI);
52920b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
52930b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
52940b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
52950b57cec5SDimitry Andric     Observer.changedInstr(MI);
52960b57cec5SDimitry Andric     return Legalized;
52970b57cec5SDimitry Andric   }
52980eae32dcSDimitry Andric   case TargetOpcode::G_FMA:
5299bdd1243dSDimitry Andric   case TargetOpcode::G_STRICT_FMA:
53000eae32dcSDimitry Andric   case TargetOpcode::G_FSHR:
53010eae32dcSDimitry Andric   case TargetOpcode::G_FSHL: {
53020eae32dcSDimitry Andric     Observer.changingInstr(MI);
53030eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
53040eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
53050eae32dcSDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
53060eae32dcSDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
53070eae32dcSDimitry Andric     Observer.changedInstr(MI);
53080eae32dcSDimitry Andric     return Legalized;
53090eae32dcSDimitry Andric   }
531006c3fb27SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
53110b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
53120b57cec5SDimitry Andric     if (TypeIdx != 1)
53130b57cec5SDimitry Andric       return UnableToLegalize;
53140b57cec5SDimitry Andric     Observer.changingInstr(MI);
53150b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
53160b57cec5SDimitry Andric     Observer.changedInstr(MI);
53170b57cec5SDimitry Andric     return Legalized;
53180b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
531906c3fb27SDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
53205ffd83dbSDimitry Andric   case TargetOpcode::G_FREEZE:
53210eae32dcSDimitry Andric   case TargetOpcode::G_FNEG:
53220eae32dcSDimitry Andric   case TargetOpcode::G_FABS:
53235f757f3fSDimitry Andric   case TargetOpcode::G_FSQRT:
53245f757f3fSDimitry Andric   case TargetOpcode::G_FCEIL:
53255f757f3fSDimitry Andric   case TargetOpcode::G_FFLOOR:
53265f757f3fSDimitry Andric   case TargetOpcode::G_FNEARBYINT:
53275f757f3fSDimitry Andric   case TargetOpcode::G_FRINT:
53285f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUND:
53295f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
53305f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_TRUNC:
53310eae32dcSDimitry Andric   case TargetOpcode::G_BSWAP:
53320eae32dcSDimitry Andric   case TargetOpcode::G_FCANONICALIZE:
53330eae32dcSDimitry Andric   case TargetOpcode::G_SEXT_INREG:
53340b57cec5SDimitry Andric     if (TypeIdx != 0)
53350b57cec5SDimitry Andric       return UnableToLegalize;
53360b57cec5SDimitry Andric     Observer.changingInstr(MI);
53370b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 1);
53380b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
53390b57cec5SDimitry Andric     Observer.changedInstr(MI);
53400b57cec5SDimitry Andric     return Legalized;
534181ad6265SDimitry Andric   case TargetOpcode::G_SELECT: {
534206c3fb27SDimitry Andric     auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
534381ad6265SDimitry Andric     if (TypeIdx == 1) {
534481ad6265SDimitry Andric       if (!CondTy.isScalar() ||
534581ad6265SDimitry Andric           DstTy.getElementCount() != MoreTy.getElementCount())
53460b57cec5SDimitry Andric         return UnableToLegalize;
534781ad6265SDimitry Andric 
534881ad6265SDimitry Andric       // This is turning a scalar select of vectors into a vector
534981ad6265SDimitry Andric       // select. Broadcast the select condition.
535081ad6265SDimitry Andric       auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
535181ad6265SDimitry Andric       Observer.changingInstr(MI);
535281ad6265SDimitry Andric       MI.getOperand(1).setReg(ShufSplat.getReg(0));
535381ad6265SDimitry Andric       Observer.changedInstr(MI);
535481ad6265SDimitry Andric       return Legalized;
535581ad6265SDimitry Andric     }
535681ad6265SDimitry Andric 
535781ad6265SDimitry Andric     if (CondTy.isVector())
53580b57cec5SDimitry Andric       return UnableToLegalize;
53590b57cec5SDimitry Andric 
53600b57cec5SDimitry Andric     Observer.changingInstr(MI);
53610b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 2);
53620b57cec5SDimitry Andric     moreElementsVectorSrc(MI, MoreTy, 3);
53630b57cec5SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
53640b57cec5SDimitry Andric     Observer.changedInstr(MI);
53650b57cec5SDimitry Andric     return Legalized;
536681ad6265SDimitry Andric   }
53670eae32dcSDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
53688bcb0991SDimitry Andric     return UnableToLegalize;
53690b57cec5SDimitry Andric   case TargetOpcode::G_PHI:
53700b57cec5SDimitry Andric     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5371fe6060f1SDimitry Andric   case TargetOpcode::G_SHUFFLE_VECTOR:
5372fe6060f1SDimitry Andric     return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
53730eae32dcSDimitry Andric   case TargetOpcode::G_BUILD_VECTOR: {
53740eae32dcSDimitry Andric     SmallVector<SrcOp, 8> Elts;
53750eae32dcSDimitry Andric     for (auto Op : MI.uses()) {
53760eae32dcSDimitry Andric       Elts.push_back(Op.getReg());
53770eae32dcSDimitry Andric     }
53780eae32dcSDimitry Andric 
53790eae32dcSDimitry Andric     for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
53800eae32dcSDimitry Andric       Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
53810eae32dcSDimitry Andric     }
53820eae32dcSDimitry Andric 
53830eae32dcSDimitry Andric     MIRBuilder.buildDeleteTrailingVectorElements(
53840eae32dcSDimitry Andric         MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
53850eae32dcSDimitry Andric     MI.eraseFromParent();
53860eae32dcSDimitry Andric     return Legalized;
53870eae32dcSDimitry Andric   }
53885f757f3fSDimitry Andric   case TargetOpcode::G_TRUNC:
538906c3fb27SDimitry Andric   case TargetOpcode::G_FPTRUNC:
53905f757f3fSDimitry Andric   case TargetOpcode::G_FPEXT:
53915f757f3fSDimitry Andric   case TargetOpcode::G_FPTOSI:
53925f757f3fSDimitry Andric   case TargetOpcode::G_FPTOUI:
53935f757f3fSDimitry Andric   case TargetOpcode::G_SITOFP:
53945f757f3fSDimitry Andric   case TargetOpcode::G_UITOFP: {
539506c3fb27SDimitry Andric     if (TypeIdx != 0)
539606c3fb27SDimitry Andric       return UnableToLegalize;
539706c3fb27SDimitry Andric     Observer.changingInstr(MI);
539806c3fb27SDimitry Andric     LLT SrcTy = LLT::fixed_vector(
539906c3fb27SDimitry Andric         MoreTy.getNumElements(),
540006c3fb27SDimitry Andric         MRI.getType(MI.getOperand(1).getReg()).getElementType());
540106c3fb27SDimitry Andric     moreElementsVectorSrc(MI, SrcTy, 1);
540206c3fb27SDimitry Andric     moreElementsVectorDst(MI, MoreTy, 0);
540306c3fb27SDimitry Andric     Observer.changedInstr(MI);
540406c3fb27SDimitry Andric     return Legalized;
540506c3fb27SDimitry Andric   }
54060b57cec5SDimitry Andric   default:
54070b57cec5SDimitry Andric     return UnableToLegalize;
54080b57cec5SDimitry Andric   }
54090b57cec5SDimitry Andric }
54100b57cec5SDimitry Andric 
541106c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
541206c3fb27SDimitry Andric LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
541306c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5414bdd1243dSDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5415bdd1243dSDimitry Andric   unsigned MaskNumElts = Mask.size();
5416bdd1243dSDimitry Andric   unsigned SrcNumElts = SrcTy.getNumElements();
5417bdd1243dSDimitry Andric   LLT DestEltTy = DstTy.getElementType();
5418bdd1243dSDimitry Andric 
541906c3fb27SDimitry Andric   if (MaskNumElts == SrcNumElts)
542006c3fb27SDimitry Andric     return Legalized;
542106c3fb27SDimitry Andric 
542206c3fb27SDimitry Andric   if (MaskNumElts < SrcNumElts) {
542306c3fb27SDimitry Andric     // Extend mask to match new destination vector size with
542406c3fb27SDimitry Andric     // undef values.
542506c3fb27SDimitry Andric     SmallVector<int, 16> NewMask(Mask);
542606c3fb27SDimitry Andric     for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
542706c3fb27SDimitry Andric       NewMask.push_back(-1);
542806c3fb27SDimitry Andric 
542906c3fb27SDimitry Andric     moreElementsVectorDst(MI, SrcTy, 0);
543006c3fb27SDimitry Andric     MIRBuilder.setInstrAndDebugLoc(MI);
543106c3fb27SDimitry Andric     MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
543206c3fb27SDimitry Andric                                   MI.getOperand(1).getReg(),
543306c3fb27SDimitry Andric                                   MI.getOperand(2).getReg(), NewMask);
543406c3fb27SDimitry Andric     MI.eraseFromParent();
543506c3fb27SDimitry Andric 
543606c3fb27SDimitry Andric     return Legalized;
5437bdd1243dSDimitry Andric   }
5438bdd1243dSDimitry Andric 
5439bdd1243dSDimitry Andric   unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5440bdd1243dSDimitry Andric   unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5441bdd1243dSDimitry Andric   LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5442bdd1243dSDimitry Andric 
5443bdd1243dSDimitry Andric   // Create new source vectors by concatenating the initial
5444bdd1243dSDimitry Andric   // source vectors with undefined vectors of the same size.
5445bdd1243dSDimitry Andric   auto Undef = MIRBuilder.buildUndef(SrcTy);
5446bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5447bdd1243dSDimitry Andric   SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5448bdd1243dSDimitry Andric   MOps1[0] = MI.getOperand(1).getReg();
5449bdd1243dSDimitry Andric   MOps2[0] = MI.getOperand(2).getReg();
5450bdd1243dSDimitry Andric 
5451bdd1243dSDimitry Andric   auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5452bdd1243dSDimitry Andric   auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5453bdd1243dSDimitry Andric 
5454bdd1243dSDimitry Andric   // Readjust mask for new input vector length.
5455bdd1243dSDimitry Andric   SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5456bdd1243dSDimitry Andric   for (unsigned I = 0; I != MaskNumElts; ++I) {
5457bdd1243dSDimitry Andric     int Idx = Mask[I];
5458bdd1243dSDimitry Andric     if (Idx >= static_cast<int>(SrcNumElts))
5459bdd1243dSDimitry Andric       Idx += PaddedMaskNumElts - SrcNumElts;
5460bdd1243dSDimitry Andric     MappedOps[I] = Idx;
5461bdd1243dSDimitry Andric   }
5462bdd1243dSDimitry Andric 
5463bdd1243dSDimitry Andric   // If we got more elements than required, extract subvector.
5464bdd1243dSDimitry Andric   if (MaskNumElts != PaddedMaskNumElts) {
5465bdd1243dSDimitry Andric     auto Shuffle =
5466bdd1243dSDimitry Andric         MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5467bdd1243dSDimitry Andric 
5468bdd1243dSDimitry Andric     SmallVector<Register, 16> Elts(MaskNumElts);
5469bdd1243dSDimitry Andric     for (unsigned I = 0; I < MaskNumElts; ++I) {
5470bdd1243dSDimitry Andric       Elts[I] =
5471bdd1243dSDimitry Andric           MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5472bdd1243dSDimitry Andric               .getReg(0);
5473bdd1243dSDimitry Andric     }
5474bdd1243dSDimitry Andric     MIRBuilder.buildBuildVector(DstReg, Elts);
5475bdd1243dSDimitry Andric   } else {
5476bdd1243dSDimitry Andric     MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5477bdd1243dSDimitry Andric   }
5478bdd1243dSDimitry Andric 
5479bdd1243dSDimitry Andric   MI.eraseFromParent();
5480bdd1243dSDimitry Andric   return LegalizerHelper::LegalizeResult::Legalized;
5481bdd1243dSDimitry Andric }
5482bdd1243dSDimitry Andric 
5483fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
5484fe6060f1SDimitry Andric LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5485fe6060f1SDimitry Andric                                            unsigned int TypeIdx, LLT MoreTy) {
548606c3fb27SDimitry Andric   auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5487fe6060f1SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5488fe6060f1SDimitry Andric   unsigned NumElts = DstTy.getNumElements();
5489fe6060f1SDimitry Andric   unsigned WidenNumElts = MoreTy.getNumElements();
5490fe6060f1SDimitry Andric 
5491bdd1243dSDimitry Andric   if (DstTy.isVector() && Src1Ty.isVector() &&
549206c3fb27SDimitry Andric       DstTy.getNumElements() != Src1Ty.getNumElements()) {
549306c3fb27SDimitry Andric     return equalizeVectorShuffleLengths(MI);
5494bdd1243dSDimitry Andric   }
5495bdd1243dSDimitry Andric 
5496bdd1243dSDimitry Andric   if (TypeIdx != 0)
5497bdd1243dSDimitry Andric     return UnableToLegalize;
5498bdd1243dSDimitry Andric 
5499fe6060f1SDimitry Andric   // Expect a canonicalized shuffle.
5500fe6060f1SDimitry Andric   if (DstTy != Src1Ty || DstTy != Src2Ty)
5501fe6060f1SDimitry Andric     return UnableToLegalize;
5502fe6060f1SDimitry Andric 
5503fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 1);
5504fe6060f1SDimitry Andric   moreElementsVectorSrc(MI, MoreTy, 2);
5505fe6060f1SDimitry Andric 
5506fe6060f1SDimitry Andric   // Adjust mask based on new input vector length.
5507fe6060f1SDimitry Andric   SmallVector<int, 16> NewMask;
5508fe6060f1SDimitry Andric   for (unsigned I = 0; I != NumElts; ++I) {
5509fe6060f1SDimitry Andric     int Idx = Mask[I];
5510fe6060f1SDimitry Andric     if (Idx < static_cast<int>(NumElts))
5511fe6060f1SDimitry Andric       NewMask.push_back(Idx);
5512fe6060f1SDimitry Andric     else
5513fe6060f1SDimitry Andric       NewMask.push_back(Idx - NumElts + WidenNumElts);
5514fe6060f1SDimitry Andric   }
5515fe6060f1SDimitry Andric   for (unsigned I = NumElts; I != WidenNumElts; ++I)
5516fe6060f1SDimitry Andric     NewMask.push_back(-1);
5517fe6060f1SDimitry Andric   moreElementsVectorDst(MI, MoreTy, 0);
5518fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
5519fe6060f1SDimitry Andric   MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5520fe6060f1SDimitry Andric                                 MI.getOperand(1).getReg(),
5521fe6060f1SDimitry Andric                                 MI.getOperand(2).getReg(), NewMask);
5522fe6060f1SDimitry Andric   MI.eraseFromParent();
5523fe6060f1SDimitry Andric   return Legalized;
5524fe6060f1SDimitry Andric }
5525fe6060f1SDimitry Andric 
55260b57cec5SDimitry Andric void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
55270b57cec5SDimitry Andric                                         ArrayRef<Register> Src1Regs,
55280b57cec5SDimitry Andric                                         ArrayRef<Register> Src2Regs,
55290b57cec5SDimitry Andric                                         LLT NarrowTy) {
55300b57cec5SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
55310b57cec5SDimitry Andric   unsigned SrcParts = Src1Regs.size();
55320b57cec5SDimitry Andric   unsigned DstParts = DstRegs.size();
55330b57cec5SDimitry Andric 
55340b57cec5SDimitry Andric   unsigned DstIdx = 0; // Low bits of the result.
55350b57cec5SDimitry Andric   Register FactorSum =
55360b57cec5SDimitry Andric       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
55370b57cec5SDimitry Andric   DstRegs[DstIdx] = FactorSum;
55380b57cec5SDimitry Andric 
55390b57cec5SDimitry Andric   unsigned CarrySumPrevDstIdx;
55400b57cec5SDimitry Andric   SmallVector<Register, 4> Factors;
55410b57cec5SDimitry Andric 
55420b57cec5SDimitry Andric   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
55430b57cec5SDimitry Andric     // Collect low parts of muls for DstIdx.
55440b57cec5SDimitry Andric     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
55450b57cec5SDimitry Andric          i <= std::min(DstIdx, SrcParts - 1); ++i) {
55460b57cec5SDimitry Andric       MachineInstrBuilder Mul =
55470b57cec5SDimitry Andric           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
55480b57cec5SDimitry Andric       Factors.push_back(Mul.getReg(0));
55490b57cec5SDimitry Andric     }
55500b57cec5SDimitry Andric     // Collect high parts of muls from previous DstIdx.
55510b57cec5SDimitry Andric     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
55520b57cec5SDimitry Andric          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
55530b57cec5SDimitry Andric       MachineInstrBuilder Umulh =
55540b57cec5SDimitry Andric           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
55550b57cec5SDimitry Andric       Factors.push_back(Umulh.getReg(0));
55560b57cec5SDimitry Andric     }
5557480093f4SDimitry Andric     // Add CarrySum from additions calculated for previous DstIdx.
55580b57cec5SDimitry Andric     if (DstIdx != 1) {
55590b57cec5SDimitry Andric       Factors.push_back(CarrySumPrevDstIdx);
55600b57cec5SDimitry Andric     }
55610b57cec5SDimitry Andric 
55620b57cec5SDimitry Andric     Register CarrySum;
55630b57cec5SDimitry Andric     // Add all factors and accumulate all carries into CarrySum.
55640b57cec5SDimitry Andric     if (DstIdx != DstParts - 1) {
55650b57cec5SDimitry Andric       MachineInstrBuilder Uaddo =
55660b57cec5SDimitry Andric           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
55670b57cec5SDimitry Andric       FactorSum = Uaddo.getReg(0);
55680b57cec5SDimitry Andric       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
55690b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i) {
55700b57cec5SDimitry Andric         MachineInstrBuilder Uaddo =
55710b57cec5SDimitry Andric             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
55720b57cec5SDimitry Andric         FactorSum = Uaddo.getReg(0);
55730b57cec5SDimitry Andric         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
55740b57cec5SDimitry Andric         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
55750b57cec5SDimitry Andric       }
55760b57cec5SDimitry Andric     } else {
55770b57cec5SDimitry Andric       // Since value for the next index is not calculated, neither is CarrySum.
55780b57cec5SDimitry Andric       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
55790b57cec5SDimitry Andric       for (unsigned i = 2; i < Factors.size(); ++i)
55800b57cec5SDimitry Andric         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
55810b57cec5SDimitry Andric     }
55820b57cec5SDimitry Andric 
55830b57cec5SDimitry Andric     CarrySumPrevDstIdx = CarrySum;
55840b57cec5SDimitry Andric     DstRegs[DstIdx] = FactorSum;
55850b57cec5SDimitry Andric     Factors.clear();
55860b57cec5SDimitry Andric   }
55870b57cec5SDimitry Andric }
55880b57cec5SDimitry Andric 
55890b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
5590fe6060f1SDimitry Andric LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5591fe6060f1SDimitry Andric                                     LLT NarrowTy) {
5592fe6060f1SDimitry Andric   if (TypeIdx != 0)
5593fe6060f1SDimitry Andric     return UnableToLegalize;
5594fe6060f1SDimitry Andric 
5595fe6060f1SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5596fe6060f1SDimitry Andric   LLT DstType = MRI.getType(DstReg);
5597fe6060f1SDimitry Andric   // FIXME: add support for vector types
5598fe6060f1SDimitry Andric   if (DstType.isVector())
5599fe6060f1SDimitry Andric     return UnableToLegalize;
5600fe6060f1SDimitry Andric 
5601fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
5602fe6060f1SDimitry Andric   unsigned OpO, OpE, OpF;
5603fe6060f1SDimitry Andric   switch (Opcode) {
5604fe6060f1SDimitry Andric   case TargetOpcode::G_SADDO:
5605fe6060f1SDimitry Andric   case TargetOpcode::G_SADDE:
5606fe6060f1SDimitry Andric   case TargetOpcode::G_UADDO:
5607fe6060f1SDimitry Andric   case TargetOpcode::G_UADDE:
5608fe6060f1SDimitry Andric   case TargetOpcode::G_ADD:
5609fe6060f1SDimitry Andric     OpO = TargetOpcode::G_UADDO;
5610fe6060f1SDimitry Andric     OpE = TargetOpcode::G_UADDE;
5611fe6060f1SDimitry Andric     OpF = TargetOpcode::G_UADDE;
5612fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5613fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SADDE;
5614fe6060f1SDimitry Andric     break;
5615fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBO:
5616fe6060f1SDimitry Andric   case TargetOpcode::G_SSUBE:
5617fe6060f1SDimitry Andric   case TargetOpcode::G_USUBO:
5618fe6060f1SDimitry Andric   case TargetOpcode::G_USUBE:
5619fe6060f1SDimitry Andric   case TargetOpcode::G_SUB:
5620fe6060f1SDimitry Andric     OpO = TargetOpcode::G_USUBO;
5621fe6060f1SDimitry Andric     OpE = TargetOpcode::G_USUBE;
5622fe6060f1SDimitry Andric     OpF = TargetOpcode::G_USUBE;
5623fe6060f1SDimitry Andric     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5624fe6060f1SDimitry Andric       OpF = TargetOpcode::G_SSUBE;
5625fe6060f1SDimitry Andric     break;
5626fe6060f1SDimitry Andric   default:
5627fe6060f1SDimitry Andric     llvm_unreachable("Unexpected add/sub opcode!");
5628fe6060f1SDimitry Andric   }
5629fe6060f1SDimitry Andric 
5630fe6060f1SDimitry Andric   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5631fe6060f1SDimitry Andric   unsigned NumDefs = MI.getNumExplicitDefs();
5632fe6060f1SDimitry Andric   Register Src1 = MI.getOperand(NumDefs).getReg();
5633fe6060f1SDimitry Andric   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5634fe6060f1SDimitry Andric   Register CarryDst, CarryIn;
5635fe6060f1SDimitry Andric   if (NumDefs == 2)
5636fe6060f1SDimitry Andric     CarryDst = MI.getOperand(1).getReg();
5637fe6060f1SDimitry Andric   if (MI.getNumOperands() == NumDefs + 3)
5638fe6060f1SDimitry Andric     CarryIn = MI.getOperand(NumDefs + 2).getReg();
5639fe6060f1SDimitry Andric 
5640fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5641fe6060f1SDimitry Andric   LLT LeftoverTy, DummyTy;
5642fe6060f1SDimitry Andric   SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5643fe6060f1SDimitry Andric   extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
5644fe6060f1SDimitry Andric   extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
5645fe6060f1SDimitry Andric 
5646fe6060f1SDimitry Andric   int NarrowParts = Src1Regs.size();
5647fe6060f1SDimitry Andric   for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5648fe6060f1SDimitry Andric     Src1Regs.push_back(Src1Left[I]);
5649fe6060f1SDimitry Andric     Src2Regs.push_back(Src2Left[I]);
5650fe6060f1SDimitry Andric   }
5651fe6060f1SDimitry Andric   DstRegs.reserve(Src1Regs.size());
5652fe6060f1SDimitry Andric 
5653fe6060f1SDimitry Andric   for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5654fe6060f1SDimitry Andric     Register DstReg =
5655fe6060f1SDimitry Andric         MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5656fe6060f1SDimitry Andric     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5657fe6060f1SDimitry Andric     // Forward the final carry-out to the destination register
5658fe6060f1SDimitry Andric     if (i == e - 1 && CarryDst)
5659fe6060f1SDimitry Andric       CarryOut = CarryDst;
5660fe6060f1SDimitry Andric 
5661fe6060f1SDimitry Andric     if (!CarryIn) {
5662fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5663fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i]});
5664fe6060f1SDimitry Andric     } else if (i == e - 1) {
5665fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5666fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5667fe6060f1SDimitry Andric     } else {
5668fe6060f1SDimitry Andric       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5669fe6060f1SDimitry Andric                             {Src1Regs[i], Src2Regs[i], CarryIn});
5670fe6060f1SDimitry Andric     }
5671fe6060f1SDimitry Andric 
5672fe6060f1SDimitry Andric     DstRegs.push_back(DstReg);
5673fe6060f1SDimitry Andric     CarryIn = CarryOut;
5674fe6060f1SDimitry Andric   }
5675fe6060f1SDimitry Andric   insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5676bdd1243dSDimitry Andric               ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5677bdd1243dSDimitry Andric               ArrayRef(DstRegs).drop_front(NarrowParts));
5678fe6060f1SDimitry Andric 
5679fe6060f1SDimitry Andric   MI.eraseFromParent();
5680fe6060f1SDimitry Andric   return Legalized;
5681fe6060f1SDimitry Andric }
5682fe6060f1SDimitry Andric 
5683fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
56840b57cec5SDimitry Andric LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
568506c3fb27SDimitry Andric   auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
56860b57cec5SDimitry Andric 
56870b57cec5SDimitry Andric   LLT Ty = MRI.getType(DstReg);
56880b57cec5SDimitry Andric   if (Ty.isVector())
56890b57cec5SDimitry Andric     return UnableToLegalize;
56900b57cec5SDimitry Andric 
5691349cc55cSDimitry Andric   unsigned Size = Ty.getSizeInBits();
56920b57cec5SDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
5693349cc55cSDimitry Andric   if (Size % NarrowSize != 0)
56940b57cec5SDimitry Andric     return UnableToLegalize;
56950b57cec5SDimitry Andric 
5696349cc55cSDimitry Andric   unsigned NumParts = Size / NarrowSize;
56970b57cec5SDimitry Andric   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5698349cc55cSDimitry Andric   unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
56990b57cec5SDimitry Andric 
57005ffd83dbSDimitry Andric   SmallVector<Register, 2> Src1Parts, Src2Parts;
57015ffd83dbSDimitry Andric   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5702349cc55cSDimitry Andric   extractParts(Src1, NarrowTy, NumParts, Src1Parts);
5703349cc55cSDimitry Andric   extractParts(Src2, NarrowTy, NumParts, Src2Parts);
57040b57cec5SDimitry Andric   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
57050b57cec5SDimitry Andric 
57060b57cec5SDimitry Andric   // Take only high half of registers if this is high mul.
5707349cc55cSDimitry Andric   ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5708bdd1243dSDimitry Andric   MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
57090b57cec5SDimitry Andric   MI.eraseFromParent();
57100b57cec5SDimitry Andric   return Legalized;
57110b57cec5SDimitry Andric }
57120b57cec5SDimitry Andric 
57130b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
571423408297SDimitry Andric LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
571523408297SDimitry Andric                                    LLT NarrowTy) {
571623408297SDimitry Andric   if (TypeIdx != 0)
571723408297SDimitry Andric     return UnableToLegalize;
571823408297SDimitry Andric 
571923408297SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
572023408297SDimitry Andric 
572123408297SDimitry Andric   Register Src = MI.getOperand(1).getReg();
572223408297SDimitry Andric   LLT SrcTy = MRI.getType(Src);
572323408297SDimitry Andric 
572423408297SDimitry Andric   // If all finite floats fit into the narrowed integer type, we can just swap
572523408297SDimitry Andric   // out the result type. This is practically only useful for conversions from
572623408297SDimitry Andric   // half to at least 16-bits, so just handle the one case.
572723408297SDimitry Andric   if (SrcTy.getScalarType() != LLT::scalar(16) ||
5728fe6060f1SDimitry Andric       NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
572923408297SDimitry Andric     return UnableToLegalize;
573023408297SDimitry Andric 
573123408297SDimitry Andric   Observer.changingInstr(MI);
573223408297SDimitry Andric   narrowScalarDst(MI, NarrowTy, 0,
573323408297SDimitry Andric                   IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
573423408297SDimitry Andric   Observer.changedInstr(MI);
573523408297SDimitry Andric   return Legalized;
573623408297SDimitry Andric }
573723408297SDimitry Andric 
573823408297SDimitry Andric LegalizerHelper::LegalizeResult
57390b57cec5SDimitry Andric LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
57400b57cec5SDimitry Andric                                      LLT NarrowTy) {
57410b57cec5SDimitry Andric   if (TypeIdx != 1)
57420b57cec5SDimitry Andric     return UnableToLegalize;
57430b57cec5SDimitry Andric 
57440b57cec5SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
57450b57cec5SDimitry Andric 
57460b57cec5SDimitry Andric   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
57470b57cec5SDimitry Andric   // FIXME: add support for when SizeOp1 isn't an exact multiple of
57480b57cec5SDimitry Andric   // NarrowSize.
57490b57cec5SDimitry Andric   if (SizeOp1 % NarrowSize != 0)
57500b57cec5SDimitry Andric     return UnableToLegalize;
57510b57cec5SDimitry Andric   int NumParts = SizeOp1 / NarrowSize;
57520b57cec5SDimitry Andric 
57530b57cec5SDimitry Andric   SmallVector<Register, 2> SrcRegs, DstRegs;
57540b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
57550b57cec5SDimitry Andric   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
57560b57cec5SDimitry Andric 
57570b57cec5SDimitry Andric   Register OpReg = MI.getOperand(0).getReg();
57580b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(2).getImm();
57590b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
57600b57cec5SDimitry Andric   for (int i = 0; i < NumParts; ++i) {
57610b57cec5SDimitry Andric     unsigned SrcStart = i * NarrowSize;
57620b57cec5SDimitry Andric 
57630b57cec5SDimitry Andric     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
57640b57cec5SDimitry Andric       // No part of the extract uses this subregister, ignore it.
57650b57cec5SDimitry Andric       continue;
57660b57cec5SDimitry Andric     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
57670b57cec5SDimitry Andric       // The entire subregister is extracted, forward the value.
57680b57cec5SDimitry Andric       DstRegs.push_back(SrcRegs[i]);
57690b57cec5SDimitry Andric       continue;
57700b57cec5SDimitry Andric     }
57710b57cec5SDimitry Andric 
57720b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
57730b57cec5SDimitry Andric     // extended infinitely in both directions.
57740b57cec5SDimitry Andric     int64_t ExtractOffset;
57750b57cec5SDimitry Andric     uint64_t SegSize;
57760b57cec5SDimitry Andric     if (OpStart < SrcStart) {
57770b57cec5SDimitry Andric       ExtractOffset = 0;
57780b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
57790b57cec5SDimitry Andric     } else {
57800b57cec5SDimitry Andric       ExtractOffset = OpStart - SrcStart;
57810b57cec5SDimitry Andric       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
57820b57cec5SDimitry Andric     }
57830b57cec5SDimitry Andric 
57840b57cec5SDimitry Andric     Register SegReg = SrcRegs[i];
57850b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != NarrowSize) {
57860b57cec5SDimitry Andric       // A genuine extract is needed.
57870b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
57880b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
57890b57cec5SDimitry Andric     }
57900b57cec5SDimitry Andric 
57910b57cec5SDimitry Andric     DstRegs.push_back(SegReg);
57920b57cec5SDimitry Andric   }
57930b57cec5SDimitry Andric 
57940b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
57950b57cec5SDimitry Andric   if (MRI.getType(DstReg).isVector())
57960b57cec5SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, DstRegs);
57975ffd83dbSDimitry Andric   else if (DstRegs.size() > 1)
5798bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
57995ffd83dbSDimitry Andric   else
58005ffd83dbSDimitry Andric     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
58010b57cec5SDimitry Andric   MI.eraseFromParent();
58020b57cec5SDimitry Andric   return Legalized;
58030b57cec5SDimitry Andric }
58040b57cec5SDimitry Andric 
58050b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
58060b57cec5SDimitry Andric LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
58070b57cec5SDimitry Andric                                     LLT NarrowTy) {
58080b57cec5SDimitry Andric   // FIXME: Don't know how to handle secondary types yet.
58090b57cec5SDimitry Andric   if (TypeIdx != 0)
58100b57cec5SDimitry Andric     return UnableToLegalize;
58110b57cec5SDimitry Andric 
5812fe6060f1SDimitry Andric   SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
58130b57cec5SDimitry Andric   SmallVector<uint64_t, 2> Indexes;
5814fe6060f1SDimitry Andric   LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5815fe6060f1SDimitry Andric   LLT LeftoverTy;
5816fe6060f1SDimitry Andric   extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
5817fe6060f1SDimitry Andric                LeftoverRegs);
58180b57cec5SDimitry Andric 
5819fe6060f1SDimitry Andric   for (Register Reg : LeftoverRegs)
5820fe6060f1SDimitry Andric     SrcRegs.push_back(Reg);
5821fe6060f1SDimitry Andric 
5822fe6060f1SDimitry Andric   uint64_t NarrowSize = NarrowTy.getSizeInBits();
58230b57cec5SDimitry Andric   Register OpReg = MI.getOperand(2).getReg();
58240b57cec5SDimitry Andric   uint64_t OpStart = MI.getOperand(3).getImm();
58250b57cec5SDimitry Andric   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5826fe6060f1SDimitry Andric   for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5827fe6060f1SDimitry Andric     unsigned DstStart = I * NarrowSize;
58280b57cec5SDimitry Andric 
5829fe6060f1SDimitry Andric     if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
58300b57cec5SDimitry Andric       // The entire subregister is defined by this insert, forward the new
58310b57cec5SDimitry Andric       // value.
58320b57cec5SDimitry Andric       DstRegs.push_back(OpReg);
58330b57cec5SDimitry Andric       continue;
58340b57cec5SDimitry Andric     }
58350b57cec5SDimitry Andric 
5836fe6060f1SDimitry Andric     Register SrcReg = SrcRegs[I];
5837fe6060f1SDimitry Andric     if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5838fe6060f1SDimitry Andric       // The leftover reg is smaller than NarrowTy, so we need to extend it.
5839fe6060f1SDimitry Andric       SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5840fe6060f1SDimitry Andric       MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5841fe6060f1SDimitry Andric     }
5842fe6060f1SDimitry Andric 
5843fe6060f1SDimitry Andric     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5844fe6060f1SDimitry Andric       // No part of the insert affects this subregister, forward the original.
5845fe6060f1SDimitry Andric       DstRegs.push_back(SrcReg);
5846fe6060f1SDimitry Andric       continue;
5847fe6060f1SDimitry Andric     }
5848fe6060f1SDimitry Andric 
58490b57cec5SDimitry Andric     // OpSegStart is where this destination segment would start in OpReg if it
58500b57cec5SDimitry Andric     // extended infinitely in both directions.
58510b57cec5SDimitry Andric     int64_t ExtractOffset, InsertOffset;
58520b57cec5SDimitry Andric     uint64_t SegSize;
58530b57cec5SDimitry Andric     if (OpStart < DstStart) {
58540b57cec5SDimitry Andric       InsertOffset = 0;
58550b57cec5SDimitry Andric       ExtractOffset = DstStart - OpStart;
58560b57cec5SDimitry Andric       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
58570b57cec5SDimitry Andric     } else {
58580b57cec5SDimitry Andric       InsertOffset = OpStart - DstStart;
58590b57cec5SDimitry Andric       ExtractOffset = 0;
58600b57cec5SDimitry Andric       SegSize =
58610b57cec5SDimitry Andric         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
58620b57cec5SDimitry Andric     }
58630b57cec5SDimitry Andric 
58640b57cec5SDimitry Andric     Register SegReg = OpReg;
58650b57cec5SDimitry Andric     if (ExtractOffset != 0 || SegSize != OpSize) {
58660b57cec5SDimitry Andric       // A genuine extract is needed.
58670b57cec5SDimitry Andric       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
58680b57cec5SDimitry Andric       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
58690b57cec5SDimitry Andric     }
58700b57cec5SDimitry Andric 
58710b57cec5SDimitry Andric     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
5872fe6060f1SDimitry Andric     MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
58730b57cec5SDimitry Andric     DstRegs.push_back(DstReg);
58740b57cec5SDimitry Andric   }
58750b57cec5SDimitry Andric 
5876fe6060f1SDimitry Andric   uint64_t WideSize = DstRegs.size() * NarrowSize;
58770b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
5878fe6060f1SDimitry Andric   if (WideSize > RegTy.getSizeInBits()) {
5879fe6060f1SDimitry Andric     Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
5880bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
5881fe6060f1SDimitry Andric     MIRBuilder.buildTrunc(DstReg, MergeReg);
5882fe6060f1SDimitry Andric   } else
5883bdd1243dSDimitry Andric     MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5884fe6060f1SDimitry Andric 
58850b57cec5SDimitry Andric   MI.eraseFromParent();
58860b57cec5SDimitry Andric   return Legalized;
58870b57cec5SDimitry Andric }
58880b57cec5SDimitry Andric 
58890b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
58900b57cec5SDimitry Andric LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
58910b57cec5SDimitry Andric                                    LLT NarrowTy) {
58920b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
58930b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
58940b57cec5SDimitry Andric 
58950b57cec5SDimitry Andric   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
58960b57cec5SDimitry Andric 
58970b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
58980b57cec5SDimitry Andric   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
58990b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
59000b57cec5SDimitry Andric   LLT LeftoverTy;
59010b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
59020b57cec5SDimitry Andric                     Src0Regs, Src0LeftoverRegs))
59030b57cec5SDimitry Andric     return UnableToLegalize;
59040b57cec5SDimitry Andric 
59050b57cec5SDimitry Andric   LLT Unused;
59060b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
59070b57cec5SDimitry Andric                     Src1Regs, Src1LeftoverRegs))
59080b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
59090b57cec5SDimitry Andric 
59100b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
59110b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
59120b57cec5SDimitry Andric                                         {Src0Regs[I], Src1Regs[I]});
59135ffd83dbSDimitry Andric     DstRegs.push_back(Inst.getReg(0));
59140b57cec5SDimitry Andric   }
59150b57cec5SDimitry Andric 
59160b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
59170b57cec5SDimitry Andric     auto Inst = MIRBuilder.buildInstr(
59180b57cec5SDimitry Andric       MI.getOpcode(),
59190b57cec5SDimitry Andric       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
59205ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Inst.getReg(0));
59210b57cec5SDimitry Andric   }
59220b57cec5SDimitry Andric 
59230b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
59240b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
59250b57cec5SDimitry Andric 
59260b57cec5SDimitry Andric   MI.eraseFromParent();
59270b57cec5SDimitry Andric   return Legalized;
59280b57cec5SDimitry Andric }
59290b57cec5SDimitry Andric 
59300b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
59315ffd83dbSDimitry Andric LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
59325ffd83dbSDimitry Andric                                  LLT NarrowTy) {
59335ffd83dbSDimitry Andric   if (TypeIdx != 0)
59345ffd83dbSDimitry Andric     return UnableToLegalize;
59355ffd83dbSDimitry Andric 
593606c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
59375ffd83dbSDimitry Andric 
59385ffd83dbSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
59395ffd83dbSDimitry Andric   if (DstTy.isVector())
59405ffd83dbSDimitry Andric     return UnableToLegalize;
59415ffd83dbSDimitry Andric 
59425ffd83dbSDimitry Andric   SmallVector<Register, 8> Parts;
59435ffd83dbSDimitry Andric   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
59445ffd83dbSDimitry Andric   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
59455ffd83dbSDimitry Andric   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
59465ffd83dbSDimitry Andric 
59475ffd83dbSDimitry Andric   MI.eraseFromParent();
59485ffd83dbSDimitry Andric   return Legalized;
59495ffd83dbSDimitry Andric }
59505ffd83dbSDimitry Andric 
59515ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
59520b57cec5SDimitry Andric LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
59530b57cec5SDimitry Andric                                     LLT NarrowTy) {
59540b57cec5SDimitry Andric   if (TypeIdx != 0)
59550b57cec5SDimitry Andric     return UnableToLegalize;
59560b57cec5SDimitry Andric 
59570b57cec5SDimitry Andric   Register CondReg = MI.getOperand(1).getReg();
59580b57cec5SDimitry Andric   LLT CondTy = MRI.getType(CondReg);
59590b57cec5SDimitry Andric   if (CondTy.isVector()) // TODO: Handle vselect
59600b57cec5SDimitry Andric     return UnableToLegalize;
59610b57cec5SDimitry Andric 
59620b57cec5SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
59630b57cec5SDimitry Andric   LLT DstTy = MRI.getType(DstReg);
59640b57cec5SDimitry Andric 
59650b57cec5SDimitry Andric   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
59660b57cec5SDimitry Andric   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
59670b57cec5SDimitry Andric   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
59680b57cec5SDimitry Andric   LLT LeftoverTy;
59690b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
59700b57cec5SDimitry Andric                     Src1Regs, Src1LeftoverRegs))
59710b57cec5SDimitry Andric     return UnableToLegalize;
59720b57cec5SDimitry Andric 
59730b57cec5SDimitry Andric   LLT Unused;
59740b57cec5SDimitry Andric   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
59750b57cec5SDimitry Andric                     Src2Regs, Src2LeftoverRegs))
59760b57cec5SDimitry Andric     llvm_unreachable("inconsistent extractParts result");
59770b57cec5SDimitry Andric 
59780b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
59790b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(NarrowTy,
59800b57cec5SDimitry Andric                                          CondReg, Src1Regs[I], Src2Regs[I]);
59815ffd83dbSDimitry Andric     DstRegs.push_back(Select.getReg(0));
59820b57cec5SDimitry Andric   }
59830b57cec5SDimitry Andric 
59840b57cec5SDimitry Andric   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
59850b57cec5SDimitry Andric     auto Select = MIRBuilder.buildSelect(
59860b57cec5SDimitry Andric       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
59875ffd83dbSDimitry Andric     DstLeftoverRegs.push_back(Select.getReg(0));
59880b57cec5SDimitry Andric   }
59890b57cec5SDimitry Andric 
59900b57cec5SDimitry Andric   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
59910b57cec5SDimitry Andric               LeftoverTy, DstLeftoverRegs);
59920b57cec5SDimitry Andric 
59930b57cec5SDimitry Andric   MI.eraseFromParent();
59940b57cec5SDimitry Andric   return Legalized;
59950b57cec5SDimitry Andric }
59960b57cec5SDimitry Andric 
59970b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
59985ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
59995ffd83dbSDimitry Andric                                   LLT NarrowTy) {
60005ffd83dbSDimitry Andric   if (TypeIdx != 1)
60015ffd83dbSDimitry Andric     return UnableToLegalize;
60025ffd83dbSDimitry Andric 
600306c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
60045ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
60055ffd83dbSDimitry Andric 
60065ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
60075ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
60085ffd83dbSDimitry Andric 
60095ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
60105ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
60115ffd83dbSDimitry Andric     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
60125ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
60135ffd83dbSDimitry Andric     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
60145ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(1), C_0);
60155ffd83dbSDimitry Andric     auto LoCTLZ = IsUndef ?
60165ffd83dbSDimitry Andric       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
60175ffd83dbSDimitry Andric       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
60185ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
60195ffd83dbSDimitry Andric     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
60205ffd83dbSDimitry Andric     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
60215ffd83dbSDimitry Andric     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
60225ffd83dbSDimitry Andric 
60235ffd83dbSDimitry Andric     MI.eraseFromParent();
60245ffd83dbSDimitry Andric     return Legalized;
60255ffd83dbSDimitry Andric   }
60265ffd83dbSDimitry Andric 
60275ffd83dbSDimitry Andric   return UnableToLegalize;
60285ffd83dbSDimitry Andric }
60295ffd83dbSDimitry Andric 
60305ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
60315ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
60325ffd83dbSDimitry Andric                                   LLT NarrowTy) {
60335ffd83dbSDimitry Andric   if (TypeIdx != 1)
60345ffd83dbSDimitry Andric     return UnableToLegalize;
60355ffd83dbSDimitry Andric 
603606c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
60375ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
60385ffd83dbSDimitry Andric 
60395ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
60405ffd83dbSDimitry Andric     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
60415ffd83dbSDimitry Andric 
60425ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
60435ffd83dbSDimitry Andric     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
60445ffd83dbSDimitry Andric     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
60455ffd83dbSDimitry Andric     auto C_0 = B.buildConstant(NarrowTy, 0);
60465ffd83dbSDimitry Andric     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
60475ffd83dbSDimitry Andric                                 UnmergeSrc.getReg(0), C_0);
60485ffd83dbSDimitry Andric     auto HiCTTZ = IsUndef ?
60495ffd83dbSDimitry Andric       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
60505ffd83dbSDimitry Andric       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
60515ffd83dbSDimitry Andric     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
60525ffd83dbSDimitry Andric     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
60535ffd83dbSDimitry Andric     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
60545ffd83dbSDimitry Andric     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
60555ffd83dbSDimitry Andric 
60565ffd83dbSDimitry Andric     MI.eraseFromParent();
60575ffd83dbSDimitry Andric     return Legalized;
60585ffd83dbSDimitry Andric   }
60595ffd83dbSDimitry Andric 
60605ffd83dbSDimitry Andric   return UnableToLegalize;
60615ffd83dbSDimitry Andric }
60625ffd83dbSDimitry Andric 
60635ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
60645ffd83dbSDimitry Andric LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
60655ffd83dbSDimitry Andric                                    LLT NarrowTy) {
60665ffd83dbSDimitry Andric   if (TypeIdx != 1)
60675ffd83dbSDimitry Andric     return UnableToLegalize;
60685ffd83dbSDimitry Andric 
606906c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
60705ffd83dbSDimitry Andric   unsigned NarrowSize = NarrowTy.getSizeInBits();
60715ffd83dbSDimitry Andric 
60725ffd83dbSDimitry Andric   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
60735ffd83dbSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
60745ffd83dbSDimitry Andric 
60755ffd83dbSDimitry Andric     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
60765ffd83dbSDimitry Andric     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
60775ffd83dbSDimitry Andric     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
60785ffd83dbSDimitry Andric 
60795ffd83dbSDimitry Andric     MI.eraseFromParent();
60805ffd83dbSDimitry Andric     return Legalized;
60815ffd83dbSDimitry Andric   }
60825ffd83dbSDimitry Andric 
60835ffd83dbSDimitry Andric   return UnableToLegalize;
60845ffd83dbSDimitry Andric }
60855ffd83dbSDimitry Andric 
60865ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
608706c3fb27SDimitry Andric LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
608806c3fb27SDimitry Andric                                     LLT NarrowTy) {
608906c3fb27SDimitry Andric   if (TypeIdx != 1)
609006c3fb27SDimitry Andric     return UnableToLegalize;
609106c3fb27SDimitry Andric 
609206c3fb27SDimitry Andric   MachineIRBuilder &B = MIRBuilder;
609306c3fb27SDimitry Andric   Register ExpReg = MI.getOperand(2).getReg();
609406c3fb27SDimitry Andric   LLT ExpTy = MRI.getType(ExpReg);
609506c3fb27SDimitry Andric 
609606c3fb27SDimitry Andric   unsigned ClampSize = NarrowTy.getScalarSizeInBits();
609706c3fb27SDimitry Andric 
609806c3fb27SDimitry Andric   // Clamp the exponent to the range of the target type.
609906c3fb27SDimitry Andric   auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
610006c3fb27SDimitry Andric   auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
610106c3fb27SDimitry Andric   auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
610206c3fb27SDimitry Andric   auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
610306c3fb27SDimitry Andric 
610406c3fb27SDimitry Andric   auto Trunc = B.buildTrunc(NarrowTy, Clamp);
610506c3fb27SDimitry Andric   Observer.changingInstr(MI);
610606c3fb27SDimitry Andric   MI.getOperand(2).setReg(Trunc.getReg(0));
610706c3fb27SDimitry Andric   Observer.changedInstr(MI);
610806c3fb27SDimitry Andric   return Legalized;
610906c3fb27SDimitry Andric }
611006c3fb27SDimitry Andric 
611106c3fb27SDimitry Andric LegalizerHelper::LegalizeResult
6112e8d8bef9SDimitry Andric LegalizerHelper::lowerBitCount(MachineInstr &MI) {
61130b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
6114e8d8bef9SDimitry Andric   const auto &TII = MIRBuilder.getTII();
61150b57cec5SDimitry Andric   auto isSupported = [this](const LegalityQuery &Q) {
61160b57cec5SDimitry Andric     auto QAction = LI.getAction(Q).Action;
61170b57cec5SDimitry Andric     return QAction == Legal || QAction == Libcall || QAction == Custom;
61180b57cec5SDimitry Andric   };
61190b57cec5SDimitry Andric   switch (Opc) {
61200b57cec5SDimitry Andric   default:
61210b57cec5SDimitry Andric     return UnableToLegalize;
61220b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
61230b57cec5SDimitry Andric     // This trivially expands to CTLZ.
61240b57cec5SDimitry Andric     Observer.changingInstr(MI);
61250b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
61260b57cec5SDimitry Andric     Observer.changedInstr(MI);
61270b57cec5SDimitry Andric     return Legalized;
61280b57cec5SDimitry Andric   }
61290b57cec5SDimitry Andric   case TargetOpcode::G_CTLZ: {
613006c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
61315ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
61325ffd83dbSDimitry Andric 
61335ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
61340b57cec5SDimitry Andric       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
61355ffd83dbSDimitry Andric       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
61365ffd83dbSDimitry Andric       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
61375ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
61385ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
61395ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
61405ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
61410b57cec5SDimitry Andric       MI.eraseFromParent();
61420b57cec5SDimitry Andric       return Legalized;
61430b57cec5SDimitry Andric     }
61440b57cec5SDimitry Andric     // for now, we do this:
61450b57cec5SDimitry Andric     // NewLen = NextPowerOf2(Len);
61460b57cec5SDimitry Andric     // x = x | (x >> 1);
61470b57cec5SDimitry Andric     // x = x | (x >> 2);
61480b57cec5SDimitry Andric     // ...
61490b57cec5SDimitry Andric     // x = x | (x >>16);
61500b57cec5SDimitry Andric     // x = x | (x >>32); // for 64-bit input
61510b57cec5SDimitry Andric     // Upto NewLen/2
61520b57cec5SDimitry Andric     // return Len - popcount(x);
61530b57cec5SDimitry Andric     //
61540b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
61550b57cec5SDimitry Andric     Register Op = SrcReg;
61560b57cec5SDimitry Andric     unsigned NewLen = PowerOf2Ceil(Len);
61570b57cec5SDimitry Andric     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
61585ffd83dbSDimitry Andric       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
61595ffd83dbSDimitry Andric       auto MIBOp = MIRBuilder.buildOr(
61605ffd83dbSDimitry Andric           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
61615ffd83dbSDimitry Andric       Op = MIBOp.getReg(0);
61620b57cec5SDimitry Andric     }
61635ffd83dbSDimitry Andric     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
61645ffd83dbSDimitry Andric     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
61655ffd83dbSDimitry Andric                         MIBPop);
61660b57cec5SDimitry Andric     MI.eraseFromParent();
61670b57cec5SDimitry Andric     return Legalized;
61680b57cec5SDimitry Andric   }
61690b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
61700b57cec5SDimitry Andric     // This trivially expands to CTTZ.
61710b57cec5SDimitry Andric     Observer.changingInstr(MI);
61720b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
61730b57cec5SDimitry Andric     Observer.changedInstr(MI);
61740b57cec5SDimitry Andric     return Legalized;
61750b57cec5SDimitry Andric   }
61760b57cec5SDimitry Andric   case TargetOpcode::G_CTTZ: {
617706c3fb27SDimitry Andric     auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
61785ffd83dbSDimitry Andric 
61795ffd83dbSDimitry Andric     unsigned Len = SrcTy.getSizeInBits();
61805ffd83dbSDimitry Andric     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
61810b57cec5SDimitry Andric       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
61820b57cec5SDimitry Andric       // zero.
61835ffd83dbSDimitry Andric       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
61845ffd83dbSDimitry Andric       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
61855ffd83dbSDimitry Andric       auto ICmp = MIRBuilder.buildICmp(
61865ffd83dbSDimitry Andric           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
61875ffd83dbSDimitry Andric       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
61885ffd83dbSDimitry Andric       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
61890b57cec5SDimitry Andric       MI.eraseFromParent();
61900b57cec5SDimitry Andric       return Legalized;
61910b57cec5SDimitry Andric     }
61920b57cec5SDimitry Andric     // for now, we use: { return popcount(~x & (x - 1)); }
61930b57cec5SDimitry Andric     // unless the target has ctlz but not ctpop, in which case we use:
61940b57cec5SDimitry Andric     // { return 32 - nlz(~x & (x-1)); }
61950b57cec5SDimitry Andric     // Ref: "Hacker's Delight" by Henry Warren
6196e8d8bef9SDimitry Andric     auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6197e8d8bef9SDimitry Andric     auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
61985ffd83dbSDimitry Andric     auto MIBTmp = MIRBuilder.buildAnd(
6199e8d8bef9SDimitry Andric         SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6200e8d8bef9SDimitry Andric     if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6201e8d8bef9SDimitry Andric         isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6202e8d8bef9SDimitry Andric       auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
62035ffd83dbSDimitry Andric       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6204e8d8bef9SDimitry Andric                           MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
62050b57cec5SDimitry Andric       MI.eraseFromParent();
62060b57cec5SDimitry Andric       return Legalized;
62070b57cec5SDimitry Andric     }
62085f757f3fSDimitry Andric     Observer.changingInstr(MI);
62090b57cec5SDimitry Andric     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
62105ffd83dbSDimitry Andric     MI.getOperand(1).setReg(MIBTmp.getReg(0));
62115f757f3fSDimitry Andric     Observer.changedInstr(MI);
62125ffd83dbSDimitry Andric     return Legalized;
62135ffd83dbSDimitry Andric   }
62145ffd83dbSDimitry Andric   case TargetOpcode::G_CTPOP: {
6215e8d8bef9SDimitry Andric     Register SrcReg = MI.getOperand(1).getReg();
6216e8d8bef9SDimitry Andric     LLT Ty = MRI.getType(SrcReg);
62175ffd83dbSDimitry Andric     unsigned Size = Ty.getSizeInBits();
62185ffd83dbSDimitry Andric     MachineIRBuilder &B = MIRBuilder;
62195ffd83dbSDimitry Andric 
62205ffd83dbSDimitry Andric     // Count set bits in blocks of 2 bits. Default approach would be
62215ffd83dbSDimitry Andric     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
62225ffd83dbSDimitry Andric     // We use following formula instead:
62235ffd83dbSDimitry Andric     // B2Count = val - { (val >> 1) & 0x55555555 }
62245ffd83dbSDimitry Andric     // since it gives same result in blocks of 2 with one instruction less.
62255ffd83dbSDimitry Andric     auto C_1 = B.buildConstant(Ty, 1);
6226e8d8bef9SDimitry Andric     auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
62275ffd83dbSDimitry Andric     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
62285ffd83dbSDimitry Andric     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
62295ffd83dbSDimitry Andric     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6230e8d8bef9SDimitry Andric     auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
62315ffd83dbSDimitry Andric 
62325ffd83dbSDimitry Andric     // In order to get count in blocks of 4 add values from adjacent block of 2.
62335ffd83dbSDimitry Andric     // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
62345ffd83dbSDimitry Andric     auto C_2 = B.buildConstant(Ty, 2);
62355ffd83dbSDimitry Andric     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
62365ffd83dbSDimitry Andric     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
62375ffd83dbSDimitry Andric     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
62385ffd83dbSDimitry Andric     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
62395ffd83dbSDimitry Andric     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
62405ffd83dbSDimitry Andric     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
62415ffd83dbSDimitry Andric 
62425ffd83dbSDimitry Andric     // For count in blocks of 8 bits we don't have to mask high 4 bits before
62435ffd83dbSDimitry Andric     // addition since count value sits in range {0,...,8} and 4 bits are enough
62445ffd83dbSDimitry Andric     // to hold such binary values. After addition high 4 bits still hold count
62455ffd83dbSDimitry Andric     // of set bits in high 4 bit block, set them to zero and get 8 bit result.
62465ffd83dbSDimitry Andric     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
62475ffd83dbSDimitry Andric     auto C_4 = B.buildConstant(Ty, 4);
62485ffd83dbSDimitry Andric     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
62495ffd83dbSDimitry Andric     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
62505ffd83dbSDimitry Andric     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
62515ffd83dbSDimitry Andric     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
62525ffd83dbSDimitry Andric     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
62535ffd83dbSDimitry Andric 
62545ffd83dbSDimitry Andric     assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
62555ffd83dbSDimitry Andric     // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
62565ffd83dbSDimitry Andric     // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
62575ffd83dbSDimitry Andric     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
62585ffd83dbSDimitry Andric     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
62595ffd83dbSDimitry Andric 
62605ffd83dbSDimitry Andric     // Shift count result from 8 high bits to low bits.
62615ffd83dbSDimitry Andric     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
62625ffd83dbSDimitry Andric     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
62635ffd83dbSDimitry Andric 
62645ffd83dbSDimitry Andric     MI.eraseFromParent();
62650b57cec5SDimitry Andric     return Legalized;
62660b57cec5SDimitry Andric   }
62670b57cec5SDimitry Andric   }
62680b57cec5SDimitry Andric }
62690b57cec5SDimitry Andric 
6270fe6060f1SDimitry Andric // Check that (every element of) Reg is undef or not an exact multiple of BW.
6271fe6060f1SDimitry Andric static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6272fe6060f1SDimitry Andric                                         Register Reg, unsigned BW) {
6273fe6060f1SDimitry Andric   return matchUnaryPredicate(
6274fe6060f1SDimitry Andric       MRI, Reg,
6275fe6060f1SDimitry Andric       [=](const Constant *C) {
6276fe6060f1SDimitry Andric         // Null constant here means an undef.
6277fe6060f1SDimitry Andric         const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6278fe6060f1SDimitry Andric         return !CI || CI->getValue().urem(BW) != 0;
6279fe6060f1SDimitry Andric       },
6280fe6060f1SDimitry Andric       /*AllowUndefs*/ true);
6281fe6060f1SDimitry Andric }
6282fe6060f1SDimitry Andric 
6283fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6284fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
628506c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6286fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6287fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6288fe6060f1SDimitry Andric 
6289fe6060f1SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
6290fe6060f1SDimitry Andric 
6291fe6060f1SDimitry Andric   if (!isPowerOf2_32(BW))
6292fe6060f1SDimitry Andric     return UnableToLegalize;
6293fe6060f1SDimitry Andric 
6294fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6295fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6296fe6060f1SDimitry Andric 
6297fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6298fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr X, Y, -Z
6299fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl X, Y, -Z
6300fe6060f1SDimitry Andric     auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6301fe6060f1SDimitry Andric     Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6302fe6060f1SDimitry Andric   } else {
6303fe6060f1SDimitry Andric     // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6304fe6060f1SDimitry Andric     // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6305fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6306fe6060f1SDimitry Andric     if (IsFSHL) {
6307fe6060f1SDimitry Andric       Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6308fe6060f1SDimitry Andric       X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6309fe6060f1SDimitry Andric     } else {
6310fe6060f1SDimitry Andric       X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6311fe6060f1SDimitry Andric       Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6312fe6060f1SDimitry Andric     }
6313fe6060f1SDimitry Andric 
6314fe6060f1SDimitry Andric     Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6315fe6060f1SDimitry Andric   }
6316fe6060f1SDimitry Andric 
6317fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6318fe6060f1SDimitry Andric   MI.eraseFromParent();
6319fe6060f1SDimitry Andric   return Legalized;
6320fe6060f1SDimitry Andric }
6321fe6060f1SDimitry Andric 
6322fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6323fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
632406c3fb27SDimitry Andric   auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6325fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6326fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(Z);
6327fe6060f1SDimitry Andric 
6328fe6060f1SDimitry Andric   const unsigned BW = Ty.getScalarSizeInBits();
6329fe6060f1SDimitry Andric   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6330fe6060f1SDimitry Andric 
6331fe6060f1SDimitry Andric   Register ShX, ShY;
6332fe6060f1SDimitry Andric   Register ShAmt, InvShAmt;
6333fe6060f1SDimitry Andric 
6334fe6060f1SDimitry Andric   // FIXME: Emit optimized urem by constant instead of letting it expand later.
6335fe6060f1SDimitry Andric   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6336fe6060f1SDimitry Andric     // fshl: X << C | Y >> (BW - C)
6337fe6060f1SDimitry Andric     // fshr: X << (BW - C) | Y >> C
6338fe6060f1SDimitry Andric     // where C = Z % BW is not zero
6339fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6340fe6060f1SDimitry Andric     ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6341fe6060f1SDimitry Andric     InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6342fe6060f1SDimitry Andric     ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6343fe6060f1SDimitry Andric     ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6344fe6060f1SDimitry Andric   } else {
6345fe6060f1SDimitry Andric     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6346fe6060f1SDimitry Andric     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6347fe6060f1SDimitry Andric     auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6348fe6060f1SDimitry Andric     if (isPowerOf2_32(BW)) {
6349fe6060f1SDimitry Andric       // Z % BW -> Z & (BW - 1)
6350fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6351fe6060f1SDimitry Andric       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6352fe6060f1SDimitry Andric       auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6353fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6354fe6060f1SDimitry Andric     } else {
6355fe6060f1SDimitry Andric       auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6356fe6060f1SDimitry Andric       ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6357fe6060f1SDimitry Andric       InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6358fe6060f1SDimitry Andric     }
6359fe6060f1SDimitry Andric 
6360fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(ShTy, 1);
6361fe6060f1SDimitry Andric     if (IsFSHL) {
6362fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6363fe6060f1SDimitry Andric       auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6364fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6365fe6060f1SDimitry Andric     } else {
6366fe6060f1SDimitry Andric       auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6367fe6060f1SDimitry Andric       ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6368fe6060f1SDimitry Andric       ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6369fe6060f1SDimitry Andric     }
6370fe6060f1SDimitry Andric   }
6371fe6060f1SDimitry Andric 
6372fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShX, ShY);
6373fe6060f1SDimitry Andric   MI.eraseFromParent();
6374fe6060f1SDimitry Andric   return Legalized;
6375fe6060f1SDimitry Andric }
6376fe6060f1SDimitry Andric 
6377fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6378fe6060f1SDimitry Andric LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6379fe6060f1SDimitry Andric   // These operations approximately do the following (while avoiding undefined
6380fe6060f1SDimitry Andric   // shifts by BW):
6381fe6060f1SDimitry Andric   // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6382fe6060f1SDimitry Andric   // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6383fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
6384fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Dst);
6385fe6060f1SDimitry Andric   LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6386fe6060f1SDimitry Andric 
6387fe6060f1SDimitry Andric   bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6388fe6060f1SDimitry Andric   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6389fe6060f1SDimitry Andric 
6390fe6060f1SDimitry Andric   // TODO: Use smarter heuristic that accounts for vector legalization.
6391fe6060f1SDimitry Andric   if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6392fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6393fe6060f1SDimitry Andric 
6394fe6060f1SDimitry Andric   // This only works for powers of 2, fallback to shifts if it fails.
6395fe6060f1SDimitry Andric   LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6396fe6060f1SDimitry Andric   if (Result == UnableToLegalize)
6397fe6060f1SDimitry Andric     return lowerFunnelShiftAsShifts(MI);
6398fe6060f1SDimitry Andric   return Result;
6399fe6060f1SDimitry Andric }
6400fe6060f1SDimitry Andric 
64015f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
64025f757f3fSDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
64035f757f3fSDimitry Andric   LLT DstTy = MRI.getType(Dst);
64045f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(Src);
64055f757f3fSDimitry Andric 
64065f757f3fSDimitry Andric   uint32_t DstTySize = DstTy.getSizeInBits();
64075f757f3fSDimitry Andric   uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
64085f757f3fSDimitry Andric   uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
64095f757f3fSDimitry Andric 
64105f757f3fSDimitry Andric   if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
64115f757f3fSDimitry Andric       !isPowerOf2_32(SrcTyScalarSize))
64125f757f3fSDimitry Andric     return UnableToLegalize;
64135f757f3fSDimitry Andric 
64145f757f3fSDimitry Andric   // The step between extend is too large, split it by creating an intermediate
64155f757f3fSDimitry Andric   // extend instruction
64165f757f3fSDimitry Andric   if (SrcTyScalarSize * 2 < DstTyScalarSize) {
64175f757f3fSDimitry Andric     LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
64185f757f3fSDimitry Andric     // If the destination type is illegal, split it into multiple statements
64195f757f3fSDimitry Andric     // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
64205f757f3fSDimitry Andric     auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
64215f757f3fSDimitry Andric     // Unmerge the vector
64225f757f3fSDimitry Andric     LLT EltTy = MidTy.changeElementCount(
64235f757f3fSDimitry Andric         MidTy.getElementCount().divideCoefficientBy(2));
64245f757f3fSDimitry Andric     auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
64255f757f3fSDimitry Andric 
64265f757f3fSDimitry Andric     // ZExt the vectors
64275f757f3fSDimitry Andric     LLT ZExtResTy = DstTy.changeElementCount(
64285f757f3fSDimitry Andric         DstTy.getElementCount().divideCoefficientBy(2));
64295f757f3fSDimitry Andric     auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
64305f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(0)});
64315f757f3fSDimitry Andric     auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
64325f757f3fSDimitry Andric                                           {UnmergeSrc.getReg(1)});
64335f757f3fSDimitry Andric 
64345f757f3fSDimitry Andric     // Merge the ending vectors
64355f757f3fSDimitry Andric     MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
64365f757f3fSDimitry Andric 
64375f757f3fSDimitry Andric     MI.eraseFromParent();
64385f757f3fSDimitry Andric     return Legalized;
64395f757f3fSDimitry Andric   }
64405f757f3fSDimitry Andric   return UnableToLegalize;
64415f757f3fSDimitry Andric }
64425f757f3fSDimitry Andric 
64435f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
64445f757f3fSDimitry Andric   // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
64455f757f3fSDimitry Andric   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
64465f757f3fSDimitry Andric   // Similar to how operand splitting is done in SelectiondDAG, we can handle
64475f757f3fSDimitry Andric   // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
64485f757f3fSDimitry Andric   //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
64495f757f3fSDimitry Andric   //   %lo16(<4 x s16>) = G_TRUNC %inlo
64505f757f3fSDimitry Andric   //   %hi16(<4 x s16>) = G_TRUNC %inhi
64515f757f3fSDimitry Andric   //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
64525f757f3fSDimitry Andric   //   %res(<8 x s8>) = G_TRUNC %in16
64535f757f3fSDimitry Andric 
64545f757f3fSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
64555f757f3fSDimitry Andric 
64565f757f3fSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
64575f757f3fSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
64585f757f3fSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
64595f757f3fSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
64605f757f3fSDimitry Andric 
64615f757f3fSDimitry Andric   if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
64625f757f3fSDimitry Andric       isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
64635f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getNumElements()) &&
64645f757f3fSDimitry Andric       isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
64655f757f3fSDimitry Andric     // Split input type.
64665f757f3fSDimitry Andric     LLT SplitSrcTy = SrcTy.changeElementCount(
64675f757f3fSDimitry Andric         SrcTy.getElementCount().divideCoefficientBy(2));
64685f757f3fSDimitry Andric 
64695f757f3fSDimitry Andric     // First, split the source into two smaller vectors.
64705f757f3fSDimitry Andric     SmallVector<Register, 2> SplitSrcs;
64715f757f3fSDimitry Andric     extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs);
64725f757f3fSDimitry Andric 
64735f757f3fSDimitry Andric     // Truncate the splits into intermediate narrower elements.
64745f757f3fSDimitry Andric     LLT InterTy;
64755f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
64765f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
64775f757f3fSDimitry Andric     else
64785f757f3fSDimitry Andric       InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
64795f757f3fSDimitry Andric     for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
64805f757f3fSDimitry Andric       SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
64815f757f3fSDimitry Andric     }
64825f757f3fSDimitry Andric 
64835f757f3fSDimitry Andric     // Combine the new truncates into one vector
64845f757f3fSDimitry Andric     auto Merge = MIRBuilder.buildMergeLikeInstr(
64855f757f3fSDimitry Andric         DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
64865f757f3fSDimitry Andric 
64875f757f3fSDimitry Andric     // Truncate the new vector to the final result type
64885f757f3fSDimitry Andric     if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
64895f757f3fSDimitry Andric       MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
64905f757f3fSDimitry Andric     else
64915f757f3fSDimitry Andric       MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
64925f757f3fSDimitry Andric 
64935f757f3fSDimitry Andric     MI.eraseFromParent();
64945f757f3fSDimitry Andric 
64955f757f3fSDimitry Andric     return Legalized;
64965f757f3fSDimitry Andric   }
64975f757f3fSDimitry Andric   return UnableToLegalize;
64985f757f3fSDimitry Andric }
64995f757f3fSDimitry Andric 
6500fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
6501fe6060f1SDimitry Andric LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
650206c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6503fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6504fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6505fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6506fe6060f1SDimitry Andric   auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6507fe6060f1SDimitry Andric   MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6508fe6060f1SDimitry Andric   MI.eraseFromParent();
6509fe6060f1SDimitry Andric   return Legalized;
6510fe6060f1SDimitry Andric }
6511fe6060f1SDimitry Andric 
6512fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
651306c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6514fe6060f1SDimitry Andric 
6515fe6060f1SDimitry Andric   unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6516fe6060f1SDimitry Andric   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6517fe6060f1SDimitry Andric 
6518fe6060f1SDimitry Andric   MIRBuilder.setInstrAndDebugLoc(MI);
6519fe6060f1SDimitry Andric 
6520fe6060f1SDimitry Andric   // If a rotate in the other direction is supported, use it.
6521fe6060f1SDimitry Andric   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6522fe6060f1SDimitry Andric   if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6523fe6060f1SDimitry Andric       isPowerOf2_32(EltSizeInBits))
6524fe6060f1SDimitry Andric     return lowerRotateWithReverseRotate(MI);
6525fe6060f1SDimitry Andric 
6526349cc55cSDimitry Andric   // If a funnel shift is supported, use it.
6527349cc55cSDimitry Andric   unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6528349cc55cSDimitry Andric   unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6529349cc55cSDimitry Andric   bool IsFShLegal = false;
6530349cc55cSDimitry Andric   if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6531349cc55cSDimitry Andric       LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6532349cc55cSDimitry Andric     auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6533349cc55cSDimitry Andric                                 Register R3) {
6534349cc55cSDimitry Andric       MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6535349cc55cSDimitry Andric       MI.eraseFromParent();
6536349cc55cSDimitry Andric       return Legalized;
6537349cc55cSDimitry Andric     };
6538349cc55cSDimitry Andric     // If a funnel shift in the other direction is supported, use it.
6539349cc55cSDimitry Andric     if (IsFShLegal) {
6540349cc55cSDimitry Andric       return buildFunnelShift(FShOpc, Dst, Src, Amt);
6541349cc55cSDimitry Andric     } else if (isPowerOf2_32(EltSizeInBits)) {
6542349cc55cSDimitry Andric       Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6543349cc55cSDimitry Andric       return buildFunnelShift(RevFsh, Dst, Src, Amt);
6544349cc55cSDimitry Andric     }
6545349cc55cSDimitry Andric   }
6546349cc55cSDimitry Andric 
6547fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6548fe6060f1SDimitry Andric   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6549fe6060f1SDimitry Andric   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6550fe6060f1SDimitry Andric   auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6551fe6060f1SDimitry Andric   Register ShVal;
6552fe6060f1SDimitry Andric   Register RevShiftVal;
6553fe6060f1SDimitry Andric   if (isPowerOf2_32(EltSizeInBits)) {
6554fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6555fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6556fe6060f1SDimitry Andric     auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6557fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6558fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6559fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6560fe6060f1SDimitry Andric     RevShiftVal =
6561fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6562fe6060f1SDimitry Andric   } else {
6563fe6060f1SDimitry Andric     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6564fe6060f1SDimitry Andric     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6565fe6060f1SDimitry Andric     auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6566fe6060f1SDimitry Andric     auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6567fe6060f1SDimitry Andric     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6568fe6060f1SDimitry Andric     auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6569fe6060f1SDimitry Andric     auto One = MIRBuilder.buildConstant(AmtTy, 1);
6570fe6060f1SDimitry Andric     auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6571fe6060f1SDimitry Andric     RevShiftVal =
6572fe6060f1SDimitry Andric         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6573fe6060f1SDimitry Andric   }
6574fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6575fe6060f1SDimitry Andric   MI.eraseFromParent();
6576fe6060f1SDimitry Andric   return Legalized;
6577fe6060f1SDimitry Andric }
6578fe6060f1SDimitry Andric 
65790b57cec5SDimitry Andric // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
65800b57cec5SDimitry Andric // representation.
65810b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
65820b57cec5SDimitry Andric LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
658306c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
65840b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
65850b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
65860b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
65870b57cec5SDimitry Andric 
65880b57cec5SDimitry Andric   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
65890b57cec5SDimitry Andric 
65900b57cec5SDimitry Andric   // unsigned cul2f(ulong u) {
65910b57cec5SDimitry Andric   //   uint lz = clz(u);
65920b57cec5SDimitry Andric   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
65930b57cec5SDimitry Andric   //   u = (u << lz) & 0x7fffffffffffffffUL;
65940b57cec5SDimitry Andric   //   ulong t = u & 0xffffffffffUL;
65950b57cec5SDimitry Andric   //   uint v = (e << 23) | (uint)(u >> 40);
65960b57cec5SDimitry Andric   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
65970b57cec5SDimitry Andric   //   return as_float(v + r);
65980b57cec5SDimitry Andric   // }
65990b57cec5SDimitry Andric 
66000b57cec5SDimitry Andric   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
66010b57cec5SDimitry Andric   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
66020b57cec5SDimitry Andric 
66030b57cec5SDimitry Andric   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
66040b57cec5SDimitry Andric 
66050b57cec5SDimitry Andric   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
66060b57cec5SDimitry Andric   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
66070b57cec5SDimitry Andric 
66080b57cec5SDimitry Andric   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
66090b57cec5SDimitry Andric   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
66100b57cec5SDimitry Andric 
66110b57cec5SDimitry Andric   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
66120b57cec5SDimitry Andric   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
66130b57cec5SDimitry Andric 
66140b57cec5SDimitry Andric   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
66150b57cec5SDimitry Andric 
66160b57cec5SDimitry Andric   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
66170b57cec5SDimitry Andric   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
66180b57cec5SDimitry Andric 
66190b57cec5SDimitry Andric   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
66200b57cec5SDimitry Andric   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
66210b57cec5SDimitry Andric   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
66220b57cec5SDimitry Andric 
66230b57cec5SDimitry Andric   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
66240b57cec5SDimitry Andric   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
66250b57cec5SDimitry Andric   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
66260b57cec5SDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
66270b57cec5SDimitry Andric 
66280b57cec5SDimitry Andric   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
66290b57cec5SDimitry Andric   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
66300b57cec5SDimitry Andric   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
66310b57cec5SDimitry Andric   MIRBuilder.buildAdd(Dst, V, R);
66320b57cec5SDimitry Andric 
66335ffd83dbSDimitry Andric   MI.eraseFromParent();
66340b57cec5SDimitry Andric   return Legalized;
66350b57cec5SDimitry Andric }
66360b57cec5SDimitry Andric 
6637e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
663806c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
66390b57cec5SDimitry Andric 
6640480093f4SDimitry Andric   if (SrcTy == LLT::scalar(1)) {
6641480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6642480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6643480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6644480093f4SDimitry Andric     MI.eraseFromParent();
6645480093f4SDimitry Andric     return Legalized;
6646480093f4SDimitry Andric   }
6647480093f4SDimitry Andric 
66480b57cec5SDimitry Andric   if (SrcTy != LLT::scalar(64))
66490b57cec5SDimitry Andric     return UnableToLegalize;
66500b57cec5SDimitry Andric 
66510b57cec5SDimitry Andric   if (DstTy == LLT::scalar(32)) {
66520b57cec5SDimitry Andric     // TODO: SelectionDAG has several alternative expansions to port which may
66530b57cec5SDimitry Andric     // be more reasonble depending on the available instructions. If a target
66540b57cec5SDimitry Andric     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
66550b57cec5SDimitry Andric     // intermediate type, this is probably worse.
66560b57cec5SDimitry Andric     return lowerU64ToF32BitOps(MI);
66570b57cec5SDimitry Andric   }
66580b57cec5SDimitry Andric 
66590b57cec5SDimitry Andric   return UnableToLegalize;
66600b57cec5SDimitry Andric }
66610b57cec5SDimitry Andric 
6662e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
666306c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
66640b57cec5SDimitry Andric 
66650b57cec5SDimitry Andric   const LLT S64 = LLT::scalar(64);
66660b57cec5SDimitry Andric   const LLT S32 = LLT::scalar(32);
66670b57cec5SDimitry Andric   const LLT S1 = LLT::scalar(1);
66680b57cec5SDimitry Andric 
6669480093f4SDimitry Andric   if (SrcTy == S1) {
6670480093f4SDimitry Andric     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6671480093f4SDimitry Andric     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6672480093f4SDimitry Andric     MIRBuilder.buildSelect(Dst, Src, True, False);
6673480093f4SDimitry Andric     MI.eraseFromParent();
6674480093f4SDimitry Andric     return Legalized;
6675480093f4SDimitry Andric   }
6676480093f4SDimitry Andric 
66770b57cec5SDimitry Andric   if (SrcTy != S64)
66780b57cec5SDimitry Andric     return UnableToLegalize;
66790b57cec5SDimitry Andric 
66800b57cec5SDimitry Andric   if (DstTy == S32) {
66810b57cec5SDimitry Andric     // signed cl2f(long l) {
66820b57cec5SDimitry Andric     //   long s = l >> 63;
66830b57cec5SDimitry Andric     //   float r = cul2f((l + s) ^ s);
66840b57cec5SDimitry Andric     //   return s ? -r : r;
66850b57cec5SDimitry Andric     // }
66860b57cec5SDimitry Andric     Register L = Src;
66870b57cec5SDimitry Andric     auto SignBit = MIRBuilder.buildConstant(S64, 63);
66880b57cec5SDimitry Andric     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
66890b57cec5SDimitry Andric 
66900b57cec5SDimitry Andric     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
66910b57cec5SDimitry Andric     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
66920b57cec5SDimitry Andric     auto R = MIRBuilder.buildUITOFP(S32, Xor);
66930b57cec5SDimitry Andric 
66940b57cec5SDimitry Andric     auto RNeg = MIRBuilder.buildFNeg(S32, R);
66950b57cec5SDimitry Andric     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
66960b57cec5SDimitry Andric                                             MIRBuilder.buildConstant(S64, 0));
66970b57cec5SDimitry Andric     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
66985ffd83dbSDimitry Andric     MI.eraseFromParent();
66990b57cec5SDimitry Andric     return Legalized;
67000b57cec5SDimitry Andric   }
67010b57cec5SDimitry Andric 
67020b57cec5SDimitry Andric   return UnableToLegalize;
67030b57cec5SDimitry Andric }
67040b57cec5SDimitry Andric 
6705e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
670606c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
67078bcb0991SDimitry Andric   const LLT S64 = LLT::scalar(64);
67088bcb0991SDimitry Andric   const LLT S32 = LLT::scalar(32);
67098bcb0991SDimitry Andric 
67108bcb0991SDimitry Andric   if (SrcTy != S64 && SrcTy != S32)
67118bcb0991SDimitry Andric     return UnableToLegalize;
67128bcb0991SDimitry Andric   if (DstTy != S32 && DstTy != S64)
67138bcb0991SDimitry Andric     return UnableToLegalize;
67148bcb0991SDimitry Andric 
67158bcb0991SDimitry Andric   // FPTOSI gives same result as FPTOUI for positive signed integers.
67168bcb0991SDimitry Andric   // FPTOUI needs to deal with fp values that convert to unsigned integers
67178bcb0991SDimitry Andric   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
67188bcb0991SDimitry Andric 
67198bcb0991SDimitry Andric   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
67208bcb0991SDimitry Andric   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
67218bcb0991SDimitry Andric                                                 : APFloat::IEEEdouble(),
6722349cc55cSDimitry Andric                     APInt::getZero(SrcTy.getSizeInBits()));
67238bcb0991SDimitry Andric   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
67248bcb0991SDimitry Andric 
67258bcb0991SDimitry Andric   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
67268bcb0991SDimitry Andric 
67278bcb0991SDimitry Andric   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
67288bcb0991SDimitry Andric   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
67298bcb0991SDimitry Andric   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
67308bcb0991SDimitry Andric   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
67318bcb0991SDimitry Andric   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
67328bcb0991SDimitry Andric   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
67338bcb0991SDimitry Andric   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
67348bcb0991SDimitry Andric 
6735480093f4SDimitry Andric   const LLT S1 = LLT::scalar(1);
6736480093f4SDimitry Andric 
67378bcb0991SDimitry Andric   MachineInstrBuilder FCMP =
6738480093f4SDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
67398bcb0991SDimitry Andric   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
67408bcb0991SDimitry Andric 
67418bcb0991SDimitry Andric   MI.eraseFromParent();
67428bcb0991SDimitry Andric   return Legalized;
67438bcb0991SDimitry Andric }
67448bcb0991SDimitry Andric 
67455ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
674606c3fb27SDimitry Andric   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
67475ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
67485ffd83dbSDimitry Andric   const LLT S32 = LLT::scalar(32);
67495ffd83dbSDimitry Andric 
67505ffd83dbSDimitry Andric   // FIXME: Only f32 to i64 conversions are supported.
67515ffd83dbSDimitry Andric   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
67525ffd83dbSDimitry Andric     return UnableToLegalize;
67535ffd83dbSDimitry Andric 
67545ffd83dbSDimitry Andric   // Expand f32 -> i64 conversion
67555ffd83dbSDimitry Andric   // This algorithm comes from compiler-rt's implementation of fixsfdi:
6756fe6060f1SDimitry Andric   // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
67575ffd83dbSDimitry Andric 
67585ffd83dbSDimitry Andric   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
67595ffd83dbSDimitry Andric 
67605ffd83dbSDimitry Andric   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
67615ffd83dbSDimitry Andric   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
67625ffd83dbSDimitry Andric 
67635ffd83dbSDimitry Andric   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
67645ffd83dbSDimitry Andric   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
67655ffd83dbSDimitry Andric 
67665ffd83dbSDimitry Andric   auto SignMask = MIRBuilder.buildConstant(SrcTy,
67675ffd83dbSDimitry Andric                                            APInt::getSignMask(SrcEltBits));
67685ffd83dbSDimitry Andric   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
67695ffd83dbSDimitry Andric   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
67705ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
67715ffd83dbSDimitry Andric   Sign = MIRBuilder.buildSExt(DstTy, Sign);
67725ffd83dbSDimitry Andric 
67735ffd83dbSDimitry Andric   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
67745ffd83dbSDimitry Andric   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
67755ffd83dbSDimitry Andric   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
67765ffd83dbSDimitry Andric 
67775ffd83dbSDimitry Andric   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
67785ffd83dbSDimitry Andric   R = MIRBuilder.buildZExt(DstTy, R);
67795ffd83dbSDimitry Andric 
67805ffd83dbSDimitry Andric   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
67815ffd83dbSDimitry Andric   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
67825ffd83dbSDimitry Andric   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
67835ffd83dbSDimitry Andric   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
67845ffd83dbSDimitry Andric 
67855ffd83dbSDimitry Andric   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
67865ffd83dbSDimitry Andric   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
67875ffd83dbSDimitry Andric 
67885ffd83dbSDimitry Andric   const LLT S1 = LLT::scalar(1);
67895ffd83dbSDimitry Andric   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
67905ffd83dbSDimitry Andric                                     S1, Exponent, ExponentLoBit);
67915ffd83dbSDimitry Andric 
67925ffd83dbSDimitry Andric   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
67935ffd83dbSDimitry Andric 
67945ffd83dbSDimitry Andric   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
67955ffd83dbSDimitry Andric   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
67965ffd83dbSDimitry Andric 
67975ffd83dbSDimitry Andric   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
67985ffd83dbSDimitry Andric 
67995ffd83dbSDimitry Andric   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
68005ffd83dbSDimitry Andric                                           S1, Exponent, ZeroSrcTy);
68015ffd83dbSDimitry Andric 
68025ffd83dbSDimitry Andric   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
68035ffd83dbSDimitry Andric   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
68045ffd83dbSDimitry Andric 
68055ffd83dbSDimitry Andric   MI.eraseFromParent();
68065ffd83dbSDimitry Andric   return Legalized;
68075ffd83dbSDimitry Andric }
68085ffd83dbSDimitry Andric 
68095ffd83dbSDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode.
68105ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
68115ffd83dbSDimitry Andric LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
681206c3fb27SDimitry Andric   const LLT S1 = LLT::scalar(1);
681306c3fb27SDimitry Andric   const LLT S32 = LLT::scalar(32);
681406c3fb27SDimitry Andric 
681506c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
681606c3fb27SDimitry Andric   assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
681706c3fb27SDimitry Andric          MRI.getType(Src).getScalarType() == LLT::scalar(64));
68185ffd83dbSDimitry Andric 
68195ffd83dbSDimitry Andric   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
68205ffd83dbSDimitry Andric     return UnableToLegalize;
68215ffd83dbSDimitry Andric 
682206c3fb27SDimitry Andric   if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
682306c3fb27SDimitry Andric     unsigned Flags = MI.getFlags();
682406c3fb27SDimitry Andric     auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
682506c3fb27SDimitry Andric     MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
682606c3fb27SDimitry Andric     MI.eraseFromParent();
682706c3fb27SDimitry Andric     return Legalized;
682806c3fb27SDimitry Andric   }
682906c3fb27SDimitry Andric 
68305ffd83dbSDimitry Andric   const unsigned ExpMask = 0x7ff;
68315ffd83dbSDimitry Andric   const unsigned ExpBiasf64 = 1023;
68325ffd83dbSDimitry Andric   const unsigned ExpBiasf16 = 15;
68335ffd83dbSDimitry Andric 
68345ffd83dbSDimitry Andric   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
68355ffd83dbSDimitry Andric   Register U = Unmerge.getReg(0);
68365ffd83dbSDimitry Andric   Register UH = Unmerge.getReg(1);
68375ffd83dbSDimitry Andric 
68385ffd83dbSDimitry Andric   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
68395ffd83dbSDimitry Andric   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
68405ffd83dbSDimitry Andric 
68415ffd83dbSDimitry Andric   // Subtract the fp64 exponent bias (1023) to get the real exponent and
68425ffd83dbSDimitry Andric   // add the f16 bias (15) to get the biased exponent for the f16 format.
68435ffd83dbSDimitry Andric   E = MIRBuilder.buildAdd(
68445ffd83dbSDimitry Andric     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
68455ffd83dbSDimitry Andric 
68465ffd83dbSDimitry Andric   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
68475ffd83dbSDimitry Andric   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
68485ffd83dbSDimitry Andric 
68495ffd83dbSDimitry Andric   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
68505ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1ff));
68515ffd83dbSDimitry Andric   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
68525ffd83dbSDimitry Andric 
68535ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildConstant(S32, 0);
68545ffd83dbSDimitry Andric   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
68555ffd83dbSDimitry Andric   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
68565ffd83dbSDimitry Andric   M = MIRBuilder.buildOr(S32, M, Lo40Set);
68575ffd83dbSDimitry Andric 
68585ffd83dbSDimitry Andric   // (M != 0 ? 0x0200 : 0) | 0x7c00;
68595ffd83dbSDimitry Andric   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
68605ffd83dbSDimitry Andric   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
68615ffd83dbSDimitry Andric   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
68625ffd83dbSDimitry Andric 
68635ffd83dbSDimitry Andric   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
68645ffd83dbSDimitry Andric   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
68655ffd83dbSDimitry Andric 
68665ffd83dbSDimitry Andric   // N = M | (E << 12);
68675ffd83dbSDimitry Andric   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
68685ffd83dbSDimitry Andric   auto N = MIRBuilder.buildOr(S32, M, EShl12);
68695ffd83dbSDimitry Andric 
68705ffd83dbSDimitry Andric   // B = clamp(1-E, 0, 13);
68715ffd83dbSDimitry Andric   auto One = MIRBuilder.buildConstant(S32, 1);
68725ffd83dbSDimitry Andric   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
68735ffd83dbSDimitry Andric   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
68745ffd83dbSDimitry Andric   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
68755ffd83dbSDimitry Andric 
68765ffd83dbSDimitry Andric   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
68775ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 0x1000));
68785ffd83dbSDimitry Andric 
68795ffd83dbSDimitry Andric   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
68805ffd83dbSDimitry Andric   auto D0 = MIRBuilder.buildShl(S32, D, B);
68815ffd83dbSDimitry Andric 
68825ffd83dbSDimitry Andric   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
68835ffd83dbSDimitry Andric                                              D0, SigSetHigh);
68845ffd83dbSDimitry Andric   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
68855ffd83dbSDimitry Andric   D = MIRBuilder.buildOr(S32, D, D1);
68865ffd83dbSDimitry Andric 
68875ffd83dbSDimitry Andric   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
68885ffd83dbSDimitry Andric   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
68895ffd83dbSDimitry Andric 
68905ffd83dbSDimitry Andric   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
68915ffd83dbSDimitry Andric   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
68925ffd83dbSDimitry Andric 
68935ffd83dbSDimitry Andric   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
68945ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 3));
68955ffd83dbSDimitry Andric   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
68965ffd83dbSDimitry Andric 
68975ffd83dbSDimitry Andric   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
68985ffd83dbSDimitry Andric                                        MIRBuilder.buildConstant(S32, 5));
68995ffd83dbSDimitry Andric   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
69005ffd83dbSDimitry Andric 
69015ffd83dbSDimitry Andric   V1 = MIRBuilder.buildOr(S32, V0, V1);
69025ffd83dbSDimitry Andric   V = MIRBuilder.buildAdd(S32, V, V1);
69035ffd83dbSDimitry Andric 
69045ffd83dbSDimitry Andric   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
69055ffd83dbSDimitry Andric                                        E, MIRBuilder.buildConstant(S32, 30));
69065ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt30,
69075ffd83dbSDimitry Andric                              MIRBuilder.buildConstant(S32, 0x7c00), V);
69085ffd83dbSDimitry Andric 
69095ffd83dbSDimitry Andric   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
69105ffd83dbSDimitry Andric                                          E, MIRBuilder.buildConstant(S32, 1039));
69115ffd83dbSDimitry Andric   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
69125ffd83dbSDimitry Andric 
69135ffd83dbSDimitry Andric   // Extract the sign bit.
69145ffd83dbSDimitry Andric   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
69155ffd83dbSDimitry Andric   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
69165ffd83dbSDimitry Andric 
69175ffd83dbSDimitry Andric   // Insert the sign bit
69185ffd83dbSDimitry Andric   V = MIRBuilder.buildOr(S32, Sign, V);
69195ffd83dbSDimitry Andric 
69205ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst, V);
69215ffd83dbSDimitry Andric   MI.eraseFromParent();
69225ffd83dbSDimitry Andric   return Legalized;
69235ffd83dbSDimitry Andric }
69245ffd83dbSDimitry Andric 
69255ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
6926e8d8bef9SDimitry Andric LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
692706c3fb27SDimitry Andric   auto [DstTy, SrcTy] = MI.getFirst2LLTs();
69285ffd83dbSDimitry Andric   const LLT S64 = LLT::scalar(64);
69295ffd83dbSDimitry Andric   const LLT S16 = LLT::scalar(16);
69305ffd83dbSDimitry Andric 
69315ffd83dbSDimitry Andric   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
69325ffd83dbSDimitry Andric     return lowerFPTRUNC_F64_TO_F16(MI);
69335ffd83dbSDimitry Andric 
69345ffd83dbSDimitry Andric   return UnableToLegalize;
69355ffd83dbSDimitry Andric }
69365ffd83dbSDimitry Andric 
6937e8d8bef9SDimitry Andric // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
6938e8d8bef9SDimitry Andric // multiplication tree.
6939e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
694006c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
6941e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Dst);
6942e8d8bef9SDimitry Andric 
6943e8d8bef9SDimitry Andric   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
6944e8d8bef9SDimitry Andric   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
6945e8d8bef9SDimitry Andric   MI.eraseFromParent();
6946e8d8bef9SDimitry Andric   return Legalized;
6947e8d8bef9SDimitry Andric }
6948e8d8bef9SDimitry Andric 
69490b57cec5SDimitry Andric static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
69500b57cec5SDimitry Andric   switch (Opc) {
69510b57cec5SDimitry Andric   case TargetOpcode::G_SMIN:
69520b57cec5SDimitry Andric     return CmpInst::ICMP_SLT;
69530b57cec5SDimitry Andric   case TargetOpcode::G_SMAX:
69540b57cec5SDimitry Andric     return CmpInst::ICMP_SGT;
69550b57cec5SDimitry Andric   case TargetOpcode::G_UMIN:
69560b57cec5SDimitry Andric     return CmpInst::ICMP_ULT;
69570b57cec5SDimitry Andric   case TargetOpcode::G_UMAX:
69580b57cec5SDimitry Andric     return CmpInst::ICMP_UGT;
69590b57cec5SDimitry Andric   default:
69600b57cec5SDimitry Andric     llvm_unreachable("not in integer min/max");
69610b57cec5SDimitry Andric   }
69620b57cec5SDimitry Andric }
69630b57cec5SDimitry Andric 
6964e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
696506c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
69660b57cec5SDimitry Andric 
69670b57cec5SDimitry Andric   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
69680b57cec5SDimitry Andric   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
69690b57cec5SDimitry Andric 
69700b57cec5SDimitry Andric   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
69710b57cec5SDimitry Andric   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
69720b57cec5SDimitry Andric 
69730b57cec5SDimitry Andric   MI.eraseFromParent();
69740b57cec5SDimitry Andric   return Legalized;
69750b57cec5SDimitry Andric }
69760b57cec5SDimitry Andric 
69770b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
6978e8d8bef9SDimitry Andric LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
697906c3fb27SDimitry Andric   auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
69800b57cec5SDimitry Andric   const int Src0Size = Src0Ty.getScalarSizeInBits();
69810b57cec5SDimitry Andric   const int Src1Size = Src1Ty.getScalarSizeInBits();
69820b57cec5SDimitry Andric 
69830b57cec5SDimitry Andric   auto SignBitMask = MIRBuilder.buildConstant(
69840b57cec5SDimitry Andric     Src0Ty, APInt::getSignMask(Src0Size));
69850b57cec5SDimitry Andric 
69860b57cec5SDimitry Andric   auto NotSignBitMask = MIRBuilder.buildConstant(
69870b57cec5SDimitry Andric     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
69880b57cec5SDimitry Andric 
6989fe6060f1SDimitry Andric   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
6990fe6060f1SDimitry Andric   Register And1;
69910b57cec5SDimitry Andric   if (Src0Ty == Src1Ty) {
6992fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
69930b57cec5SDimitry Andric   } else if (Src0Size > Src1Size) {
69940b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
69950b57cec5SDimitry Andric     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
69960b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
6997fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
69980b57cec5SDimitry Andric   } else {
69990b57cec5SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
70000b57cec5SDimitry Andric     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
70010b57cec5SDimitry Andric     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7002fe6060f1SDimitry Andric     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
70030b57cec5SDimitry Andric   }
70040b57cec5SDimitry Andric 
70050b57cec5SDimitry Andric   // Be careful about setting nsz/nnan/ninf on every instruction, since the
70060b57cec5SDimitry Andric   // constants are a nan and -0.0, but the final result should preserve
70070b57cec5SDimitry Andric   // everything.
7008fe6060f1SDimitry Andric   unsigned Flags = MI.getFlags();
7009fe6060f1SDimitry Andric   MIRBuilder.buildOr(Dst, And0, And1, Flags);
70100b57cec5SDimitry Andric 
70110b57cec5SDimitry Andric   MI.eraseFromParent();
70120b57cec5SDimitry Andric   return Legalized;
70130b57cec5SDimitry Andric }
70140b57cec5SDimitry Andric 
70150b57cec5SDimitry Andric LegalizerHelper::LegalizeResult
70160b57cec5SDimitry Andric LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
70170b57cec5SDimitry Andric   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
70180b57cec5SDimitry Andric     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
70190b57cec5SDimitry Andric 
702006c3fb27SDimitry Andric   auto [Dst, Src0, Src1] = MI.getFirst3Regs();
70210b57cec5SDimitry Andric   LLT Ty = MRI.getType(Dst);
70220b57cec5SDimitry Andric 
70230b57cec5SDimitry Andric   if (!MI.getFlag(MachineInstr::FmNoNans)) {
70240b57cec5SDimitry Andric     // Insert canonicalizes if it's possible we need to quiet to get correct
70250b57cec5SDimitry Andric     // sNaN behavior.
70260b57cec5SDimitry Andric 
70270b57cec5SDimitry Andric     // Note this must be done here, and not as an optimization combine in the
70280b57cec5SDimitry Andric     // absence of a dedicate quiet-snan instruction as we're using an
70290b57cec5SDimitry Andric     // omni-purpose G_FCANONICALIZE.
70300b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src0, MRI))
70310b57cec5SDimitry Andric       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
70320b57cec5SDimitry Andric 
70330b57cec5SDimitry Andric     if (!isKnownNeverSNaN(Src1, MRI))
70340b57cec5SDimitry Andric       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
70350b57cec5SDimitry Andric   }
70360b57cec5SDimitry Andric 
70370b57cec5SDimitry Andric   // If there are no nans, it's safe to simply replace this with the non-IEEE
70380b57cec5SDimitry Andric   // version.
70390b57cec5SDimitry Andric   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
70400b57cec5SDimitry Andric   MI.eraseFromParent();
70410b57cec5SDimitry Andric   return Legalized;
70420b57cec5SDimitry Andric }
70438bcb0991SDimitry Andric 
70448bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
70458bcb0991SDimitry Andric   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
70468bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
70478bcb0991SDimitry Andric   LLT Ty = MRI.getType(DstReg);
70488bcb0991SDimitry Andric   unsigned Flags = MI.getFlags();
70498bcb0991SDimitry Andric 
70508bcb0991SDimitry Andric   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
70518bcb0991SDimitry Andric                                   Flags);
70528bcb0991SDimitry Andric   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
70538bcb0991SDimitry Andric   MI.eraseFromParent();
70548bcb0991SDimitry Andric   return Legalized;
70558bcb0991SDimitry Andric }
70568bcb0991SDimitry Andric 
70578bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
7058480093f4SDimitry Andric LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
705906c3fb27SDimitry Andric   auto [DstReg, X] = MI.getFirst2Regs();
70605ffd83dbSDimitry Andric   const unsigned Flags = MI.getFlags();
70615ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(DstReg);
70625ffd83dbSDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
70635ffd83dbSDimitry Andric 
70645ffd83dbSDimitry Andric   // round(x) =>
70655ffd83dbSDimitry Andric   //  t = trunc(x);
70665ffd83dbSDimitry Andric   //  d = fabs(x - t);
70675f757f3fSDimitry Andric   //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
70685f757f3fSDimitry Andric   //  return t + o;
70695ffd83dbSDimitry Andric 
70705ffd83dbSDimitry Andric   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
70715ffd83dbSDimitry Andric 
70725ffd83dbSDimitry Andric   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
70735ffd83dbSDimitry Andric   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
70745f757f3fSDimitry Andric 
70755ffd83dbSDimitry Andric   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
70765f757f3fSDimitry Andric   auto Cmp =
70775f757f3fSDimitry Andric       MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
70785ffd83dbSDimitry Andric 
70795f757f3fSDimitry Andric   // Could emit G_UITOFP instead
70805f757f3fSDimitry Andric   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
70815f757f3fSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
70825f757f3fSDimitry Andric   auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
70835f757f3fSDimitry Andric   auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
70845ffd83dbSDimitry Andric 
70855f757f3fSDimitry Andric   MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
70865ffd83dbSDimitry Andric 
70875ffd83dbSDimitry Andric   MI.eraseFromParent();
70885ffd83dbSDimitry Andric   return Legalized;
70895ffd83dbSDimitry Andric }
70905ffd83dbSDimitry Andric 
709106c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
709206c3fb27SDimitry Andric   auto [DstReg, SrcReg] = MI.getFirst2Regs();
7093480093f4SDimitry Andric   unsigned Flags = MI.getFlags();
7094480093f4SDimitry Andric   LLT Ty = MRI.getType(DstReg);
7095480093f4SDimitry Andric   const LLT CondTy = Ty.changeElementSize(1);
7096480093f4SDimitry Andric 
7097480093f4SDimitry Andric   // result = trunc(src);
7098480093f4SDimitry Andric   // if (src < 0.0 && src != result)
7099480093f4SDimitry Andric   //   result += -1.0.
7100480093f4SDimitry Andric 
7101480093f4SDimitry Andric   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
71025ffd83dbSDimitry Andric   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7103480093f4SDimitry Andric 
7104480093f4SDimitry Andric   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7105480093f4SDimitry Andric                                   SrcReg, Zero, Flags);
7106480093f4SDimitry Andric   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7107480093f4SDimitry Andric                                       SrcReg, Trunc, Flags);
7108480093f4SDimitry Andric   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7109480093f4SDimitry Andric   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7110480093f4SDimitry Andric 
71115ffd83dbSDimitry Andric   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
71125ffd83dbSDimitry Andric   MI.eraseFromParent();
71135ffd83dbSDimitry Andric   return Legalized;
71145ffd83dbSDimitry Andric }
71155ffd83dbSDimitry Andric 
71165ffd83dbSDimitry Andric LegalizerHelper::LegalizeResult
71175ffd83dbSDimitry Andric LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
71185ffd83dbSDimitry Andric   const unsigned NumOps = MI.getNumOperands();
711906c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
712006c3fb27SDimitry Andric   unsigned PartSize = Src0Ty.getSizeInBits();
71215ffd83dbSDimitry Andric 
71225ffd83dbSDimitry Andric   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
71235ffd83dbSDimitry Andric   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
71245ffd83dbSDimitry Andric 
71255ffd83dbSDimitry Andric   for (unsigned I = 2; I != NumOps; ++I) {
71265ffd83dbSDimitry Andric     const unsigned Offset = (I - 1) * PartSize;
71275ffd83dbSDimitry Andric 
71285ffd83dbSDimitry Andric     Register SrcReg = MI.getOperand(I).getReg();
71295ffd83dbSDimitry Andric     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
71305ffd83dbSDimitry Andric 
71315ffd83dbSDimitry Andric     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
71325ffd83dbSDimitry Andric       MRI.createGenericVirtualRegister(WideTy);
71335ffd83dbSDimitry Andric 
71345ffd83dbSDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
71355ffd83dbSDimitry Andric     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
71365ffd83dbSDimitry Andric     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
71375ffd83dbSDimitry Andric     ResultReg = NextResult;
71385ffd83dbSDimitry Andric   }
71395ffd83dbSDimitry Andric 
71405ffd83dbSDimitry Andric   if (DstTy.isPointer()) {
71415ffd83dbSDimitry Andric     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
71425ffd83dbSDimitry Andric           DstTy.getAddressSpace())) {
71435ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
71445ffd83dbSDimitry Andric       return UnableToLegalize;
71455ffd83dbSDimitry Andric     }
71465ffd83dbSDimitry Andric 
71475ffd83dbSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
71485ffd83dbSDimitry Andric   }
71495ffd83dbSDimitry Andric 
7150480093f4SDimitry Andric   MI.eraseFromParent();
7151480093f4SDimitry Andric   return Legalized;
7152480093f4SDimitry Andric }
7153480093f4SDimitry Andric 
7154480093f4SDimitry Andric LegalizerHelper::LegalizeResult
71558bcb0991SDimitry Andric LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
71568bcb0991SDimitry Andric   const unsigned NumDst = MI.getNumOperands() - 1;
71575ffd83dbSDimitry Andric   Register SrcReg = MI.getOperand(NumDst).getReg();
71588bcb0991SDimitry Andric   Register Dst0Reg = MI.getOperand(0).getReg();
71598bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Dst0Reg);
71605ffd83dbSDimitry Andric   if (DstTy.isPointer())
71615ffd83dbSDimitry Andric     return UnableToLegalize; // TODO
71628bcb0991SDimitry Andric 
71635ffd83dbSDimitry Andric   SrcReg = coerceToScalar(SrcReg);
71645ffd83dbSDimitry Andric   if (!SrcReg)
71655ffd83dbSDimitry Andric     return UnableToLegalize;
71668bcb0991SDimitry Andric 
71678bcb0991SDimitry Andric   // Expand scalarizing unmerge as bitcast to integer and shift.
71685ffd83dbSDimitry Andric   LLT IntTy = MRI.getType(SrcReg);
71698bcb0991SDimitry Andric 
71705ffd83dbSDimitry Andric   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
71718bcb0991SDimitry Andric 
71728bcb0991SDimitry Andric   const unsigned DstSize = DstTy.getSizeInBits();
71738bcb0991SDimitry Andric   unsigned Offset = DstSize;
71748bcb0991SDimitry Andric   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
71758bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
71765ffd83dbSDimitry Andric     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
71778bcb0991SDimitry Andric     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
71788bcb0991SDimitry Andric   }
71798bcb0991SDimitry Andric 
71808bcb0991SDimitry Andric   MI.eraseFromParent();
71818bcb0991SDimitry Andric   return Legalized;
71828bcb0991SDimitry Andric }
71838bcb0991SDimitry Andric 
7184e8d8bef9SDimitry Andric /// Lower a vector extract or insert by writing the vector to a stack temporary
7185e8d8bef9SDimitry Andric /// and reloading the element or vector.
7186e8d8bef9SDimitry Andric ///
7187e8d8bef9SDimitry Andric /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7188e8d8bef9SDimitry Andric ///  =>
7189e8d8bef9SDimitry Andric ///  %stack_temp = G_FRAME_INDEX
7190e8d8bef9SDimitry Andric ///  G_STORE %vec, %stack_temp
7191e8d8bef9SDimitry Andric ///  %idx = clamp(%idx, %vec.getNumElements())
7192e8d8bef9SDimitry Andric ///  %element_ptr = G_PTR_ADD %stack_temp, %idx
7193e8d8bef9SDimitry Andric ///  %dst = G_LOAD %element_ptr
7194e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7195e8d8bef9SDimitry Andric LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7196e8d8bef9SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
7197e8d8bef9SDimitry Andric   Register SrcVec = MI.getOperand(1).getReg();
7198e8d8bef9SDimitry Andric   Register InsertVal;
7199e8d8bef9SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7200e8d8bef9SDimitry Andric     InsertVal = MI.getOperand(2).getReg();
7201e8d8bef9SDimitry Andric 
7202e8d8bef9SDimitry Andric   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7203e8d8bef9SDimitry Andric 
7204e8d8bef9SDimitry Andric   LLT VecTy = MRI.getType(SrcVec);
7205e8d8bef9SDimitry Andric   LLT EltTy = VecTy.getElementType();
72060eae32dcSDimitry Andric   unsigned NumElts = VecTy.getNumElements();
72070eae32dcSDimitry Andric 
72080eae32dcSDimitry Andric   int64_t IdxVal;
72090eae32dcSDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
72100eae32dcSDimitry Andric     SmallVector<Register, 8> SrcRegs;
72110eae32dcSDimitry Andric     extractParts(SrcVec, EltTy, NumElts, SrcRegs);
72120eae32dcSDimitry Andric 
72130eae32dcSDimitry Andric     if (InsertVal) {
72140eae32dcSDimitry Andric       SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7215bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
72160eae32dcSDimitry Andric     } else {
72170eae32dcSDimitry Andric       MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
72180eae32dcSDimitry Andric     }
72190eae32dcSDimitry Andric 
72200eae32dcSDimitry Andric     MI.eraseFromParent();
72210eae32dcSDimitry Andric     return Legalized;
72220eae32dcSDimitry Andric   }
72230eae32dcSDimitry Andric 
7224e8d8bef9SDimitry Andric   if (!EltTy.isByteSized()) { // Not implemented.
7225e8d8bef9SDimitry Andric     LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7226e8d8bef9SDimitry Andric     return UnableToLegalize;
7227e8d8bef9SDimitry Andric   }
7228e8d8bef9SDimitry Andric 
7229e8d8bef9SDimitry Andric   unsigned EltBytes = EltTy.getSizeInBytes();
7230e8d8bef9SDimitry Andric   Align VecAlign = getStackTemporaryAlignment(VecTy);
7231e8d8bef9SDimitry Andric   Align EltAlign;
7232e8d8bef9SDimitry Andric 
7233e8d8bef9SDimitry Andric   MachinePointerInfo PtrInfo;
72345f757f3fSDimitry Andric   auto StackTemp = createStackTemporary(
72355f757f3fSDimitry Andric       TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7236e8d8bef9SDimitry Andric   MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7237e8d8bef9SDimitry Andric 
7238e8d8bef9SDimitry Andric   // Get the pointer to the element, and be sure not to hit undefined behavior
7239e8d8bef9SDimitry Andric   // if the index is out of bounds.
7240e8d8bef9SDimitry Andric   Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7241e8d8bef9SDimitry Andric 
7242e8d8bef9SDimitry Andric   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7243e8d8bef9SDimitry Andric     int64_t Offset = IdxVal * EltBytes;
7244e8d8bef9SDimitry Andric     PtrInfo = PtrInfo.getWithOffset(Offset);
7245e8d8bef9SDimitry Andric     EltAlign = commonAlignment(VecAlign, Offset);
7246e8d8bef9SDimitry Andric   } else {
7247e8d8bef9SDimitry Andric     // We lose information with a variable offset.
7248e8d8bef9SDimitry Andric     EltAlign = getStackTemporaryAlignment(EltTy);
7249e8d8bef9SDimitry Andric     PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7250e8d8bef9SDimitry Andric   }
7251e8d8bef9SDimitry Andric 
7252e8d8bef9SDimitry Andric   if (InsertVal) {
7253e8d8bef9SDimitry Andric     // Write the inserted element
7254e8d8bef9SDimitry Andric     MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7255e8d8bef9SDimitry Andric 
7256e8d8bef9SDimitry Andric     // Reload the whole vector.
7257e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7258e8d8bef9SDimitry Andric   } else {
7259e8d8bef9SDimitry Andric     MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7260e8d8bef9SDimitry Andric   }
7261e8d8bef9SDimitry Andric 
7262e8d8bef9SDimitry Andric   MI.eraseFromParent();
7263e8d8bef9SDimitry Andric   return Legalized;
7264e8d8bef9SDimitry Andric }
7265e8d8bef9SDimitry Andric 
72668bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
72678bcb0991SDimitry Andric LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
726806c3fb27SDimitry Andric   auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
726906c3fb27SDimitry Andric       MI.getFirst3RegLLTs();
72708bcb0991SDimitry Andric   LLT IdxTy = LLT::scalar(32);
72718bcb0991SDimitry Andric 
7272480093f4SDimitry Andric   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
72738bcb0991SDimitry Andric   Register Undef;
72748bcb0991SDimitry Andric   SmallVector<Register, 32> BuildVec;
72755f757f3fSDimitry Andric   LLT EltTy = DstTy.getScalarType();
72768bcb0991SDimitry Andric 
72778bcb0991SDimitry Andric   for (int Idx : Mask) {
72788bcb0991SDimitry Andric     if (Idx < 0) {
72798bcb0991SDimitry Andric       if (!Undef.isValid())
72808bcb0991SDimitry Andric         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
72818bcb0991SDimitry Andric       BuildVec.push_back(Undef);
72828bcb0991SDimitry Andric       continue;
72838bcb0991SDimitry Andric     }
72848bcb0991SDimitry Andric 
72858bcb0991SDimitry Andric     if (Src0Ty.isScalar()) {
72868bcb0991SDimitry Andric       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
72878bcb0991SDimitry Andric     } else {
72888bcb0991SDimitry Andric       int NumElts = Src0Ty.getNumElements();
72898bcb0991SDimitry Andric       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
72908bcb0991SDimitry Andric       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
72918bcb0991SDimitry Andric       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
72928bcb0991SDimitry Andric       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
72938bcb0991SDimitry Andric       BuildVec.push_back(Extract.getReg(0));
72948bcb0991SDimitry Andric     }
72958bcb0991SDimitry Andric   }
72968bcb0991SDimitry Andric 
72975f757f3fSDimitry Andric   if (DstTy.isScalar())
72985f757f3fSDimitry Andric     MIRBuilder.buildCopy(DstReg, BuildVec[0]);
72995f757f3fSDimitry Andric   else
73008bcb0991SDimitry Andric     MIRBuilder.buildBuildVector(DstReg, BuildVec);
73018bcb0991SDimitry Andric   MI.eraseFromParent();
73028bcb0991SDimitry Andric   return Legalized;
73038bcb0991SDimitry Andric }
73048bcb0991SDimitry Andric 
73055f757f3fSDimitry Andric Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
73065f757f3fSDimitry Andric                                                     Register AllocSize,
73075f757f3fSDimitry Andric                                                     Align Alignment,
73085f757f3fSDimitry Andric                                                     LLT PtrTy) {
73098bcb0991SDimitry Andric   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
73108bcb0991SDimitry Andric 
73118bcb0991SDimitry Andric   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
73128bcb0991SDimitry Andric   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
73138bcb0991SDimitry Andric 
73148bcb0991SDimitry Andric   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
73158bcb0991SDimitry Andric   // have to generate an extra instruction to negate the alloc and then use
7316480093f4SDimitry Andric   // G_PTR_ADD to add the negative offset.
73178bcb0991SDimitry Andric   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
73185ffd83dbSDimitry Andric   if (Alignment > Align(1)) {
73195ffd83dbSDimitry Andric     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
73208bcb0991SDimitry Andric     AlignMask.negate();
73218bcb0991SDimitry Andric     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
73228bcb0991SDimitry Andric     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
73238bcb0991SDimitry Andric   }
73248bcb0991SDimitry Andric 
73255f757f3fSDimitry Andric   return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
73265f757f3fSDimitry Andric }
73275f757f3fSDimitry Andric 
73285f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
73295f757f3fSDimitry Andric LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
73305f757f3fSDimitry Andric   const auto &MF = *MI.getMF();
73315f757f3fSDimitry Andric   const auto &TFI = *MF.getSubtarget().getFrameLowering();
73325f757f3fSDimitry Andric   if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
73335f757f3fSDimitry Andric     return UnableToLegalize;
73345f757f3fSDimitry Andric 
73355f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
73365f757f3fSDimitry Andric   Register AllocSize = MI.getOperand(1).getReg();
73375f757f3fSDimitry Andric   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
73385f757f3fSDimitry Andric 
73395f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
73405f757f3fSDimitry Andric   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
73415f757f3fSDimitry Andric   Register SPTmp =
73425f757f3fSDimitry Andric       getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
73435f757f3fSDimitry Andric 
73448bcb0991SDimitry Andric   MIRBuilder.buildCopy(SPReg, SPTmp);
73458bcb0991SDimitry Andric   MIRBuilder.buildCopy(Dst, SPTmp);
73468bcb0991SDimitry Andric 
73478bcb0991SDimitry Andric   MI.eraseFromParent();
73488bcb0991SDimitry Andric   return Legalized;
73498bcb0991SDimitry Andric }
73508bcb0991SDimitry Andric 
73518bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
73525f757f3fSDimitry Andric LegalizerHelper::lowerStackSave(MachineInstr &MI) {
73535f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
73545f757f3fSDimitry Andric   if (!StackPtr)
73555f757f3fSDimitry Andric     return UnableToLegalize;
73565f757f3fSDimitry Andric 
73575f757f3fSDimitry Andric   MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
73585f757f3fSDimitry Andric   MI.eraseFromParent();
73595f757f3fSDimitry Andric   return Legalized;
73605f757f3fSDimitry Andric }
73615f757f3fSDimitry Andric 
73625f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
73635f757f3fSDimitry Andric LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
73645f757f3fSDimitry Andric   Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
73655f757f3fSDimitry Andric   if (!StackPtr)
73665f757f3fSDimitry Andric     return UnableToLegalize;
73675f757f3fSDimitry Andric 
73685f757f3fSDimitry Andric   MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
73695f757f3fSDimitry Andric   MI.eraseFromParent();
73705f757f3fSDimitry Andric   return Legalized;
73715f757f3fSDimitry Andric }
73725f757f3fSDimitry Andric 
73735f757f3fSDimitry Andric LegalizerHelper::LegalizeResult
73748bcb0991SDimitry Andric LegalizerHelper::lowerExtract(MachineInstr &MI) {
737506c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
73768bcb0991SDimitry Andric   unsigned Offset = MI.getOperand(2).getImm();
73778bcb0991SDimitry Andric 
73780eae32dcSDimitry Andric   // Extract sub-vector or one element
73790eae32dcSDimitry Andric   if (SrcTy.isVector()) {
73800eae32dcSDimitry Andric     unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
73810eae32dcSDimitry Andric     unsigned DstSize = DstTy.getSizeInBits();
73820eae32dcSDimitry Andric 
73830eae32dcSDimitry Andric     if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
73840eae32dcSDimitry Andric         (Offset + DstSize <= SrcTy.getSizeInBits())) {
73850eae32dcSDimitry Andric       // Unmerge and allow access to each Src element for the artifact combiner.
738606c3fb27SDimitry Andric       auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
73870eae32dcSDimitry Andric 
73880eae32dcSDimitry Andric       // Take element(s) we need to extract and copy it (merge them).
73890eae32dcSDimitry Andric       SmallVector<Register, 8> SubVectorElts;
73900eae32dcSDimitry Andric       for (unsigned Idx = Offset / SrcEltSize;
73910eae32dcSDimitry Andric            Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
73920eae32dcSDimitry Andric         SubVectorElts.push_back(Unmerge.getReg(Idx));
73930eae32dcSDimitry Andric       }
73940eae32dcSDimitry Andric       if (SubVectorElts.size() == 1)
739506c3fb27SDimitry Andric         MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
73960eae32dcSDimitry Andric       else
739706c3fb27SDimitry Andric         MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
73980eae32dcSDimitry Andric 
73990eae32dcSDimitry Andric       MI.eraseFromParent();
74000eae32dcSDimitry Andric       return Legalized;
74010eae32dcSDimitry Andric     }
74020eae32dcSDimitry Andric   }
74030eae32dcSDimitry Andric 
74048bcb0991SDimitry Andric   if (DstTy.isScalar() &&
74058bcb0991SDimitry Andric       (SrcTy.isScalar() ||
74068bcb0991SDimitry Andric        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
74078bcb0991SDimitry Andric     LLT SrcIntTy = SrcTy;
74088bcb0991SDimitry Andric     if (!SrcTy.isScalar()) {
74098bcb0991SDimitry Andric       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
741006c3fb27SDimitry Andric       SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
74118bcb0991SDimitry Andric     }
74128bcb0991SDimitry Andric 
74138bcb0991SDimitry Andric     if (Offset == 0)
741406c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, SrcReg);
74158bcb0991SDimitry Andric     else {
74168bcb0991SDimitry Andric       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
741706c3fb27SDimitry Andric       auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
741806c3fb27SDimitry Andric       MIRBuilder.buildTrunc(DstReg, Shr);
74198bcb0991SDimitry Andric     }
74208bcb0991SDimitry Andric 
74218bcb0991SDimitry Andric     MI.eraseFromParent();
74228bcb0991SDimitry Andric     return Legalized;
74238bcb0991SDimitry Andric   }
74248bcb0991SDimitry Andric 
74258bcb0991SDimitry Andric   return UnableToLegalize;
74268bcb0991SDimitry Andric }
74278bcb0991SDimitry Andric 
74288bcb0991SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
742906c3fb27SDimitry Andric   auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
74308bcb0991SDimitry Andric   uint64_t Offset = MI.getOperand(3).getImm();
74318bcb0991SDimitry Andric 
74328bcb0991SDimitry Andric   LLT DstTy = MRI.getType(Src);
74338bcb0991SDimitry Andric   LLT InsertTy = MRI.getType(InsertSrc);
74348bcb0991SDimitry Andric 
74350eae32dcSDimitry Andric   // Insert sub-vector or one element
74360eae32dcSDimitry Andric   if (DstTy.isVector() && !InsertTy.isPointer()) {
74370eae32dcSDimitry Andric     LLT EltTy = DstTy.getElementType();
74380eae32dcSDimitry Andric     unsigned EltSize = EltTy.getSizeInBits();
74390eae32dcSDimitry Andric     unsigned InsertSize = InsertTy.getSizeInBits();
74400eae32dcSDimitry Andric 
74410eae32dcSDimitry Andric     if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
74420eae32dcSDimitry Andric         (Offset + InsertSize <= DstTy.getSizeInBits())) {
74430eae32dcSDimitry Andric       auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
74440eae32dcSDimitry Andric       SmallVector<Register, 8> DstElts;
74450eae32dcSDimitry Andric       unsigned Idx = 0;
74460eae32dcSDimitry Andric       // Elements from Src before insert start Offset
74470eae32dcSDimitry Andric       for (; Idx < Offset / EltSize; ++Idx) {
74480eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
74490eae32dcSDimitry Andric       }
74500eae32dcSDimitry Andric 
74510eae32dcSDimitry Andric       // Replace elements in Src with elements from InsertSrc
74520eae32dcSDimitry Andric       if (InsertTy.getSizeInBits() > EltSize) {
74530eae32dcSDimitry Andric         auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
74540eae32dcSDimitry Andric         for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
74550eae32dcSDimitry Andric              ++Idx, ++i) {
74560eae32dcSDimitry Andric           DstElts.push_back(UnmergeInsertSrc.getReg(i));
74570eae32dcSDimitry Andric         }
74580eae32dcSDimitry Andric       } else {
74590eae32dcSDimitry Andric         DstElts.push_back(InsertSrc);
74600eae32dcSDimitry Andric         ++Idx;
74610eae32dcSDimitry Andric       }
74620eae32dcSDimitry Andric 
74630eae32dcSDimitry Andric       // Remaining elements from Src after insert
74640eae32dcSDimitry Andric       for (; Idx < DstTy.getNumElements(); ++Idx) {
74650eae32dcSDimitry Andric         DstElts.push_back(UnmergeSrc.getReg(Idx));
74660eae32dcSDimitry Andric       }
74670eae32dcSDimitry Andric 
7468bdd1243dSDimitry Andric       MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
74690eae32dcSDimitry Andric       MI.eraseFromParent();
74700eae32dcSDimitry Andric       return Legalized;
74710eae32dcSDimitry Andric     }
74720eae32dcSDimitry Andric   }
74730eae32dcSDimitry Andric 
74745ffd83dbSDimitry Andric   if (InsertTy.isVector() ||
74755ffd83dbSDimitry Andric       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
74765ffd83dbSDimitry Andric     return UnableToLegalize;
74775ffd83dbSDimitry Andric 
74785ffd83dbSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
74795ffd83dbSDimitry Andric   if ((DstTy.isPointer() &&
74805ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
74815ffd83dbSDimitry Andric       (InsertTy.isPointer() &&
74825ffd83dbSDimitry Andric        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
74835ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
74845ffd83dbSDimitry Andric     return UnableToLegalize;
74855ffd83dbSDimitry Andric   }
74865ffd83dbSDimitry Andric 
74878bcb0991SDimitry Andric   LLT IntDstTy = DstTy;
74885ffd83dbSDimitry Andric 
74898bcb0991SDimitry Andric   if (!DstTy.isScalar()) {
74908bcb0991SDimitry Andric     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
74915ffd83dbSDimitry Andric     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
74925ffd83dbSDimitry Andric   }
74935ffd83dbSDimitry Andric 
74945ffd83dbSDimitry Andric   if (!InsertTy.isScalar()) {
74955ffd83dbSDimitry Andric     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
74965ffd83dbSDimitry Andric     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
74978bcb0991SDimitry Andric   }
74988bcb0991SDimitry Andric 
74998bcb0991SDimitry Andric   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
75008bcb0991SDimitry Andric   if (Offset != 0) {
75018bcb0991SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
75028bcb0991SDimitry Andric     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
75038bcb0991SDimitry Andric   }
75048bcb0991SDimitry Andric 
75055ffd83dbSDimitry Andric   APInt MaskVal = APInt::getBitsSetWithWrap(
75065ffd83dbSDimitry Andric       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
75078bcb0991SDimitry Andric 
75088bcb0991SDimitry Andric   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
75098bcb0991SDimitry Andric   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
75108bcb0991SDimitry Andric   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
75118bcb0991SDimitry Andric 
75125ffd83dbSDimitry Andric   MIRBuilder.buildCast(Dst, Or);
75138bcb0991SDimitry Andric   MI.eraseFromParent();
75148bcb0991SDimitry Andric   return Legalized;
75158bcb0991SDimitry Andric }
75168bcb0991SDimitry Andric 
75178bcb0991SDimitry Andric LegalizerHelper::LegalizeResult
75188bcb0991SDimitry Andric LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
751906c3fb27SDimitry Andric   auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
752006c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
75218bcb0991SDimitry Andric   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
75228bcb0991SDimitry Andric 
752306c3fb27SDimitry Andric   LLT Ty = Dst0Ty;
752406c3fb27SDimitry Andric   LLT BoolTy = Dst1Ty;
75258bcb0991SDimitry Andric 
75268bcb0991SDimitry Andric   if (IsAdd)
75278bcb0991SDimitry Andric     MIRBuilder.buildAdd(Dst0, LHS, RHS);
75288bcb0991SDimitry Andric   else
75298bcb0991SDimitry Andric     MIRBuilder.buildSub(Dst0, LHS, RHS);
75308bcb0991SDimitry Andric 
75318bcb0991SDimitry Andric   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
75328bcb0991SDimitry Andric 
75338bcb0991SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0);
75348bcb0991SDimitry Andric 
75358bcb0991SDimitry Andric   // For an addition, the result should be less than one of the operands (LHS)
75368bcb0991SDimitry Andric   // if and only if the other operand (RHS) is negative, otherwise there will
75378bcb0991SDimitry Andric   // be overflow.
75388bcb0991SDimitry Andric   // For a subtraction, the result should be less than one of the operands
75398bcb0991SDimitry Andric   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
75408bcb0991SDimitry Andric   // otherwise there will be overflow.
75418bcb0991SDimitry Andric   auto ResultLowerThanLHS =
75428bcb0991SDimitry Andric       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
75438bcb0991SDimitry Andric   auto ConditionRHS = MIRBuilder.buildICmp(
75448bcb0991SDimitry Andric       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
75458bcb0991SDimitry Andric 
75468bcb0991SDimitry Andric   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
75478bcb0991SDimitry Andric   MI.eraseFromParent();
75488bcb0991SDimitry Andric   return Legalized;
75498bcb0991SDimitry Andric }
7550480093f4SDimitry Andric 
7551480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7552e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
755306c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7554e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7555e8d8bef9SDimitry Andric   bool IsSigned;
7556e8d8bef9SDimitry Andric   bool IsAdd;
7557e8d8bef9SDimitry Andric   unsigned BaseOp;
7558e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7559e8d8bef9SDimitry Andric   default:
7560e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7561e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7562e8d8bef9SDimitry Andric     IsSigned = false;
7563e8d8bef9SDimitry Andric     IsAdd = true;
7564e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7565e8d8bef9SDimitry Andric     break;
7566e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7567e8d8bef9SDimitry Andric     IsSigned = true;
7568e8d8bef9SDimitry Andric     IsAdd = true;
7569e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_ADD;
7570e8d8bef9SDimitry Andric     break;
7571e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7572e8d8bef9SDimitry Andric     IsSigned = false;
7573e8d8bef9SDimitry Andric     IsAdd = false;
7574e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7575e8d8bef9SDimitry Andric     break;
7576e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7577e8d8bef9SDimitry Andric     IsSigned = true;
7578e8d8bef9SDimitry Andric     IsAdd = false;
7579e8d8bef9SDimitry Andric     BaseOp = TargetOpcode::G_SUB;
7580e8d8bef9SDimitry Andric     break;
7581e8d8bef9SDimitry Andric   }
7582e8d8bef9SDimitry Andric 
7583e8d8bef9SDimitry Andric   if (IsSigned) {
7584e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7585e8d8bef9SDimitry Andric     //   hi = 0x7fffffff - smax(a, 0)
7586e8d8bef9SDimitry Andric     //   lo = 0x80000000 - smin(a, 0)
7587e8d8bef9SDimitry Andric     //   a + smin(smax(lo, b), hi)
7588e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7589e8d8bef9SDimitry Andric     //   lo = smax(a, -1) - 0x7fffffff
7590e8d8bef9SDimitry Andric     //   hi = smin(a, -1) - 0x80000000
7591e8d8bef9SDimitry Andric     //   a - smin(smax(lo, b), hi)
7592e8d8bef9SDimitry Andric     // TODO: AMDGPU can use a "median of 3" instruction here:
7593e8d8bef9SDimitry Andric     //   a +/- med3(lo, b, hi)
7594e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7595e8d8bef9SDimitry Andric     auto MaxVal =
7596e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7597e8d8bef9SDimitry Andric     auto MinVal =
7598e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7599e8d8bef9SDimitry Andric     MachineInstrBuilder Hi, Lo;
7600e8d8bef9SDimitry Andric     if (IsAdd) {
7601e8d8bef9SDimitry Andric       auto Zero = MIRBuilder.buildConstant(Ty, 0);
7602e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7603e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7604e8d8bef9SDimitry Andric     } else {
7605e8d8bef9SDimitry Andric       auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7606e8d8bef9SDimitry Andric       Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7607e8d8bef9SDimitry Andric                                MaxVal);
7608e8d8bef9SDimitry Andric       Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7609e8d8bef9SDimitry Andric                                MinVal);
7610e8d8bef9SDimitry Andric     }
7611e8d8bef9SDimitry Andric     auto RHSClamped =
7612e8d8bef9SDimitry Andric         MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7613e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7614e8d8bef9SDimitry Andric   } else {
7615e8d8bef9SDimitry Andric     // uadd.sat(a, b) -> a + umin(~a, b)
7616e8d8bef9SDimitry Andric     // usub.sat(a, b) -> a - umin(a, b)
7617e8d8bef9SDimitry Andric     Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7618e8d8bef9SDimitry Andric     auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7619e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7620e8d8bef9SDimitry Andric   }
7621e8d8bef9SDimitry Andric 
7622e8d8bef9SDimitry Andric   MI.eraseFromParent();
7623e8d8bef9SDimitry Andric   return Legalized;
7624e8d8bef9SDimitry Andric }
7625e8d8bef9SDimitry Andric 
7626e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7627e8d8bef9SDimitry Andric LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
762806c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7629e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7630e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7631e8d8bef9SDimitry Andric   bool IsSigned;
7632e8d8bef9SDimitry Andric   bool IsAdd;
7633e8d8bef9SDimitry Andric   unsigned OverflowOp;
7634e8d8bef9SDimitry Andric   switch (MI.getOpcode()) {
7635e8d8bef9SDimitry Andric   default:
7636e8d8bef9SDimitry Andric     llvm_unreachable("unexpected addsat/subsat opcode");
7637e8d8bef9SDimitry Andric   case TargetOpcode::G_UADDSAT:
7638e8d8bef9SDimitry Andric     IsSigned = false;
7639e8d8bef9SDimitry Andric     IsAdd = true;
7640e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_UADDO;
7641e8d8bef9SDimitry Andric     break;
7642e8d8bef9SDimitry Andric   case TargetOpcode::G_SADDSAT:
7643e8d8bef9SDimitry Andric     IsSigned = true;
7644e8d8bef9SDimitry Andric     IsAdd = true;
7645e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SADDO;
7646e8d8bef9SDimitry Andric     break;
7647e8d8bef9SDimitry Andric   case TargetOpcode::G_USUBSAT:
7648e8d8bef9SDimitry Andric     IsSigned = false;
7649e8d8bef9SDimitry Andric     IsAdd = false;
7650e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_USUBO;
7651e8d8bef9SDimitry Andric     break;
7652e8d8bef9SDimitry Andric   case TargetOpcode::G_SSUBSAT:
7653e8d8bef9SDimitry Andric     IsSigned = true;
7654e8d8bef9SDimitry Andric     IsAdd = false;
7655e8d8bef9SDimitry Andric     OverflowOp = TargetOpcode::G_SSUBO;
7656e8d8bef9SDimitry Andric     break;
7657e8d8bef9SDimitry Andric   }
7658e8d8bef9SDimitry Andric 
7659e8d8bef9SDimitry Andric   auto OverflowRes =
7660e8d8bef9SDimitry Andric       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7661e8d8bef9SDimitry Andric   Register Tmp = OverflowRes.getReg(0);
7662e8d8bef9SDimitry Andric   Register Ov = OverflowRes.getReg(1);
7663e8d8bef9SDimitry Andric   MachineInstrBuilder Clamp;
7664e8d8bef9SDimitry Andric   if (IsSigned) {
7665e8d8bef9SDimitry Andric     // sadd.sat(a, b) ->
7666e8d8bef9SDimitry Andric     //   {tmp, ov} = saddo(a, b)
7667e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7668e8d8bef9SDimitry Andric     // ssub.sat(a, b) ->
7669e8d8bef9SDimitry Andric     //   {tmp, ov} = ssubo(a, b)
7670e8d8bef9SDimitry Andric     //   ov ? (tmp >>s 31) + 0x80000000 : r
7671e8d8bef9SDimitry Andric     uint64_t NumBits = Ty.getScalarSizeInBits();
7672e8d8bef9SDimitry Andric     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7673e8d8bef9SDimitry Andric     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7674e8d8bef9SDimitry Andric     auto MinVal =
7675e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7676e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7677e8d8bef9SDimitry Andric   } else {
7678e8d8bef9SDimitry Andric     // uadd.sat(a, b) ->
7679e8d8bef9SDimitry Andric     //   {tmp, ov} = uaddo(a, b)
7680e8d8bef9SDimitry Andric     //   ov ? 0xffffffff : tmp
7681e8d8bef9SDimitry Andric     // usub.sat(a, b) ->
7682e8d8bef9SDimitry Andric     //   {tmp, ov} = usubo(a, b)
7683e8d8bef9SDimitry Andric     //   ov ? 0 : tmp
7684e8d8bef9SDimitry Andric     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7685e8d8bef9SDimitry Andric   }
7686e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7687e8d8bef9SDimitry Andric 
7688e8d8bef9SDimitry Andric   MI.eraseFromParent();
7689e8d8bef9SDimitry Andric   return Legalized;
7690e8d8bef9SDimitry Andric }
7691e8d8bef9SDimitry Andric 
7692e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7693e8d8bef9SDimitry Andric LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7694e8d8bef9SDimitry Andric   assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7695e8d8bef9SDimitry Andric           MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7696e8d8bef9SDimitry Andric          "Expected shlsat opcode!");
7697e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
769806c3fb27SDimitry Andric   auto [Res, LHS, RHS] = MI.getFirst3Regs();
7699e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Res);
7700e8d8bef9SDimitry Andric   LLT BoolTy = Ty.changeElementSize(1);
7701e8d8bef9SDimitry Andric 
7702e8d8bef9SDimitry Andric   unsigned BW = Ty.getScalarSizeInBits();
7703e8d8bef9SDimitry Andric   auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7704e8d8bef9SDimitry Andric   auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7705e8d8bef9SDimitry Andric                        : MIRBuilder.buildLShr(Ty, Result, RHS);
7706e8d8bef9SDimitry Andric 
7707e8d8bef9SDimitry Andric   MachineInstrBuilder SatVal;
7708e8d8bef9SDimitry Andric   if (IsSigned) {
7709e8d8bef9SDimitry Andric     auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7710e8d8bef9SDimitry Andric     auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7711e8d8bef9SDimitry Andric     auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7712e8d8bef9SDimitry Andric                                     MIRBuilder.buildConstant(Ty, 0));
7713e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7714e8d8bef9SDimitry Andric   } else {
7715e8d8bef9SDimitry Andric     SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7716e8d8bef9SDimitry Andric   }
7717e8d8bef9SDimitry Andric   auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7718e8d8bef9SDimitry Andric   MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7719e8d8bef9SDimitry Andric 
7720e8d8bef9SDimitry Andric   MI.eraseFromParent();
7721e8d8bef9SDimitry Andric   return Legalized;
7722e8d8bef9SDimitry Andric }
7723e8d8bef9SDimitry Andric 
772406c3fb27SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
772506c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7726480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
77275ffd83dbSDimitry Andric   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7728480093f4SDimitry Andric   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7729480093f4SDimitry Andric 
7730480093f4SDimitry Andric   // Swap most and least significant byte, set remaining bytes in Res to zero.
7731480093f4SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7732480093f4SDimitry Andric   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7733480093f4SDimitry Andric   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7734480093f4SDimitry Andric   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7735480093f4SDimitry Andric 
7736480093f4SDimitry Andric   // Set i-th high/low byte in Res to i-th low/high byte from Src.
7737480093f4SDimitry Andric   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7738480093f4SDimitry Andric     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7739480093f4SDimitry Andric     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7740480093f4SDimitry Andric     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7741480093f4SDimitry Andric     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7742480093f4SDimitry Andric     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7743480093f4SDimitry Andric     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7744480093f4SDimitry Andric     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7745480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7746480093f4SDimitry Andric     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7747480093f4SDimitry Andric     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7748480093f4SDimitry Andric     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7749480093f4SDimitry Andric     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7750480093f4SDimitry Andric   }
7751480093f4SDimitry Andric   Res.getInstr()->getOperand(0).setReg(Dst);
7752480093f4SDimitry Andric 
7753480093f4SDimitry Andric   MI.eraseFromParent();
7754480093f4SDimitry Andric   return Legalized;
7755480093f4SDimitry Andric }
7756480093f4SDimitry Andric 
7757480093f4SDimitry Andric //{ (Src & Mask) >> N } | { (Src << N) & Mask }
7758480093f4SDimitry Andric static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7759480093f4SDimitry Andric                                  MachineInstrBuilder Src, APInt Mask) {
7760480093f4SDimitry Andric   const LLT Ty = Dst.getLLTTy(*B.getMRI());
7761480093f4SDimitry Andric   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7762480093f4SDimitry Andric   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7763480093f4SDimitry Andric   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7764480093f4SDimitry Andric   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7765480093f4SDimitry Andric   return B.buildOr(Dst, LHS, RHS);
7766480093f4SDimitry Andric }
7767480093f4SDimitry Andric 
7768480093f4SDimitry Andric LegalizerHelper::LegalizeResult
7769480093f4SDimitry Andric LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
777006c3fb27SDimitry Andric   auto [Dst, Src] = MI.getFirst2Regs();
7771480093f4SDimitry Andric   const LLT Ty = MRI.getType(Src);
7772480093f4SDimitry Andric   unsigned Size = Ty.getSizeInBits();
7773480093f4SDimitry Andric 
7774480093f4SDimitry Andric   MachineInstrBuilder BSWAP =
7775480093f4SDimitry Andric       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7776480093f4SDimitry Andric 
7777480093f4SDimitry Andric   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7778480093f4SDimitry Andric   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7779480093f4SDimitry Andric   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7780480093f4SDimitry Andric   MachineInstrBuilder Swap4 =
7781480093f4SDimitry Andric       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7782480093f4SDimitry Andric 
7783480093f4SDimitry Andric   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7784480093f4SDimitry Andric   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7785480093f4SDimitry Andric   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7786480093f4SDimitry Andric   MachineInstrBuilder Swap2 =
7787480093f4SDimitry Andric       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7788480093f4SDimitry Andric 
7789480093f4SDimitry Andric   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7790480093f4SDimitry Andric   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7791480093f4SDimitry Andric   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7792480093f4SDimitry Andric   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7793480093f4SDimitry Andric 
7794480093f4SDimitry Andric   MI.eraseFromParent();
7795480093f4SDimitry Andric   return Legalized;
7796480093f4SDimitry Andric }
7797480093f4SDimitry Andric 
7798480093f4SDimitry Andric LegalizerHelper::LegalizeResult
77995ffd83dbSDimitry Andric LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
7800480093f4SDimitry Andric   MachineFunction &MF = MIRBuilder.getMF();
78015ffd83dbSDimitry Andric 
78025ffd83dbSDimitry Andric   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
78035ffd83dbSDimitry Andric   int NameOpIdx = IsRead ? 1 : 0;
78045ffd83dbSDimitry Andric   int ValRegIndex = IsRead ? 0 : 1;
78055ffd83dbSDimitry Andric 
78065ffd83dbSDimitry Andric   Register ValReg = MI.getOperand(ValRegIndex).getReg();
78075ffd83dbSDimitry Andric   const LLT Ty = MRI.getType(ValReg);
78085ffd83dbSDimitry Andric   const MDString *RegStr = cast<MDString>(
78095ffd83dbSDimitry Andric     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
78105ffd83dbSDimitry Andric 
7811e8d8bef9SDimitry Andric   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
78125ffd83dbSDimitry Andric   if (!PhysReg.isValid())
7813480093f4SDimitry Andric     return UnableToLegalize;
7814480093f4SDimitry Andric 
78155ffd83dbSDimitry Andric   if (IsRead)
78165ffd83dbSDimitry Andric     MIRBuilder.buildCopy(ValReg, PhysReg);
78175ffd83dbSDimitry Andric   else
78185ffd83dbSDimitry Andric     MIRBuilder.buildCopy(PhysReg, ValReg);
78195ffd83dbSDimitry Andric 
7820480093f4SDimitry Andric   MI.eraseFromParent();
7821480093f4SDimitry Andric   return Legalized;
7822480093f4SDimitry Andric }
7823e8d8bef9SDimitry Andric 
7824e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult
7825e8d8bef9SDimitry Andric LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
7826e8d8bef9SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
7827e8d8bef9SDimitry Andric   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
7828e8d8bef9SDimitry Andric   Register Result = MI.getOperand(0).getReg();
7829e8d8bef9SDimitry Andric   LLT OrigTy = MRI.getType(Result);
7830e8d8bef9SDimitry Andric   auto SizeInBits = OrigTy.getScalarSizeInBits();
7831e8d8bef9SDimitry Andric   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
7832e8d8bef9SDimitry Andric 
7833e8d8bef9SDimitry Andric   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
7834e8d8bef9SDimitry Andric   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
7835e8d8bef9SDimitry Andric   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
7836e8d8bef9SDimitry Andric   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
7837e8d8bef9SDimitry Andric 
7838e8d8bef9SDimitry Andric   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
7839e8d8bef9SDimitry Andric   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
7840e8d8bef9SDimitry Andric   MIRBuilder.buildTrunc(Result, Shifted);
7841e8d8bef9SDimitry Andric 
7842e8d8bef9SDimitry Andric   MI.eraseFromParent();
7843e8d8bef9SDimitry Andric   return Legalized;
7844e8d8bef9SDimitry Andric }
7845e8d8bef9SDimitry Andric 
7846bdd1243dSDimitry Andric LegalizerHelper::LegalizeResult
7847bdd1243dSDimitry Andric LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
784806c3fb27SDimitry Andric   auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
784906c3fb27SDimitry Andric   FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
7850bdd1243dSDimitry Andric 
785106c3fb27SDimitry Andric   if (Mask == fcNone) {
7852bdd1243dSDimitry Andric     MIRBuilder.buildConstant(DstReg, 0);
7853bdd1243dSDimitry Andric     MI.eraseFromParent();
7854bdd1243dSDimitry Andric     return Legalized;
7855bdd1243dSDimitry Andric   }
785606c3fb27SDimitry Andric   if (Mask == fcAllFlags) {
7857bdd1243dSDimitry Andric     MIRBuilder.buildConstant(DstReg, 1);
7858bdd1243dSDimitry Andric     MI.eraseFromParent();
7859bdd1243dSDimitry Andric     return Legalized;
7860bdd1243dSDimitry Andric   }
7861bdd1243dSDimitry Andric 
786206c3fb27SDimitry Andric   // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
786306c3fb27SDimitry Andric   // version
786406c3fb27SDimitry Andric 
7865bdd1243dSDimitry Andric   unsigned BitSize = SrcTy.getScalarSizeInBits();
7866bdd1243dSDimitry Andric   const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7867bdd1243dSDimitry Andric 
7868bdd1243dSDimitry Andric   LLT IntTy = LLT::scalar(BitSize);
7869bdd1243dSDimitry Andric   if (SrcTy.isVector())
7870bdd1243dSDimitry Andric     IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
7871bdd1243dSDimitry Andric   auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
7872bdd1243dSDimitry Andric 
7873bdd1243dSDimitry Andric   // Various masks.
7874bdd1243dSDimitry Andric   APInt SignBit = APInt::getSignMask(BitSize);
7875bdd1243dSDimitry Andric   APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
7876bdd1243dSDimitry Andric   APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
7877bdd1243dSDimitry Andric   APInt ExpMask = Inf;
7878bdd1243dSDimitry Andric   APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
7879bdd1243dSDimitry Andric   APInt QNaNBitMask =
7880bdd1243dSDimitry Andric       APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
788106c3fb27SDimitry Andric   APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
7882bdd1243dSDimitry Andric 
7883bdd1243dSDimitry Andric   auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
7884bdd1243dSDimitry Andric   auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
7885bdd1243dSDimitry Andric   auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
7886bdd1243dSDimitry Andric   auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
7887bdd1243dSDimitry Andric   auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
7888bdd1243dSDimitry Andric 
7889bdd1243dSDimitry Andric   auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
7890bdd1243dSDimitry Andric   auto Sign =
7891bdd1243dSDimitry Andric       MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
7892bdd1243dSDimitry Andric 
7893bdd1243dSDimitry Andric   auto Res = MIRBuilder.buildConstant(DstTy, 0);
789406c3fb27SDimitry Andric   // Clang doesn't support capture of structured bindings:
789506c3fb27SDimitry Andric   LLT DstTyCopy = DstTy;
7896bdd1243dSDimitry Andric   const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
789706c3fb27SDimitry Andric     Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
7898bdd1243dSDimitry Andric   };
7899bdd1243dSDimitry Andric 
7900bdd1243dSDimitry Andric   // Tests that involve more than one class should be processed first.
7901bdd1243dSDimitry Andric   if ((Mask & fcFinite) == fcFinite) {
7902bdd1243dSDimitry Andric     // finite(V) ==> abs(V) u< exp_mask
7903bdd1243dSDimitry Andric     appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
7904bdd1243dSDimitry Andric                                      ExpMaskC));
7905bdd1243dSDimitry Andric     Mask &= ~fcFinite;
7906bdd1243dSDimitry Andric   } else if ((Mask & fcFinite) == fcPosFinite) {
7907bdd1243dSDimitry Andric     // finite(V) && V > 0 ==> V u< exp_mask
7908bdd1243dSDimitry Andric     appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
7909bdd1243dSDimitry Andric                                      ExpMaskC));
7910bdd1243dSDimitry Andric     Mask &= ~fcPosFinite;
7911bdd1243dSDimitry Andric   } else if ((Mask & fcFinite) == fcNegFinite) {
7912bdd1243dSDimitry Andric     // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
7913bdd1243dSDimitry Andric     auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
7914bdd1243dSDimitry Andric                                     ExpMaskC);
7915bdd1243dSDimitry Andric     auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
7916bdd1243dSDimitry Andric     appendToRes(And);
7917bdd1243dSDimitry Andric     Mask &= ~fcNegFinite;
7918bdd1243dSDimitry Andric   }
7919bdd1243dSDimitry Andric 
792006c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
792106c3fb27SDimitry Andric     // fcZero | fcSubnormal => test all exponent bits are 0
792206c3fb27SDimitry Andric     // TODO: Handle sign bit specific cases
792306c3fb27SDimitry Andric     // TODO: Handle inverted case
792406c3fb27SDimitry Andric     if (PartialCheck == (fcZero | fcSubnormal)) {
792506c3fb27SDimitry Andric       auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
792606c3fb27SDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
792706c3fb27SDimitry Andric                                        ExpBits, ZeroC));
792806c3fb27SDimitry Andric       Mask &= ~PartialCheck;
792906c3fb27SDimitry Andric     }
793006c3fb27SDimitry Andric   }
793106c3fb27SDimitry Andric 
7932bdd1243dSDimitry Andric   // Check for individual classes.
793306c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcZero) {
7934bdd1243dSDimitry Andric     if (PartialCheck == fcPosZero)
7935bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7936bdd1243dSDimitry Andric                                        AsInt, ZeroC));
7937bdd1243dSDimitry Andric     else if (PartialCheck == fcZero)
7938bdd1243dSDimitry Andric       appendToRes(
7939bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
7940bdd1243dSDimitry Andric     else // fcNegZero
7941bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7942bdd1243dSDimitry Andric                                        AsInt, SignBitC));
7943bdd1243dSDimitry Andric   }
7944bdd1243dSDimitry Andric 
794506c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcSubnormal) {
794606c3fb27SDimitry Andric     // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
794706c3fb27SDimitry Andric     // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
794806c3fb27SDimitry Andric     auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
794906c3fb27SDimitry Andric     auto OneC = MIRBuilder.buildConstant(IntTy, 1);
795006c3fb27SDimitry Andric     auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
795106c3fb27SDimitry Andric     auto SubnormalRes =
795206c3fb27SDimitry Andric         MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
795306c3fb27SDimitry Andric                              MIRBuilder.buildConstant(IntTy, AllOneMantissa));
795406c3fb27SDimitry Andric     if (PartialCheck == fcNegSubnormal)
795506c3fb27SDimitry Andric       SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
795606c3fb27SDimitry Andric     appendToRes(SubnormalRes);
795706c3fb27SDimitry Andric   }
795806c3fb27SDimitry Andric 
795906c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcInf) {
7960bdd1243dSDimitry Andric     if (PartialCheck == fcPosInf)
7961bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7962bdd1243dSDimitry Andric                                        AsInt, InfC));
7963bdd1243dSDimitry Andric     else if (PartialCheck == fcInf)
7964bdd1243dSDimitry Andric       appendToRes(
7965bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
7966bdd1243dSDimitry Andric     else { // fcNegInf
7967bdd1243dSDimitry Andric       APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
7968bdd1243dSDimitry Andric       auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
7969bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
7970bdd1243dSDimitry Andric                                        AsInt, NegInfC));
7971bdd1243dSDimitry Andric     }
7972bdd1243dSDimitry Andric   }
7973bdd1243dSDimitry Andric 
797406c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcNan) {
7975bdd1243dSDimitry Andric     auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
7976bdd1243dSDimitry Andric     if (PartialCheck == fcNan) {
7977bdd1243dSDimitry Andric       // isnan(V) ==> abs(V) u> int(inf)
7978bdd1243dSDimitry Andric       appendToRes(
7979bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
7980bdd1243dSDimitry Andric     } else if (PartialCheck == fcQNan) {
7981bdd1243dSDimitry Andric       // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
7982bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
7983bdd1243dSDimitry Andric                                        InfWithQnanBitC));
7984bdd1243dSDimitry Andric     } else { // fcSNan
7985bdd1243dSDimitry Andric       // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
7986bdd1243dSDimitry Andric       //                    abs(V) u< (unsigned(Inf) | quiet_bit)
7987bdd1243dSDimitry Andric       auto IsNan =
7988bdd1243dSDimitry Andric           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
7989bdd1243dSDimitry Andric       auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
7990bdd1243dSDimitry Andric                                             Abs, InfWithQnanBitC);
7991bdd1243dSDimitry Andric       appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
7992bdd1243dSDimitry Andric     }
7993bdd1243dSDimitry Andric   }
7994bdd1243dSDimitry Andric 
799506c3fb27SDimitry Andric   if (FPClassTest PartialCheck = Mask & fcNormal) {
7996bdd1243dSDimitry Andric     // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
7997bdd1243dSDimitry Andric     // (max_exp-1))
7998bdd1243dSDimitry Andric     APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
7999bdd1243dSDimitry Andric     auto ExpMinusOne = MIRBuilder.buildSub(
8000bdd1243dSDimitry Andric         IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8001bdd1243dSDimitry Andric     APInt MaxExpMinusOne = ExpMask - ExpLSB;
8002bdd1243dSDimitry Andric     auto NormalRes =
8003bdd1243dSDimitry Andric         MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
8004bdd1243dSDimitry Andric                              MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8005bdd1243dSDimitry Andric     if (PartialCheck == fcNegNormal)
8006bdd1243dSDimitry Andric       NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8007bdd1243dSDimitry Andric     else if (PartialCheck == fcPosNormal) {
8008bdd1243dSDimitry Andric       auto PosSign = MIRBuilder.buildXor(
8009bdd1243dSDimitry Andric           DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8010bdd1243dSDimitry Andric       NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8011bdd1243dSDimitry Andric     }
8012bdd1243dSDimitry Andric     appendToRes(NormalRes);
8013bdd1243dSDimitry Andric   }
8014bdd1243dSDimitry Andric 
8015bdd1243dSDimitry Andric   MIRBuilder.buildCopy(DstReg, Res);
8016bdd1243dSDimitry Andric   MI.eraseFromParent();
8017bdd1243dSDimitry Andric   return Legalized;
8018bdd1243dSDimitry Andric }
8019bdd1243dSDimitry Andric 
8020e8d8bef9SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8021e8d8bef9SDimitry Andric   // Implement vector G_SELECT in terms of XOR, AND, OR.
802206c3fb27SDimitry Andric   auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
802306c3fb27SDimitry Andric       MI.getFirst4RegLLTs();
8024e8d8bef9SDimitry Andric   if (!DstTy.isVector())
8025e8d8bef9SDimitry Andric     return UnableToLegalize;
8026e8d8bef9SDimitry Andric 
8027bdd1243dSDimitry Andric   bool IsEltPtr = DstTy.getElementType().isPointer();
8028bdd1243dSDimitry Andric   if (IsEltPtr) {
8029bdd1243dSDimitry Andric     LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8030bdd1243dSDimitry Andric     LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8031bdd1243dSDimitry Andric     Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8032bdd1243dSDimitry Andric     Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8033bdd1243dSDimitry Andric     DstTy = NewTy;
8034bdd1243dSDimitry Andric   }
8035bdd1243dSDimitry Andric 
8036e8d8bef9SDimitry Andric   if (MaskTy.isScalar()) {
803781ad6265SDimitry Andric     // Turn the scalar condition into a vector condition mask.
803881ad6265SDimitry Andric 
8039e8d8bef9SDimitry Andric     Register MaskElt = MaskReg;
804081ad6265SDimitry Andric 
804181ad6265SDimitry Andric     // The condition was potentially zero extended before, but we want a sign
804281ad6265SDimitry Andric     // extended boolean.
8043bdd1243dSDimitry Andric     if (MaskTy != LLT::scalar(1))
804481ad6265SDimitry Andric       MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8045e8d8bef9SDimitry Andric 
804681ad6265SDimitry Andric     // Continue the sign extension (or truncate) to match the data type.
804781ad6265SDimitry Andric     MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
804881ad6265SDimitry Andric                                           MaskElt).getReg(0);
804981ad6265SDimitry Andric 
805081ad6265SDimitry Andric     // Generate a vector splat idiom.
805181ad6265SDimitry Andric     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
805281ad6265SDimitry Andric     MaskReg = ShufSplat.getReg(0);
805381ad6265SDimitry Andric     MaskTy = DstTy;
805481ad6265SDimitry Andric   }
805581ad6265SDimitry Andric 
805681ad6265SDimitry Andric   if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8057e8d8bef9SDimitry Andric     return UnableToLegalize;
8058e8d8bef9SDimitry Andric   }
8059e8d8bef9SDimitry Andric 
8060e8d8bef9SDimitry Andric   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8061e8d8bef9SDimitry Andric   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8062e8d8bef9SDimitry Andric   auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8063bdd1243dSDimitry Andric   if (IsEltPtr) {
8064bdd1243dSDimitry Andric     auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8065bdd1243dSDimitry Andric     MIRBuilder.buildIntToPtr(DstReg, Or);
8066bdd1243dSDimitry Andric   } else {
8067e8d8bef9SDimitry Andric     MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8068bdd1243dSDimitry Andric   }
8069e8d8bef9SDimitry Andric   MI.eraseFromParent();
8070e8d8bef9SDimitry Andric   return Legalized;
8071e8d8bef9SDimitry Andric }
8072fe6060f1SDimitry Andric 
8073fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8074fe6060f1SDimitry Andric   // Split DIVREM into individual instructions.
8075fe6060f1SDimitry Andric   unsigned Opcode = MI.getOpcode();
8076fe6060f1SDimitry Andric 
8077fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
8078fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8079fe6060f1SDimitry Andric                                         : TargetOpcode::G_UDIV,
8080fe6060f1SDimitry Andric       {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8081fe6060f1SDimitry Andric   MIRBuilder.buildInstr(
8082fe6060f1SDimitry Andric       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8083fe6060f1SDimitry Andric                                         : TargetOpcode::G_UREM,
8084fe6060f1SDimitry Andric       {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8085fe6060f1SDimitry Andric   MI.eraseFromParent();
8086fe6060f1SDimitry Andric   return Legalized;
8087fe6060f1SDimitry Andric }
8088fe6060f1SDimitry Andric 
8089fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
8090fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8091fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
8092fe6060f1SDimitry Andric   // %v1 = G_ASHR %a, scalar_size-1
8093fe6060f1SDimitry Andric   // %v2 = G_ADD %a, %v1
8094fe6060f1SDimitry Andric   // %res = G_XOR %v2, %v1
8095fe6060f1SDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8096fe6060f1SDimitry Andric   Register OpReg = MI.getOperand(1).getReg();
8097fe6060f1SDimitry Andric   auto ShiftAmt =
8098fe6060f1SDimitry Andric       MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8099fe6060f1SDimitry Andric   auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8100fe6060f1SDimitry Andric   auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8101fe6060f1SDimitry Andric   MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8102fe6060f1SDimitry Andric   MI.eraseFromParent();
8103fe6060f1SDimitry Andric   return Legalized;
8104fe6060f1SDimitry Andric }
8105fe6060f1SDimitry Andric 
8106fe6060f1SDimitry Andric LegalizerHelper::LegalizeResult
8107fe6060f1SDimitry Andric LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8108fe6060f1SDimitry Andric   // Expand %res = G_ABS %a into:
8109fe6060f1SDimitry Andric   // %v1 = G_CONSTANT 0
8110fe6060f1SDimitry Andric   // %v2 = G_SUB %v1, %a
8111fe6060f1SDimitry Andric   // %res = G_SMAX %a, %v2
8112fe6060f1SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
8113fe6060f1SDimitry Andric   LLT Ty = MRI.getType(SrcReg);
8114fe6060f1SDimitry Andric   auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8115fe6060f1SDimitry Andric   auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8116fe6060f1SDimitry Andric   MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8117fe6060f1SDimitry Andric   MI.eraseFromParent();
8118fe6060f1SDimitry Andric   return Legalized;
8119fe6060f1SDimitry Andric }
8120349cc55cSDimitry Andric 
8121349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8122349cc55cSDimitry Andric LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8123349cc55cSDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
8124349cc55cSDimitry Andric   LLT SrcTy = MRI.getType(SrcReg);
8125349cc55cSDimitry Andric   LLT DstTy = MRI.getType(SrcReg);
8126349cc55cSDimitry Andric 
8127349cc55cSDimitry Andric   // The source could be a scalar if the IR type was <1 x sN>.
8128349cc55cSDimitry Andric   if (SrcTy.isScalar()) {
8129349cc55cSDimitry Andric     if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8130349cc55cSDimitry Andric       return UnableToLegalize; // FIXME: handle extension.
8131349cc55cSDimitry Andric     // This can be just a plain copy.
8132349cc55cSDimitry Andric     Observer.changingInstr(MI);
8133349cc55cSDimitry Andric     MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8134349cc55cSDimitry Andric     Observer.changedInstr(MI);
8135349cc55cSDimitry Andric     return Legalized;
8136349cc55cSDimitry Andric   }
813706c3fb27SDimitry Andric   return UnableToLegalize;
8138349cc55cSDimitry Andric }
8139349cc55cSDimitry Andric 
81405f757f3fSDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
81415f757f3fSDimitry Andric 
81425f757f3fSDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
81435f757f3fSDimitry Andric   MachineFunction &MF = *MI.getMF();
81445f757f3fSDimitry Andric   const DataLayout &DL = MIRBuilder.getDataLayout();
81455f757f3fSDimitry Andric   LLVMContext &Ctx = MF.getFunction().getContext();
81465f757f3fSDimitry Andric   Register ListPtr = MI.getOperand(1).getReg();
81475f757f3fSDimitry Andric   LLT PtrTy = MRI.getType(ListPtr);
81485f757f3fSDimitry Andric 
81495f757f3fSDimitry Andric   // LstPtr is a pointer to the head of the list. Get the address
81505f757f3fSDimitry Andric   // of the head of the list.
81515f757f3fSDimitry Andric   Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
81525f757f3fSDimitry Andric   MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
81535f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
81545f757f3fSDimitry Andric   auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
81555f757f3fSDimitry Andric 
81565f757f3fSDimitry Andric   const Align A(MI.getOperand(2).getImm());
81575f757f3fSDimitry Andric   LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
81585f757f3fSDimitry Andric   if (A > TLI.getMinStackArgumentAlignment()) {
81595f757f3fSDimitry Andric     Register AlignAmt =
81605f757f3fSDimitry Andric         MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
81615f757f3fSDimitry Andric     auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
81625f757f3fSDimitry Andric     auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
81635f757f3fSDimitry Andric     VAList = AndDst.getReg(0);
81645f757f3fSDimitry Andric   }
81655f757f3fSDimitry Andric 
81665f757f3fSDimitry Andric   // Increment the pointer, VAList, to the next vaarg
81675f757f3fSDimitry Andric   // The list should be bumped by the size of element in the current head of
81685f757f3fSDimitry Andric   // list.
81695f757f3fSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
81705f757f3fSDimitry Andric   LLT LLTTy = MRI.getType(Dst);
81715f757f3fSDimitry Andric   Type *Ty = getTypeForLLT(LLTTy, Ctx);
81725f757f3fSDimitry Andric   auto IncAmt =
81735f757f3fSDimitry Andric       MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
81745f757f3fSDimitry Andric   auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
81755f757f3fSDimitry Andric 
81765f757f3fSDimitry Andric   // Store the increment VAList to the legalized pointer
81775f757f3fSDimitry Andric   MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
81785f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
81795f757f3fSDimitry Andric   MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
81805f757f3fSDimitry Andric   // Load the actual argument out of the pointer VAList
81815f757f3fSDimitry Andric   Align EltAlignment = DL.getABITypeAlign(Ty);
81825f757f3fSDimitry Andric   MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
81835f757f3fSDimitry Andric       MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
81845f757f3fSDimitry Andric   MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
81855f757f3fSDimitry Andric 
81865f757f3fSDimitry Andric   MI.eraseFromParent();
81875f757f3fSDimitry Andric   return Legalized;
81885f757f3fSDimitry Andric }
81895f757f3fSDimitry Andric 
8190349cc55cSDimitry Andric static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8191349cc55cSDimitry Andric   // On Darwin, -Os means optimize for size without hurting performance, so
8192349cc55cSDimitry Andric   // only really optimize for size when -Oz (MinSize) is used.
8193349cc55cSDimitry Andric   if (MF.getTarget().getTargetTriple().isOSDarwin())
8194349cc55cSDimitry Andric     return MF.getFunction().hasMinSize();
8195349cc55cSDimitry Andric   return MF.getFunction().hasOptSize();
8196349cc55cSDimitry Andric }
8197349cc55cSDimitry Andric 
8198349cc55cSDimitry Andric // Returns a list of types to use for memory op lowering in MemOps. A partial
8199349cc55cSDimitry Andric // port of findOptimalMemOpLowering in TargetLowering.
8200349cc55cSDimitry Andric static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8201349cc55cSDimitry Andric                                           unsigned Limit, const MemOp &Op,
8202349cc55cSDimitry Andric                                           unsigned DstAS, unsigned SrcAS,
8203349cc55cSDimitry Andric                                           const AttributeList &FuncAttributes,
8204349cc55cSDimitry Andric                                           const TargetLowering &TLI) {
8205349cc55cSDimitry Andric   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8206349cc55cSDimitry Andric     return false;
8207349cc55cSDimitry Andric 
8208349cc55cSDimitry Andric   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8209349cc55cSDimitry Andric 
8210349cc55cSDimitry Andric   if (Ty == LLT()) {
8211349cc55cSDimitry Andric     // Use the largest scalar type whose alignment constraints are satisfied.
8212349cc55cSDimitry Andric     // We only need to check DstAlign here as SrcAlign is always greater or
8213349cc55cSDimitry Andric     // equal to DstAlign (or zero).
8214349cc55cSDimitry Andric     Ty = LLT::scalar(64);
8215349cc55cSDimitry Andric     if (Op.isFixedDstAlign())
8216349cc55cSDimitry Andric       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8217349cc55cSDimitry Andric              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8218349cc55cSDimitry Andric         Ty = LLT::scalar(Ty.getSizeInBytes());
8219349cc55cSDimitry Andric     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8220349cc55cSDimitry Andric     // FIXME: check for the largest legal type we can load/store to.
8221349cc55cSDimitry Andric   }
8222349cc55cSDimitry Andric 
8223349cc55cSDimitry Andric   unsigned NumMemOps = 0;
8224349cc55cSDimitry Andric   uint64_t Size = Op.size();
8225349cc55cSDimitry Andric   while (Size) {
8226349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
8227349cc55cSDimitry Andric     while (TySize > Size) {
8228349cc55cSDimitry Andric       // For now, only use non-vector load / store's for the left-over pieces.
8229349cc55cSDimitry Andric       LLT NewTy = Ty;
8230349cc55cSDimitry Andric       // FIXME: check for mem op safety and legality of the types. Not all of
8231349cc55cSDimitry Andric       // SDAGisms map cleanly to GISel concepts.
8232349cc55cSDimitry Andric       if (NewTy.isVector())
8233349cc55cSDimitry Andric         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
823406c3fb27SDimitry Andric       NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8235349cc55cSDimitry Andric       unsigned NewTySize = NewTy.getSizeInBytes();
8236349cc55cSDimitry Andric       assert(NewTySize > 0 && "Could not find appropriate type");
8237349cc55cSDimitry Andric 
8238349cc55cSDimitry Andric       // If the new LLT cannot cover all of the remaining bits, then consider
8239349cc55cSDimitry Andric       // issuing a (or a pair of) unaligned and overlapping load / store.
8240bdd1243dSDimitry Andric       unsigned Fast;
8241349cc55cSDimitry Andric       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8242349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
8243349cc55cSDimitry Andric       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8244349cc55cSDimitry Andric           TLI.allowsMisalignedMemoryAccesses(
8245349cc55cSDimitry Andric               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8246349cc55cSDimitry Andric               MachineMemOperand::MONone, &Fast) &&
8247349cc55cSDimitry Andric           Fast)
8248349cc55cSDimitry Andric         TySize = Size;
8249349cc55cSDimitry Andric       else {
8250349cc55cSDimitry Andric         Ty = NewTy;
8251349cc55cSDimitry Andric         TySize = NewTySize;
8252349cc55cSDimitry Andric       }
8253349cc55cSDimitry Andric     }
8254349cc55cSDimitry Andric 
8255349cc55cSDimitry Andric     if (++NumMemOps > Limit)
8256349cc55cSDimitry Andric       return false;
8257349cc55cSDimitry Andric 
8258349cc55cSDimitry Andric     MemOps.push_back(Ty);
8259349cc55cSDimitry Andric     Size -= TySize;
8260349cc55cSDimitry Andric   }
8261349cc55cSDimitry Andric 
8262349cc55cSDimitry Andric   return true;
8263349cc55cSDimitry Andric }
8264349cc55cSDimitry Andric 
8265349cc55cSDimitry Andric static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8266349cc55cSDimitry Andric   if (Ty.isVector())
8267349cc55cSDimitry Andric     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8268349cc55cSDimitry Andric                                 Ty.getNumElements());
8269349cc55cSDimitry Andric   return IntegerType::get(C, Ty.getSizeInBits());
8270349cc55cSDimitry Andric }
8271349cc55cSDimitry Andric 
8272349cc55cSDimitry Andric // Get a vectorized representation of the memset value operand, GISel edition.
8273349cc55cSDimitry Andric static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8274349cc55cSDimitry Andric   MachineRegisterInfo &MRI = *MIB.getMRI();
8275349cc55cSDimitry Andric   unsigned NumBits = Ty.getScalarSizeInBits();
8276349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8277349cc55cSDimitry Andric   if (!Ty.isVector() && ValVRegAndVal) {
827881ad6265SDimitry Andric     APInt Scalar = ValVRegAndVal->Value.trunc(8);
8279349cc55cSDimitry Andric     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8280349cc55cSDimitry Andric     return MIB.buildConstant(Ty, SplatVal).getReg(0);
8281349cc55cSDimitry Andric   }
8282349cc55cSDimitry Andric 
8283349cc55cSDimitry Andric   // Extend the byte value to the larger type, and then multiply by a magic
8284349cc55cSDimitry Andric   // value 0x010101... in order to replicate it across every byte.
8285349cc55cSDimitry Andric   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8286349cc55cSDimitry Andric   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8287349cc55cSDimitry Andric     return MIB.buildConstant(Ty, 0).getReg(0);
8288349cc55cSDimitry Andric   }
8289349cc55cSDimitry Andric 
8290349cc55cSDimitry Andric   LLT ExtType = Ty.getScalarType();
8291349cc55cSDimitry Andric   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8292349cc55cSDimitry Andric   if (NumBits > 8) {
8293349cc55cSDimitry Andric     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8294349cc55cSDimitry Andric     auto MagicMI = MIB.buildConstant(ExtType, Magic);
8295349cc55cSDimitry Andric     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8296349cc55cSDimitry Andric   }
8297349cc55cSDimitry Andric 
8298349cc55cSDimitry Andric   // For vector types create a G_BUILD_VECTOR.
8299349cc55cSDimitry Andric   if (Ty.isVector())
8300349cc55cSDimitry Andric     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
8301349cc55cSDimitry Andric 
8302349cc55cSDimitry Andric   return Val;
8303349cc55cSDimitry Andric }
8304349cc55cSDimitry Andric 
8305349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8306349cc55cSDimitry Andric LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8307349cc55cSDimitry Andric                              uint64_t KnownLen, Align Alignment,
8308349cc55cSDimitry Andric                              bool IsVolatile) {
8309349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8310349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8311349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8312349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8313349cc55cSDimitry Andric 
8314349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memset length!");
8315349cc55cSDimitry Andric 
8316349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8317349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8318349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
8319349cc55cSDimitry Andric 
8320349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8321349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8322349cc55cSDimitry Andric     DstAlignCanChange = true;
8323349cc55cSDimitry Andric 
8324349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8325349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8326349cc55cSDimitry Andric 
8327349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8328349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8329349cc55cSDimitry Andric 
8330349cc55cSDimitry Andric   auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8331349cc55cSDimitry Andric   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8332349cc55cSDimitry Andric 
8333349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8334349cc55cSDimitry Andric                                      MemOp::Set(KnownLen, DstAlignCanChange,
8335349cc55cSDimitry Andric                                                 Alignment,
8336349cc55cSDimitry Andric                                                 /*IsZeroMemset=*/IsZeroVal,
8337349cc55cSDimitry Andric                                                 /*IsVolatile=*/IsVolatile),
8338349cc55cSDimitry Andric                                      DstPtrInfo.getAddrSpace(), ~0u,
8339349cc55cSDimitry Andric                                      MF.getFunction().getAttributes(), TLI))
8340349cc55cSDimitry Andric     return UnableToLegalize;
8341349cc55cSDimitry Andric 
8342349cc55cSDimitry Andric   if (DstAlignCanChange) {
8343349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8344349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8345349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8346349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8347349cc55cSDimitry Andric       Alignment = NewAlign;
8348349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8349349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8350349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8351349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8352349cc55cSDimitry Andric     }
8353349cc55cSDimitry Andric   }
8354349cc55cSDimitry Andric 
8355349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8356349cc55cSDimitry Andric   // Find the largest store and generate the bit pattern for it.
8357349cc55cSDimitry Andric   LLT LargestTy = MemOps[0];
8358349cc55cSDimitry Andric   for (unsigned i = 1; i < MemOps.size(); i++)
8359349cc55cSDimitry Andric     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8360349cc55cSDimitry Andric       LargestTy = MemOps[i];
8361349cc55cSDimitry Andric 
8362349cc55cSDimitry Andric   // The memset stored value is always defined as an s8, so in order to make it
8363349cc55cSDimitry Andric   // work with larger store types we need to repeat the bit pattern across the
8364349cc55cSDimitry Andric   // wider type.
8365349cc55cSDimitry Andric   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8366349cc55cSDimitry Andric 
8367349cc55cSDimitry Andric   if (!MemSetValue)
8368349cc55cSDimitry Andric     return UnableToLegalize;
8369349cc55cSDimitry Andric 
8370349cc55cSDimitry Andric   // Generate the stores. For each store type in the list, we generate the
8371349cc55cSDimitry Andric   // matching store of that type to the destination address.
8372349cc55cSDimitry Andric   LLT PtrTy = MRI.getType(Dst);
8373349cc55cSDimitry Andric   unsigned DstOff = 0;
8374349cc55cSDimitry Andric   unsigned Size = KnownLen;
8375349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); I++) {
8376349cc55cSDimitry Andric     LLT Ty = MemOps[I];
8377349cc55cSDimitry Andric     unsigned TySize = Ty.getSizeInBytes();
8378349cc55cSDimitry Andric     if (TySize > Size) {
8379349cc55cSDimitry Andric       // Issuing an unaligned load / store pair that overlaps with the previous
8380349cc55cSDimitry Andric       // pair. Adjust the offset accordingly.
8381349cc55cSDimitry Andric       assert(I == MemOps.size() - 1 && I != 0);
8382349cc55cSDimitry Andric       DstOff -= TySize - Size;
8383349cc55cSDimitry Andric     }
8384349cc55cSDimitry Andric 
8385349cc55cSDimitry Andric     // If this store is smaller than the largest store see whether we can get
8386349cc55cSDimitry Andric     // the smaller value for free with a truncate.
8387349cc55cSDimitry Andric     Register Value = MemSetValue;
8388349cc55cSDimitry Andric     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8389349cc55cSDimitry Andric       MVT VT = getMVTForLLT(Ty);
8390349cc55cSDimitry Andric       MVT LargestVT = getMVTForLLT(LargestTy);
8391349cc55cSDimitry Andric       if (!LargestTy.isVector() && !Ty.isVector() &&
8392349cc55cSDimitry Andric           TLI.isTruncateFree(LargestVT, VT))
8393349cc55cSDimitry Andric         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8394349cc55cSDimitry Andric       else
8395349cc55cSDimitry Andric         Value = getMemsetValue(Val, Ty, MIB);
8396349cc55cSDimitry Andric       if (!Value)
8397349cc55cSDimitry Andric         return UnableToLegalize;
8398349cc55cSDimitry Andric     }
8399349cc55cSDimitry Andric 
8400349cc55cSDimitry Andric     auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8401349cc55cSDimitry Andric 
8402349cc55cSDimitry Andric     Register Ptr = Dst;
8403349cc55cSDimitry Andric     if (DstOff != 0) {
8404349cc55cSDimitry Andric       auto Offset =
8405349cc55cSDimitry Andric           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8406349cc55cSDimitry Andric       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8407349cc55cSDimitry Andric     }
8408349cc55cSDimitry Andric 
8409349cc55cSDimitry Andric     MIB.buildStore(Value, Ptr, *StoreMMO);
8410349cc55cSDimitry Andric     DstOff += Ty.getSizeInBytes();
8411349cc55cSDimitry Andric     Size -= TySize;
8412349cc55cSDimitry Andric   }
8413349cc55cSDimitry Andric 
8414349cc55cSDimitry Andric   MI.eraseFromParent();
8415349cc55cSDimitry Andric   return Legalized;
8416349cc55cSDimitry Andric }
8417349cc55cSDimitry Andric 
8418349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8419349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8420349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8421349cc55cSDimitry Andric 
842206c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8423349cc55cSDimitry Andric 
8424349cc55cSDimitry Andric   const auto *MMOIt = MI.memoperands_begin();
8425349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8426349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8427349cc55cSDimitry Andric 
8428349cc55cSDimitry Andric   // See if this is a constant length copy
8429349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8430349cc55cSDimitry Andric   // FIXME: support dynamically sized G_MEMCPY_INLINE
843181ad6265SDimitry Andric   assert(LenVRegAndVal &&
8432349cc55cSDimitry Andric          "inline memcpy with dynamic size is not yet supported");
8433349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8434349cc55cSDimitry Andric   if (KnownLen == 0) {
8435349cc55cSDimitry Andric     MI.eraseFromParent();
8436349cc55cSDimitry Andric     return Legalized;
8437349cc55cSDimitry Andric   }
8438349cc55cSDimitry Andric 
8439349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8440349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8441349cc55cSDimitry Andric   Align DstAlign = DstMMO.getBaseAlign();
8442349cc55cSDimitry Andric   Align SrcAlign = SrcMMO.getBaseAlign();
8443349cc55cSDimitry Andric 
8444349cc55cSDimitry Andric   return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8445349cc55cSDimitry Andric                            IsVolatile);
8446349cc55cSDimitry Andric }
8447349cc55cSDimitry Andric 
8448349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8449349cc55cSDimitry Andric LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8450349cc55cSDimitry Andric                                    uint64_t KnownLen, Align DstAlign,
8451349cc55cSDimitry Andric                                    Align SrcAlign, bool IsVolatile) {
8452349cc55cSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8453349cc55cSDimitry Andric   return lowerMemcpy(MI, Dst, Src, KnownLen,
8454349cc55cSDimitry Andric                      std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8455349cc55cSDimitry Andric                      IsVolatile);
8456349cc55cSDimitry Andric }
8457349cc55cSDimitry Andric 
8458349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8459349cc55cSDimitry Andric LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8460349cc55cSDimitry Andric                              uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8461349cc55cSDimitry Andric                              Align SrcAlign, bool IsVolatile) {
8462349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8463349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8464349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8465349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8466349cc55cSDimitry Andric 
8467349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memcpy length!");
8468349cc55cSDimitry Andric 
8469349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8470349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
847181ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8472349cc55cSDimitry Andric 
8473349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8474349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8475349cc55cSDimitry Andric     DstAlignCanChange = true;
8476349cc55cSDimitry Andric 
8477349cc55cSDimitry Andric   // FIXME: infer better src pointer alignment like SelectionDAG does here.
8478349cc55cSDimitry Andric   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8479349cc55cSDimitry Andric   // if the memcpy is in a tail call position.
8480349cc55cSDimitry Andric 
8481349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8482349cc55cSDimitry Andric 
8483349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8484349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8485349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8486349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8487349cc55cSDimitry Andric 
8488349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8489349cc55cSDimitry Andric           MemOps, Limit,
8490349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8491349cc55cSDimitry Andric                       IsVolatile),
8492349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8493349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8494349cc55cSDimitry Andric     return UnableToLegalize;
8495349cc55cSDimitry Andric 
8496349cc55cSDimitry Andric   if (DstAlignCanChange) {
8497349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8498349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8499349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8500349cc55cSDimitry Andric 
8501349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8502349cc55cSDimitry Andric     // realignment.
8503349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8504349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8505349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
850681ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8507349cc55cSDimitry Andric 
8508349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8509349cc55cSDimitry Andric       Alignment = NewAlign;
8510349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8511349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8512349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8513349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8514349cc55cSDimitry Andric     }
8515349cc55cSDimitry Andric   }
8516349cc55cSDimitry Andric 
8517349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8518349cc55cSDimitry Andric 
8519349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8520349cc55cSDimitry Andric   // Now we need to emit a pair of load and stores for each of the types we've
8521349cc55cSDimitry Andric   // collected. I.e. for each type, generate a load from the source pointer of
8522349cc55cSDimitry Andric   // that type width, and then generate a corresponding store to the dest buffer
8523349cc55cSDimitry Andric   // of that value loaded. This can result in a sequence of loads and stores
8524349cc55cSDimitry Andric   // mixed types, depending on what the target specifies as good types to use.
8525349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8526349cc55cSDimitry Andric   unsigned Size = KnownLen;
8527349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8528349cc55cSDimitry Andric     // Issuing an unaligned load / store pair  that overlaps with the previous
8529349cc55cSDimitry Andric     // pair. Adjust the offset accordingly.
8530349cc55cSDimitry Andric     if (CopyTy.getSizeInBytes() > Size)
8531349cc55cSDimitry Andric       CurrOffset -= CopyTy.getSizeInBytes() - Size;
8532349cc55cSDimitry Andric 
8533349cc55cSDimitry Andric     // Construct MMOs for the accesses.
8534349cc55cSDimitry Andric     auto *LoadMMO =
8535349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8536349cc55cSDimitry Andric     auto *StoreMMO =
8537349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8538349cc55cSDimitry Andric 
8539349cc55cSDimitry Andric     // Create the load.
8540349cc55cSDimitry Andric     Register LoadPtr = Src;
8541349cc55cSDimitry Andric     Register Offset;
8542349cc55cSDimitry Andric     if (CurrOffset != 0) {
85434824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
85444824e7fdSDimitry Andric       Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8545349cc55cSDimitry Andric                    .getReg(0);
85464824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8547349cc55cSDimitry Andric     }
8548349cc55cSDimitry Andric     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8549349cc55cSDimitry Andric 
8550349cc55cSDimitry Andric     // Create the store.
85514824e7fdSDimitry Andric     Register StorePtr = Dst;
85524824e7fdSDimitry Andric     if (CurrOffset != 0) {
85534824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
85544824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
85554824e7fdSDimitry Andric     }
8556349cc55cSDimitry Andric     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8557349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8558349cc55cSDimitry Andric     Size -= CopyTy.getSizeInBytes();
8559349cc55cSDimitry Andric   }
8560349cc55cSDimitry Andric 
8561349cc55cSDimitry Andric   MI.eraseFromParent();
8562349cc55cSDimitry Andric   return Legalized;
8563349cc55cSDimitry Andric }
8564349cc55cSDimitry Andric 
8565349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8566349cc55cSDimitry Andric LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8567349cc55cSDimitry Andric                               uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8568349cc55cSDimitry Andric                               bool IsVolatile) {
8569349cc55cSDimitry Andric   auto &MF = *MI.getParent()->getParent();
8570349cc55cSDimitry Andric   const auto &TLI = *MF.getSubtarget().getTargetLowering();
8571349cc55cSDimitry Andric   auto &DL = MF.getDataLayout();
8572349cc55cSDimitry Andric   LLVMContext &C = MF.getFunction().getContext();
8573349cc55cSDimitry Andric 
8574349cc55cSDimitry Andric   assert(KnownLen != 0 && "Have a zero length memmove length!");
8575349cc55cSDimitry Andric 
8576349cc55cSDimitry Andric   bool DstAlignCanChange = false;
8577349cc55cSDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
8578349cc55cSDimitry Andric   bool OptSize = shouldLowerMemFuncForSize(MF);
857981ad6265SDimitry Andric   Align Alignment = std::min(DstAlign, SrcAlign);
8580349cc55cSDimitry Andric 
8581349cc55cSDimitry Andric   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8582349cc55cSDimitry Andric   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8583349cc55cSDimitry Andric     DstAlignCanChange = true;
8584349cc55cSDimitry Andric 
8585349cc55cSDimitry Andric   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8586349cc55cSDimitry Andric   std::vector<LLT> MemOps;
8587349cc55cSDimitry Andric 
8588349cc55cSDimitry Andric   const auto &DstMMO = **MI.memoperands_begin();
8589349cc55cSDimitry Andric   const auto &SrcMMO = **std::next(MI.memoperands_begin());
8590349cc55cSDimitry Andric   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8591349cc55cSDimitry Andric   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8592349cc55cSDimitry Andric 
8593349cc55cSDimitry Andric   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8594349cc55cSDimitry Andric   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
8595349cc55cSDimitry Andric   // same thing here.
8596349cc55cSDimitry Andric   if (!findGISelOptimalMemOpLowering(
8597349cc55cSDimitry Andric           MemOps, Limit,
8598349cc55cSDimitry Andric           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8599349cc55cSDimitry Andric                       /*IsVolatile*/ true),
8600349cc55cSDimitry Andric           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8601349cc55cSDimitry Andric           MF.getFunction().getAttributes(), TLI))
8602349cc55cSDimitry Andric     return UnableToLegalize;
8603349cc55cSDimitry Andric 
8604349cc55cSDimitry Andric   if (DstAlignCanChange) {
8605349cc55cSDimitry Andric     // Get an estimate of the type from the LLT.
8606349cc55cSDimitry Andric     Type *IRTy = getTypeForLLT(MemOps[0], C);
8607349cc55cSDimitry Andric     Align NewAlign = DL.getABITypeAlign(IRTy);
8608349cc55cSDimitry Andric 
8609349cc55cSDimitry Andric     // Don't promote to an alignment that would require dynamic stack
8610349cc55cSDimitry Andric     // realignment.
8611349cc55cSDimitry Andric     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8612349cc55cSDimitry Andric     if (!TRI->hasStackRealignment(MF))
8613349cc55cSDimitry Andric       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
861481ad6265SDimitry Andric         NewAlign = NewAlign.previous();
8615349cc55cSDimitry Andric 
8616349cc55cSDimitry Andric     if (NewAlign > Alignment) {
8617349cc55cSDimitry Andric       Alignment = NewAlign;
8618349cc55cSDimitry Andric       unsigned FI = FIDef->getOperand(1).getIndex();
8619349cc55cSDimitry Andric       // Give the stack frame object a larger alignment if needed.
8620349cc55cSDimitry Andric       if (MFI.getObjectAlign(FI) < Alignment)
8621349cc55cSDimitry Andric         MFI.setObjectAlignment(FI, Alignment);
8622349cc55cSDimitry Andric     }
8623349cc55cSDimitry Andric   }
8624349cc55cSDimitry Andric 
8625349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8626349cc55cSDimitry Andric 
8627349cc55cSDimitry Andric   MachineIRBuilder MIB(MI);
8628349cc55cSDimitry Andric   // Memmove requires that we perform the loads first before issuing the stores.
8629349cc55cSDimitry Andric   // Apart from that, this loop is pretty much doing the same thing as the
8630349cc55cSDimitry Andric   // memcpy codegen function.
8631349cc55cSDimitry Andric   unsigned CurrOffset = 0;
8632349cc55cSDimitry Andric   SmallVector<Register, 16> LoadVals;
8633349cc55cSDimitry Andric   for (auto CopyTy : MemOps) {
8634349cc55cSDimitry Andric     // Construct MMO for the load.
8635349cc55cSDimitry Andric     auto *LoadMMO =
8636349cc55cSDimitry Andric         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8637349cc55cSDimitry Andric 
8638349cc55cSDimitry Andric     // Create the load.
8639349cc55cSDimitry Andric     Register LoadPtr = Src;
8640349cc55cSDimitry Andric     if (CurrOffset != 0) {
86414824e7fdSDimitry Andric       LLT SrcTy = MRI.getType(Src);
8642349cc55cSDimitry Andric       auto Offset =
86434824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
86444824e7fdSDimitry Andric       LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8645349cc55cSDimitry Andric     }
8646349cc55cSDimitry Andric     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8647349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8648349cc55cSDimitry Andric   }
8649349cc55cSDimitry Andric 
8650349cc55cSDimitry Andric   CurrOffset = 0;
8651349cc55cSDimitry Andric   for (unsigned I = 0; I < MemOps.size(); ++I) {
8652349cc55cSDimitry Andric     LLT CopyTy = MemOps[I];
8653349cc55cSDimitry Andric     // Now store the values loaded.
8654349cc55cSDimitry Andric     auto *StoreMMO =
8655349cc55cSDimitry Andric         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8656349cc55cSDimitry Andric 
8657349cc55cSDimitry Andric     Register StorePtr = Dst;
8658349cc55cSDimitry Andric     if (CurrOffset != 0) {
86594824e7fdSDimitry Andric       LLT DstTy = MRI.getType(Dst);
8660349cc55cSDimitry Andric       auto Offset =
86614824e7fdSDimitry Andric           MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
86624824e7fdSDimitry Andric       StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8663349cc55cSDimitry Andric     }
8664349cc55cSDimitry Andric     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8665349cc55cSDimitry Andric     CurrOffset += CopyTy.getSizeInBytes();
8666349cc55cSDimitry Andric   }
8667349cc55cSDimitry Andric   MI.eraseFromParent();
8668349cc55cSDimitry Andric   return Legalized;
8669349cc55cSDimitry Andric }
8670349cc55cSDimitry Andric 
8671349cc55cSDimitry Andric LegalizerHelper::LegalizeResult
8672349cc55cSDimitry Andric LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8673349cc55cSDimitry Andric   const unsigned Opc = MI.getOpcode();
8674349cc55cSDimitry Andric   // This combine is fairly complex so it's not written with a separate
8675349cc55cSDimitry Andric   // matcher function.
8676349cc55cSDimitry Andric   assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8677349cc55cSDimitry Andric           Opc == TargetOpcode::G_MEMSET) &&
8678349cc55cSDimitry Andric          "Expected memcpy like instruction");
8679349cc55cSDimitry Andric 
8680349cc55cSDimitry Andric   auto MMOIt = MI.memoperands_begin();
8681349cc55cSDimitry Andric   const MachineMemOperand *MemOp = *MMOIt;
8682349cc55cSDimitry Andric 
8683349cc55cSDimitry Andric   Align DstAlign = MemOp->getBaseAlign();
8684349cc55cSDimitry Andric   Align SrcAlign;
868506c3fb27SDimitry Andric   auto [Dst, Src, Len] = MI.getFirst3Regs();
8686349cc55cSDimitry Andric 
8687349cc55cSDimitry Andric   if (Opc != TargetOpcode::G_MEMSET) {
8688349cc55cSDimitry Andric     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8689349cc55cSDimitry Andric     MemOp = *(++MMOIt);
8690349cc55cSDimitry Andric     SrcAlign = MemOp->getBaseAlign();
8691349cc55cSDimitry Andric   }
8692349cc55cSDimitry Andric 
8693349cc55cSDimitry Andric   // See if this is a constant length copy
8694349cc55cSDimitry Andric   auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8695349cc55cSDimitry Andric   if (!LenVRegAndVal)
8696349cc55cSDimitry Andric     return UnableToLegalize;
8697349cc55cSDimitry Andric   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8698349cc55cSDimitry Andric 
8699349cc55cSDimitry Andric   if (KnownLen == 0) {
8700349cc55cSDimitry Andric     MI.eraseFromParent();
8701349cc55cSDimitry Andric     return Legalized;
8702349cc55cSDimitry Andric   }
8703349cc55cSDimitry Andric 
8704349cc55cSDimitry Andric   bool IsVolatile = MemOp->isVolatile();
8705349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8706349cc55cSDimitry Andric     return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8707349cc55cSDimitry Andric                              IsVolatile);
8708349cc55cSDimitry Andric 
8709349cc55cSDimitry Andric   // Don't try to optimize volatile.
8710349cc55cSDimitry Andric   if (IsVolatile)
8711349cc55cSDimitry Andric     return UnableToLegalize;
8712349cc55cSDimitry Andric 
8713349cc55cSDimitry Andric   if (MaxLen && KnownLen > MaxLen)
8714349cc55cSDimitry Andric     return UnableToLegalize;
8715349cc55cSDimitry Andric 
8716349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMCPY) {
8717349cc55cSDimitry Andric     auto &MF = *MI.getParent()->getParent();
8718349cc55cSDimitry Andric     const auto &TLI = *MF.getSubtarget().getTargetLowering();
8719349cc55cSDimitry Andric     bool OptSize = shouldLowerMemFuncForSize(MF);
8720349cc55cSDimitry Andric     uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8721349cc55cSDimitry Andric     return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8722349cc55cSDimitry Andric                        IsVolatile);
8723349cc55cSDimitry Andric   }
8724349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMMOVE)
8725349cc55cSDimitry Andric     return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8726349cc55cSDimitry Andric   if (Opc == TargetOpcode::G_MEMSET)
8727349cc55cSDimitry Andric     return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8728349cc55cSDimitry Andric   return UnableToLegalize;
8729349cc55cSDimitry Andric }
8730