1*0b57cec5SDimitry Andric //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric /// \file This file implements the LegalizerHelper class to legalize 10*0b57cec5SDimitry Andric /// individual instructions and the LegalizeMachineIR wrapper pass for the 11*0b57cec5SDimitry Andric /// primary legalization. 12*0b57cec5SDimitry Andric // 13*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 16*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/CallLowering.h" 17*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 18*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 20*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 21*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 22*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 23*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 24*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 25*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 26*0b57cec5SDimitry Andric 27*0b57cec5SDimitry Andric #define DEBUG_TYPE "legalizer" 28*0b57cec5SDimitry Andric 29*0b57cec5SDimitry Andric using namespace llvm; 30*0b57cec5SDimitry Andric using namespace 
LegalizeActions; 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 33*0b57cec5SDimitry Andric /// 34*0b57cec5SDimitry Andric /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 35*0b57cec5SDimitry Andric /// with any leftover piece as type \p LeftoverTy 36*0b57cec5SDimitry Andric /// 37*0b57cec5SDimitry Andric /// Returns -1 in the first element of the pair if the breakdown is not 38*0b57cec5SDimitry Andric /// satisfiable. 39*0b57cec5SDimitry Andric static std::pair<int, int> 40*0b57cec5SDimitry Andric getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { 41*0b57cec5SDimitry Andric assert(!LeftoverTy.isValid() && "this is an out argument"); 42*0b57cec5SDimitry Andric 43*0b57cec5SDimitry Andric unsigned Size = OrigTy.getSizeInBits(); 44*0b57cec5SDimitry Andric unsigned NarrowSize = NarrowTy.getSizeInBits(); 45*0b57cec5SDimitry Andric unsigned NumParts = Size / NarrowSize; 46*0b57cec5SDimitry Andric unsigned LeftoverSize = Size - NumParts * NarrowSize; 47*0b57cec5SDimitry Andric assert(Size > NarrowSize); 48*0b57cec5SDimitry Andric 49*0b57cec5SDimitry Andric if (LeftoverSize == 0) 50*0b57cec5SDimitry Andric return {NumParts, 0}; 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric if (NarrowTy.isVector()) { 53*0b57cec5SDimitry Andric unsigned EltSize = OrigTy.getScalarSizeInBits(); 54*0b57cec5SDimitry Andric if (LeftoverSize % EltSize != 0) 55*0b57cec5SDimitry Andric return {-1, -1}; 56*0b57cec5SDimitry Andric LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 57*0b57cec5SDimitry Andric } else { 58*0b57cec5SDimitry Andric LeftoverTy = LLT::scalar(LeftoverSize); 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits(); 62*0b57cec5SDimitry Andric return std::make_pair(NumParts, NumLeftover); 63*0b57cec5SDimitry Andric } 64*0b57cec5SDimitry Andric 
65*0b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF, 66*0b57cec5SDimitry Andric GISelChangeObserver &Observer, 67*0b57cec5SDimitry Andric MachineIRBuilder &Builder) 68*0b57cec5SDimitry Andric : MIRBuilder(Builder), MRI(MF.getRegInfo()), 69*0b57cec5SDimitry Andric LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { 70*0b57cec5SDimitry Andric MIRBuilder.setMF(MF); 71*0b57cec5SDimitry Andric MIRBuilder.setChangeObserver(Observer); 72*0b57cec5SDimitry Andric } 73*0b57cec5SDimitry Andric 74*0b57cec5SDimitry Andric LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, 75*0b57cec5SDimitry Andric GISelChangeObserver &Observer, 76*0b57cec5SDimitry Andric MachineIRBuilder &B) 77*0b57cec5SDimitry Andric : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { 78*0b57cec5SDimitry Andric MIRBuilder.setMF(MF); 79*0b57cec5SDimitry Andric MIRBuilder.setChangeObserver(Observer); 80*0b57cec5SDimitry Andric } 81*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 82*0b57cec5SDimitry Andric LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 83*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || 86*0b57cec5SDimitry Andric MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) 87*0b57cec5SDimitry Andric return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized 88*0b57cec5SDimitry Andric : UnableToLegalize; 89*0b57cec5SDimitry Andric auto Step = LI.getAction(MI, MRI); 90*0b57cec5SDimitry Andric switch (Step.Action) { 91*0b57cec5SDimitry Andric case Legal: 92*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Already legal\n"); 93*0b57cec5SDimitry Andric return AlreadyLegal; 94*0b57cec5SDimitry Andric case Libcall: 95*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. 
Convert to libcall\n"); 96*0b57cec5SDimitry Andric return libcall(MI); 97*0b57cec5SDimitry Andric case NarrowScalar: 98*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); 99*0b57cec5SDimitry Andric return narrowScalar(MI, Step.TypeIdx, Step.NewType); 100*0b57cec5SDimitry Andric case WidenScalar: 101*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 102*0b57cec5SDimitry Andric return widenScalar(MI, Step.TypeIdx, Step.NewType); 103*0b57cec5SDimitry Andric case Lower: 104*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Lower\n"); 105*0b57cec5SDimitry Andric return lower(MI, Step.TypeIdx, Step.NewType); 106*0b57cec5SDimitry Andric case FewerElements: 107*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 108*0b57cec5SDimitry Andric return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 109*0b57cec5SDimitry Andric case MoreElements: 110*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); 111*0b57cec5SDimitry Andric return moreElementsVector(MI, Step.TypeIdx, Step.NewType); 112*0b57cec5SDimitry Andric case Custom: 113*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 114*0b57cec5SDimitry Andric return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized 115*0b57cec5SDimitry Andric : UnableToLegalize; 116*0b57cec5SDimitry Andric default: 117*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ".. 
Unable to legalize\n"); 118*0b57cec5SDimitry Andric return UnableToLegalize; 119*0b57cec5SDimitry Andric } 120*0b57cec5SDimitry Andric } 121*0b57cec5SDimitry Andric 122*0b57cec5SDimitry Andric void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts, 123*0b57cec5SDimitry Andric SmallVectorImpl<Register> &VRegs) { 124*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) 125*0b57cec5SDimitry Andric VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 126*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge(VRegs, Reg); 127*0b57cec5SDimitry Andric } 128*0b57cec5SDimitry Andric 129*0b57cec5SDimitry Andric bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, 130*0b57cec5SDimitry Andric LLT MainTy, LLT &LeftoverTy, 131*0b57cec5SDimitry Andric SmallVectorImpl<Register> &VRegs, 132*0b57cec5SDimitry Andric SmallVectorImpl<Register> &LeftoverRegs) { 133*0b57cec5SDimitry Andric assert(!LeftoverTy.isValid() && "this is an out argument"); 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric unsigned RegSize = RegTy.getSizeInBits(); 136*0b57cec5SDimitry Andric unsigned MainSize = MainTy.getSizeInBits(); 137*0b57cec5SDimitry Andric unsigned NumParts = RegSize / MainSize; 138*0b57cec5SDimitry Andric unsigned LeftoverSize = RegSize - NumParts * MainSize; 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric // Use an unmerge when possible. 
141*0b57cec5SDimitry Andric if (LeftoverSize == 0) { 142*0b57cec5SDimitry Andric for (unsigned I = 0; I < NumParts; ++I) 143*0b57cec5SDimitry Andric VRegs.push_back(MRI.createGenericVirtualRegister(MainTy)); 144*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge(VRegs, Reg); 145*0b57cec5SDimitry Andric return true; 146*0b57cec5SDimitry Andric } 147*0b57cec5SDimitry Andric 148*0b57cec5SDimitry Andric if (MainTy.isVector()) { 149*0b57cec5SDimitry Andric unsigned EltSize = MainTy.getScalarSizeInBits(); 150*0b57cec5SDimitry Andric if (LeftoverSize % EltSize != 0) 151*0b57cec5SDimitry Andric return false; 152*0b57cec5SDimitry Andric LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 153*0b57cec5SDimitry Andric } else { 154*0b57cec5SDimitry Andric LeftoverTy = LLT::scalar(LeftoverSize); 155*0b57cec5SDimitry Andric } 156*0b57cec5SDimitry Andric 157*0b57cec5SDimitry Andric // For irregular sizes, extract the individual parts. 158*0b57cec5SDimitry Andric for (unsigned I = 0; I != NumParts; ++I) { 159*0b57cec5SDimitry Andric Register NewReg = MRI.createGenericVirtualRegister(MainTy); 160*0b57cec5SDimitry Andric VRegs.push_back(NewReg); 161*0b57cec5SDimitry Andric MIRBuilder.buildExtract(NewReg, Reg, MainSize * I); 162*0b57cec5SDimitry Andric } 163*0b57cec5SDimitry Andric 164*0b57cec5SDimitry Andric for (unsigned Offset = MainSize * NumParts; Offset < RegSize; 165*0b57cec5SDimitry Andric Offset += LeftoverSize) { 166*0b57cec5SDimitry Andric Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy); 167*0b57cec5SDimitry Andric LeftoverRegs.push_back(NewReg); 168*0b57cec5SDimitry Andric MIRBuilder.buildExtract(NewReg, Reg, Offset); 169*0b57cec5SDimitry Andric } 170*0b57cec5SDimitry Andric 171*0b57cec5SDimitry Andric return true; 172*0b57cec5SDimitry Andric } 173*0b57cec5SDimitry Andric 174*0b57cec5SDimitry Andric void LegalizerHelper::insertParts(Register DstReg, 175*0b57cec5SDimitry Andric LLT ResultTy, LLT PartTy, 176*0b57cec5SDimitry Andric 
ArrayRef<Register> PartRegs, 177*0b57cec5SDimitry Andric LLT LeftoverTy, 178*0b57cec5SDimitry Andric ArrayRef<Register> LeftoverRegs) { 179*0b57cec5SDimitry Andric if (!LeftoverTy.isValid()) { 180*0b57cec5SDimitry Andric assert(LeftoverRegs.empty()); 181*0b57cec5SDimitry Andric 182*0b57cec5SDimitry Andric if (!ResultTy.isVector()) { 183*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, PartRegs); 184*0b57cec5SDimitry Andric return; 185*0b57cec5SDimitry Andric } 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric if (PartTy.isVector()) 188*0b57cec5SDimitry Andric MIRBuilder.buildConcatVectors(DstReg, PartRegs); 189*0b57cec5SDimitry Andric else 190*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, PartRegs); 191*0b57cec5SDimitry Andric return; 192*0b57cec5SDimitry Andric } 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric unsigned PartSize = PartTy.getSizeInBits(); 195*0b57cec5SDimitry Andric unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy); 198*0b57cec5SDimitry Andric MIRBuilder.buildUndef(CurResultReg); 199*0b57cec5SDimitry Andric 200*0b57cec5SDimitry Andric unsigned Offset = 0; 201*0b57cec5SDimitry Andric for (Register PartReg : PartRegs) { 202*0b57cec5SDimitry Andric Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy); 203*0b57cec5SDimitry Andric MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); 204*0b57cec5SDimitry Andric CurResultReg = NewResultReg; 205*0b57cec5SDimitry Andric Offset += PartSize; 206*0b57cec5SDimitry Andric } 207*0b57cec5SDimitry Andric 208*0b57cec5SDimitry Andric for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { 209*0b57cec5SDimitry Andric // Use the original output register for the final insert to avoid a copy. 210*0b57cec5SDimitry Andric Register NewResultReg = (I + 1 == E) ? 
211*0b57cec5SDimitry Andric DstReg : MRI.createGenericVirtualRegister(ResultTy); 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); 214*0b57cec5SDimitry Andric CurResultReg = NewResultReg; 215*0b57cec5SDimitry Andric Offset += LeftoverPartSize; 216*0b57cec5SDimitry Andric } 217*0b57cec5SDimitry Andric } 218*0b57cec5SDimitry Andric 219*0b57cec5SDimitry Andric static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 220*0b57cec5SDimitry Andric switch (Opcode) { 221*0b57cec5SDimitry Andric case TargetOpcode::G_SDIV: 222*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 223*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; 224*0b57cec5SDimitry Andric case TargetOpcode::G_UDIV: 225*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 226*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; 227*0b57cec5SDimitry Andric case TargetOpcode::G_SREM: 228*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 229*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; 230*0b57cec5SDimitry Andric case TargetOpcode::G_UREM: 231*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 232*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32; 233*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ_ZERO_UNDEF: 234*0b57cec5SDimitry Andric assert(Size == 32 && "Unsupported size"); 235*0b57cec5SDimitry Andric return RTLIB::CTLZ_I32; 236*0b57cec5SDimitry Andric case TargetOpcode::G_FADD: 237*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 238*0b57cec5SDimitry Andric return Size == 64 ? 
RTLIB::ADD_F64 : RTLIB::ADD_F32; 239*0b57cec5SDimitry Andric case TargetOpcode::G_FSUB: 240*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 241*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 242*0b57cec5SDimitry Andric case TargetOpcode::G_FMUL: 243*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 244*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; 245*0b57cec5SDimitry Andric case TargetOpcode::G_FDIV: 246*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 247*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 248*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP: 249*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 250*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; 251*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP2: 252*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 253*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32; 254*0b57cec5SDimitry Andric case TargetOpcode::G_FREM: 255*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 256*0b57cec5SDimitry Andric case TargetOpcode::G_FPOW: 257*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; 258*0b57cec5SDimitry Andric case TargetOpcode::G_FMA: 259*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 260*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 261*0b57cec5SDimitry Andric case TargetOpcode::G_FSIN: 262*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 263*0b57cec5SDimitry Andric return Size == 128 ? RTLIB::SIN_F128 264*0b57cec5SDimitry Andric : Size == 64 ? 
RTLIB::SIN_F64 : RTLIB::SIN_F32; 265*0b57cec5SDimitry Andric case TargetOpcode::G_FCOS: 266*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 267*0b57cec5SDimitry Andric return Size == 128 ? RTLIB::COS_F128 268*0b57cec5SDimitry Andric : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; 269*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG10: 270*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 271*0b57cec5SDimitry Andric return Size == 128 ? RTLIB::LOG10_F128 272*0b57cec5SDimitry Andric : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; 273*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG: 274*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 275*0b57cec5SDimitry Andric return Size == 128 ? RTLIB::LOG_F128 276*0b57cec5SDimitry Andric : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32; 277*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG2: 278*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 279*0b57cec5SDimitry Andric return Size == 128 ? RTLIB::LOG2_F128 280*0b57cec5SDimitry Andric : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32; 281*0b57cec5SDimitry Andric case TargetOpcode::G_FCEIL: 282*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 283*0b57cec5SDimitry Andric return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32; 284*0b57cec5SDimitry Andric case TargetOpcode::G_FFLOOR: 285*0b57cec5SDimitry Andric assert((Size == 32 || Size == 64) && "Unsupported size"); 286*0b57cec5SDimitry Andric return Size == 64 ? 
RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32; 287*0b57cec5SDimitry Andric } 288*0b57cec5SDimitry Andric llvm_unreachable("Unknown libcall function"); 289*0b57cec5SDimitry Andric } 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 292*0b57cec5SDimitry Andric llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 293*0b57cec5SDimitry Andric const CallLowering::ArgInfo &Result, 294*0b57cec5SDimitry Andric ArrayRef<CallLowering::ArgInfo> Args) { 295*0b57cec5SDimitry Andric auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 296*0b57cec5SDimitry Andric auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 297*0b57cec5SDimitry Andric const char *Name = TLI.getLibcallName(Libcall); 298*0b57cec5SDimitry Andric 299*0b57cec5SDimitry Andric MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 300*0b57cec5SDimitry Andric if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 301*0b57cec5SDimitry Andric MachineOperand::CreateES(Name), Result, Args)) 302*0b57cec5SDimitry Andric return LegalizerHelper::UnableToLegalize; 303*0b57cec5SDimitry Andric 304*0b57cec5SDimitry Andric return LegalizerHelper::Legalized; 305*0b57cec5SDimitry Andric } 306*0b57cec5SDimitry Andric 307*0b57cec5SDimitry Andric // Useful for libcalls where all operands have the same type. 
308*0b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult 309*0b57cec5SDimitry Andric simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, 310*0b57cec5SDimitry Andric Type *OpType) { 311*0b57cec5SDimitry Andric auto Libcall = getRTLibDesc(MI.getOpcode(), Size); 312*0b57cec5SDimitry Andric 313*0b57cec5SDimitry Andric SmallVector<CallLowering::ArgInfo, 3> Args; 314*0b57cec5SDimitry Andric for (unsigned i = 1; i < MI.getNumOperands(); i++) 315*0b57cec5SDimitry Andric Args.push_back({MI.getOperand(i).getReg(), OpType}); 316*0b57cec5SDimitry Andric return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, 317*0b57cec5SDimitry Andric Args); 318*0b57cec5SDimitry Andric } 319*0b57cec5SDimitry Andric 320*0b57cec5SDimitry Andric static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, 321*0b57cec5SDimitry Andric Type *FromType) { 322*0b57cec5SDimitry Andric auto ToMVT = MVT::getVT(ToType); 323*0b57cec5SDimitry Andric auto FromMVT = MVT::getVT(FromType); 324*0b57cec5SDimitry Andric 325*0b57cec5SDimitry Andric switch (Opcode) { 326*0b57cec5SDimitry Andric case TargetOpcode::G_FPEXT: 327*0b57cec5SDimitry Andric return RTLIB::getFPEXT(FromMVT, ToMVT); 328*0b57cec5SDimitry Andric case TargetOpcode::G_FPTRUNC: 329*0b57cec5SDimitry Andric return RTLIB::getFPROUND(FromMVT, ToMVT); 330*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOSI: 331*0b57cec5SDimitry Andric return RTLIB::getFPTOSINT(FromMVT, ToMVT); 332*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOUI: 333*0b57cec5SDimitry Andric return RTLIB::getFPTOUINT(FromMVT, ToMVT); 334*0b57cec5SDimitry Andric case TargetOpcode::G_SITOFP: 335*0b57cec5SDimitry Andric return RTLIB::getSINTTOFP(FromMVT, ToMVT); 336*0b57cec5SDimitry Andric case TargetOpcode::G_UITOFP: 337*0b57cec5SDimitry Andric return RTLIB::getUINTTOFP(FromMVT, ToMVT); 338*0b57cec5SDimitry Andric } 339*0b57cec5SDimitry Andric llvm_unreachable("Unsupported libcall function"); 340*0b57cec5SDimitry Andric 
} 341*0b57cec5SDimitry Andric 342*0b57cec5SDimitry Andric static LegalizerHelper::LegalizeResult 343*0b57cec5SDimitry Andric conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, 344*0b57cec5SDimitry Andric Type *FromType) { 345*0b57cec5SDimitry Andric RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); 346*0b57cec5SDimitry Andric return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, 347*0b57cec5SDimitry Andric {{MI.getOperand(1).getReg(), FromType}}); 348*0b57cec5SDimitry Andric } 349*0b57cec5SDimitry Andric 350*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 351*0b57cec5SDimitry Andric LegalizerHelper::libcall(MachineInstr &MI) { 352*0b57cec5SDimitry Andric LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); 353*0b57cec5SDimitry Andric unsigned Size = LLTy.getSizeInBits(); 354*0b57cec5SDimitry Andric auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); 355*0b57cec5SDimitry Andric 356*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 357*0b57cec5SDimitry Andric 358*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 359*0b57cec5SDimitry Andric default: 360*0b57cec5SDimitry Andric return UnableToLegalize; 361*0b57cec5SDimitry Andric case TargetOpcode::G_SDIV: 362*0b57cec5SDimitry Andric case TargetOpcode::G_UDIV: 363*0b57cec5SDimitry Andric case TargetOpcode::G_SREM: 364*0b57cec5SDimitry Andric case TargetOpcode::G_UREM: 365*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 366*0b57cec5SDimitry Andric Type *HLTy = IntegerType::get(Ctx, Size); 367*0b57cec5SDimitry Andric auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 368*0b57cec5SDimitry Andric if (Status != Legalized) 369*0b57cec5SDimitry Andric return Status; 370*0b57cec5SDimitry Andric break; 371*0b57cec5SDimitry Andric } 372*0b57cec5SDimitry Andric case TargetOpcode::G_FADD: 373*0b57cec5SDimitry Andric case TargetOpcode::G_FSUB: 374*0b57cec5SDimitry Andric case TargetOpcode::G_FMUL: 375*0b57cec5SDimitry 
Andric case TargetOpcode::G_FDIV: 376*0b57cec5SDimitry Andric case TargetOpcode::G_FMA: 377*0b57cec5SDimitry Andric case TargetOpcode::G_FPOW: 378*0b57cec5SDimitry Andric case TargetOpcode::G_FREM: 379*0b57cec5SDimitry Andric case TargetOpcode::G_FCOS: 380*0b57cec5SDimitry Andric case TargetOpcode::G_FSIN: 381*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG10: 382*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG: 383*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG2: 384*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP: 385*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP2: 386*0b57cec5SDimitry Andric case TargetOpcode::G_FCEIL: 387*0b57cec5SDimitry Andric case TargetOpcode::G_FFLOOR: { 388*0b57cec5SDimitry Andric if (Size > 64) { 389*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); 390*0b57cec5SDimitry Andric return UnableToLegalize; 391*0b57cec5SDimitry Andric } 392*0b57cec5SDimitry Andric Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); 393*0b57cec5SDimitry Andric auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 394*0b57cec5SDimitry Andric if (Status != Legalized) 395*0b57cec5SDimitry Andric return Status; 396*0b57cec5SDimitry Andric break; 397*0b57cec5SDimitry Andric } 398*0b57cec5SDimitry Andric case TargetOpcode::G_FPEXT: { 399*0b57cec5SDimitry Andric // FIXME: Support other floating point types (half, fp128 etc) 400*0b57cec5SDimitry Andric unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 401*0b57cec5SDimitry Andric unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 402*0b57cec5SDimitry Andric if (ToSize != 64 || FromSize != 32) 403*0b57cec5SDimitry Andric return UnableToLegalize; 404*0b57cec5SDimitry Andric LegalizeResult Status = conversionLibcall( 405*0b57cec5SDimitry Andric MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); 406*0b57cec5SDimitry Andric if (Status != Legalized) 407*0b57cec5SDimitry Andric return 
Status; 408*0b57cec5SDimitry Andric break; 409*0b57cec5SDimitry Andric } 410*0b57cec5SDimitry Andric case TargetOpcode::G_FPTRUNC: { 411*0b57cec5SDimitry Andric // FIXME: Support other floating point types (half, fp128 etc) 412*0b57cec5SDimitry Andric unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 413*0b57cec5SDimitry Andric unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 414*0b57cec5SDimitry Andric if (ToSize != 32 || FromSize != 64) 415*0b57cec5SDimitry Andric return UnableToLegalize; 416*0b57cec5SDimitry Andric LegalizeResult Status = conversionLibcall( 417*0b57cec5SDimitry Andric MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); 418*0b57cec5SDimitry Andric if (Status != Legalized) 419*0b57cec5SDimitry Andric return Status; 420*0b57cec5SDimitry Andric break; 421*0b57cec5SDimitry Andric } 422*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOSI: 423*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOUI: { 424*0b57cec5SDimitry Andric // FIXME: Support other types 425*0b57cec5SDimitry Andric unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 426*0b57cec5SDimitry Andric unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 427*0b57cec5SDimitry Andric if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64)) 428*0b57cec5SDimitry Andric return UnableToLegalize; 429*0b57cec5SDimitry Andric LegalizeResult Status = conversionLibcall( 430*0b57cec5SDimitry Andric MI, MIRBuilder, 431*0b57cec5SDimitry Andric ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), 432*0b57cec5SDimitry Andric FromSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); 433*0b57cec5SDimitry Andric if (Status != Legalized) 434*0b57cec5SDimitry Andric return Status; 435*0b57cec5SDimitry Andric break; 436*0b57cec5SDimitry Andric } 437*0b57cec5SDimitry Andric case TargetOpcode::G_SITOFP: 438*0b57cec5SDimitry Andric case TargetOpcode::G_UITOFP: { 439*0b57cec5SDimitry Andric // FIXME: Support other types 440*0b57cec5SDimitry Andric unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 441*0b57cec5SDimitry Andric unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 442*0b57cec5SDimitry Andric if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64)) 443*0b57cec5SDimitry Andric return UnableToLegalize; 444*0b57cec5SDimitry Andric LegalizeResult Status = conversionLibcall( 445*0b57cec5SDimitry Andric MI, MIRBuilder, 446*0b57cec5SDimitry Andric ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), 447*0b57cec5SDimitry Andric FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); 448*0b57cec5SDimitry Andric if (Status != Legalized) 449*0b57cec5SDimitry Andric return Status; 450*0b57cec5SDimitry Andric break; 451*0b57cec5SDimitry Andric } 452*0b57cec5SDimitry Andric } 453*0b57cec5SDimitry Andric 454*0b57cec5SDimitry Andric MI.eraseFromParent(); 455*0b57cec5SDimitry Andric return Legalized; 456*0b57cec5SDimitry Andric } 457*0b57cec5SDimitry Andric 458*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, 459*0b57cec5SDimitry Andric unsigned TypeIdx, 460*0b57cec5SDimitry Andric LLT NarrowTy) { 461*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 462*0b57cec5SDimitry Andric 463*0b57cec5SDimitry Andric uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 464*0b57cec5SDimitry Andric uint64_t NarrowSize = NarrowTy.getSizeInBits(); 465*0b57cec5SDimitry Andric 466*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 467*0b57cec5SDimitry Andric default: 
468*0b57cec5SDimitry Andric return UnableToLegalize; 469*0b57cec5SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 470*0b57cec5SDimitry Andric // FIXME: add support for when SizeOp0 isn't an exact multiple of 471*0b57cec5SDimitry Andric // NarrowSize. 472*0b57cec5SDimitry Andric if (SizeOp0 % NarrowSize != 0) 473*0b57cec5SDimitry Andric return UnableToLegalize; 474*0b57cec5SDimitry Andric int NumParts = SizeOp0 / NarrowSize; 475*0b57cec5SDimitry Andric 476*0b57cec5SDimitry Andric SmallVector<Register, 2> DstRegs; 477*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) 478*0b57cec5SDimitry Andric DstRegs.push_back( 479*0b57cec5SDimitry Andric MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); 480*0b57cec5SDimitry Andric 481*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 482*0b57cec5SDimitry Andric if(MRI.getType(DstReg).isVector()) 483*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 484*0b57cec5SDimitry Andric else 485*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, DstRegs); 486*0b57cec5SDimitry Andric MI.eraseFromParent(); 487*0b57cec5SDimitry Andric return Legalized; 488*0b57cec5SDimitry Andric } 489*0b57cec5SDimitry Andric case TargetOpcode::G_CONSTANT: { 490*0b57cec5SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 491*0b57cec5SDimitry Andric const APInt &Val = MI.getOperand(1).getCImm()->getValue(); 492*0b57cec5SDimitry Andric unsigned TotalSize = Ty.getSizeInBits(); 493*0b57cec5SDimitry Andric unsigned NarrowSize = NarrowTy.getSizeInBits(); 494*0b57cec5SDimitry Andric int NumParts = TotalSize / NarrowSize; 495*0b57cec5SDimitry Andric 496*0b57cec5SDimitry Andric SmallVector<Register, 4> PartRegs; 497*0b57cec5SDimitry Andric for (int I = 0; I != NumParts; ++I) { 498*0b57cec5SDimitry Andric unsigned Offset = I * NarrowSize; 499*0b57cec5SDimitry Andric auto K = MIRBuilder.buildConstant(NarrowTy, 500*0b57cec5SDimitry Andric Val.lshr(Offset).trunc(NarrowSize)); 501*0b57cec5SDimitry 
Andric PartRegs.push_back(K.getReg(0)); 502*0b57cec5SDimitry Andric } 503*0b57cec5SDimitry Andric 504*0b57cec5SDimitry Andric LLT LeftoverTy; 505*0b57cec5SDimitry Andric unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; 506*0b57cec5SDimitry Andric SmallVector<Register, 1> LeftoverRegs; 507*0b57cec5SDimitry Andric if (LeftoverBits != 0) { 508*0b57cec5SDimitry Andric LeftoverTy = LLT::scalar(LeftoverBits); 509*0b57cec5SDimitry Andric auto K = MIRBuilder.buildConstant( 510*0b57cec5SDimitry Andric LeftoverTy, 511*0b57cec5SDimitry Andric Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); 512*0b57cec5SDimitry Andric LeftoverRegs.push_back(K.getReg(0)); 513*0b57cec5SDimitry Andric } 514*0b57cec5SDimitry Andric 515*0b57cec5SDimitry Andric insertParts(MI.getOperand(0).getReg(), 516*0b57cec5SDimitry Andric Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs); 517*0b57cec5SDimitry Andric 518*0b57cec5SDimitry Andric MI.eraseFromParent(); 519*0b57cec5SDimitry Andric return Legalized; 520*0b57cec5SDimitry Andric } 521*0b57cec5SDimitry Andric case TargetOpcode::G_ADD: { 522*0b57cec5SDimitry Andric // FIXME: add support for when SizeOp0 isn't an exact multiple of 523*0b57cec5SDimitry Andric // NarrowSize. 524*0b57cec5SDimitry Andric if (SizeOp0 % NarrowSize != 0) 525*0b57cec5SDimitry Andric return UnableToLegalize; 526*0b57cec5SDimitry Andric // Expand in terms of carry-setting/consuming G_ADDE instructions. 
527*0b57cec5SDimitry Andric int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 528*0b57cec5SDimitry Andric 529*0b57cec5SDimitry Andric SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; 530*0b57cec5SDimitry Andric extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 531*0b57cec5SDimitry Andric extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 532*0b57cec5SDimitry Andric 533*0b57cec5SDimitry Andric Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); 534*0b57cec5SDimitry Andric MIRBuilder.buildConstant(CarryIn, 0); 535*0b57cec5SDimitry Andric 536*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) { 537*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 538*0b57cec5SDimitry Andric Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 539*0b57cec5SDimitry Andric 540*0b57cec5SDimitry Andric MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], 541*0b57cec5SDimitry Andric Src2Regs[i], CarryIn); 542*0b57cec5SDimitry Andric 543*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 544*0b57cec5SDimitry Andric CarryIn = CarryOut; 545*0b57cec5SDimitry Andric } 546*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 547*0b57cec5SDimitry Andric if(MRI.getType(DstReg).isVector()) 548*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 549*0b57cec5SDimitry Andric else 550*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, DstRegs); 551*0b57cec5SDimitry Andric MI.eraseFromParent(); 552*0b57cec5SDimitry Andric return Legalized; 553*0b57cec5SDimitry Andric } 554*0b57cec5SDimitry Andric case TargetOpcode::G_SUB: { 555*0b57cec5SDimitry Andric // FIXME: add support for when SizeOp0 isn't an exact multiple of 556*0b57cec5SDimitry Andric // NarrowSize. 
557*0b57cec5SDimitry Andric if (SizeOp0 % NarrowSize != 0) 558*0b57cec5SDimitry Andric return UnableToLegalize; 559*0b57cec5SDimitry Andric 560*0b57cec5SDimitry Andric int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 561*0b57cec5SDimitry Andric 562*0b57cec5SDimitry Andric SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; 563*0b57cec5SDimitry Andric extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 564*0b57cec5SDimitry Andric extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 565*0b57cec5SDimitry Andric 566*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 567*0b57cec5SDimitry Andric Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 568*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, 569*0b57cec5SDimitry Andric {Src1Regs[0], Src2Regs[0]}); 570*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 571*0b57cec5SDimitry Andric Register BorrowIn = BorrowOut; 572*0b57cec5SDimitry Andric for (int i = 1; i < NumParts; ++i) { 573*0b57cec5SDimitry Andric DstReg = MRI.createGenericVirtualRegister(NarrowTy); 574*0b57cec5SDimitry Andric BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 575*0b57cec5SDimitry Andric 576*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, 577*0b57cec5SDimitry Andric {Src1Regs[i], Src2Regs[i], BorrowIn}); 578*0b57cec5SDimitry Andric 579*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 580*0b57cec5SDimitry Andric BorrowIn = BorrowOut; 581*0b57cec5SDimitry Andric } 582*0b57cec5SDimitry Andric MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 583*0b57cec5SDimitry Andric MI.eraseFromParent(); 584*0b57cec5SDimitry Andric return Legalized; 585*0b57cec5SDimitry Andric } 586*0b57cec5SDimitry Andric case TargetOpcode::G_MUL: 587*0b57cec5SDimitry Andric case TargetOpcode::G_UMULH: 588*0b57cec5SDimitry Andric return narrowScalarMul(MI, NarrowTy); 
589*0b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT: 590*0b57cec5SDimitry Andric return narrowScalarExtract(MI, TypeIdx, NarrowTy); 591*0b57cec5SDimitry Andric case TargetOpcode::G_INSERT: 592*0b57cec5SDimitry Andric return narrowScalarInsert(MI, TypeIdx, NarrowTy); 593*0b57cec5SDimitry Andric case TargetOpcode::G_LOAD: { 594*0b57cec5SDimitry Andric const auto &MMO = **MI.memoperands_begin(); 595*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 596*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 597*0b57cec5SDimitry Andric if (DstTy.isVector()) 598*0b57cec5SDimitry Andric return UnableToLegalize; 599*0b57cec5SDimitry Andric 600*0b57cec5SDimitry Andric if (8 * MMO.getSize() != DstTy.getSizeInBits()) { 601*0b57cec5SDimitry Andric Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 602*0b57cec5SDimitry Andric auto &MMO = **MI.memoperands_begin(); 603*0b57cec5SDimitry Andric MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); 604*0b57cec5SDimitry Andric MIRBuilder.buildAnyExt(DstReg, TmpReg); 605*0b57cec5SDimitry Andric MI.eraseFromParent(); 606*0b57cec5SDimitry Andric return Legalized; 607*0b57cec5SDimitry Andric } 608*0b57cec5SDimitry Andric 609*0b57cec5SDimitry Andric return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 610*0b57cec5SDimitry Andric } 611*0b57cec5SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 612*0b57cec5SDimitry Andric case TargetOpcode::G_SEXTLOAD: { 613*0b57cec5SDimitry Andric bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; 614*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 615*0b57cec5SDimitry Andric Register PtrReg = MI.getOperand(1).getReg(); 616*0b57cec5SDimitry Andric 617*0b57cec5SDimitry Andric Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 618*0b57cec5SDimitry Andric auto &MMO = **MI.memoperands_begin(); 619*0b57cec5SDimitry Andric if (MMO.getSizeInBits() == NarrowSize) { 620*0b57cec5SDimitry Andric MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 
621*0b57cec5SDimitry Andric } else { 622*0b57cec5SDimitry Andric unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD 623*0b57cec5SDimitry Andric : TargetOpcode::G_SEXTLOAD; 624*0b57cec5SDimitry Andric MIRBuilder.buildInstr(ExtLoad) 625*0b57cec5SDimitry Andric .addDef(TmpReg) 626*0b57cec5SDimitry Andric .addUse(PtrReg) 627*0b57cec5SDimitry Andric .addMemOperand(&MMO); 628*0b57cec5SDimitry Andric } 629*0b57cec5SDimitry Andric 630*0b57cec5SDimitry Andric if (ZExt) 631*0b57cec5SDimitry Andric MIRBuilder.buildZExt(DstReg, TmpReg); 632*0b57cec5SDimitry Andric else 633*0b57cec5SDimitry Andric MIRBuilder.buildSExt(DstReg, TmpReg); 634*0b57cec5SDimitry Andric 635*0b57cec5SDimitry Andric MI.eraseFromParent(); 636*0b57cec5SDimitry Andric return Legalized; 637*0b57cec5SDimitry Andric } 638*0b57cec5SDimitry Andric case TargetOpcode::G_STORE: { 639*0b57cec5SDimitry Andric const auto &MMO = **MI.memoperands_begin(); 640*0b57cec5SDimitry Andric 641*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(0).getReg(); 642*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 643*0b57cec5SDimitry Andric if (SrcTy.isVector()) 644*0b57cec5SDimitry Andric return UnableToLegalize; 645*0b57cec5SDimitry Andric 646*0b57cec5SDimitry Andric int NumParts = SizeOp0 / NarrowSize; 647*0b57cec5SDimitry Andric unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); 648*0b57cec5SDimitry Andric unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; 649*0b57cec5SDimitry Andric if (SrcTy.isVector() && LeftoverBits != 0) 650*0b57cec5SDimitry Andric return UnableToLegalize; 651*0b57cec5SDimitry Andric 652*0b57cec5SDimitry Andric if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { 653*0b57cec5SDimitry Andric Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 654*0b57cec5SDimitry Andric auto &MMO = **MI.memoperands_begin(); 655*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(TmpReg, SrcReg); 656*0b57cec5SDimitry Andric MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); 
657*0b57cec5SDimitry Andric MI.eraseFromParent(); 658*0b57cec5SDimitry Andric return Legalized; 659*0b57cec5SDimitry Andric } 660*0b57cec5SDimitry Andric 661*0b57cec5SDimitry Andric return reduceLoadStoreWidth(MI, 0, NarrowTy); 662*0b57cec5SDimitry Andric } 663*0b57cec5SDimitry Andric case TargetOpcode::G_SELECT: 664*0b57cec5SDimitry Andric return narrowScalarSelect(MI, TypeIdx, NarrowTy); 665*0b57cec5SDimitry Andric case TargetOpcode::G_AND: 666*0b57cec5SDimitry Andric case TargetOpcode::G_OR: 667*0b57cec5SDimitry Andric case TargetOpcode::G_XOR: { 668*0b57cec5SDimitry Andric // Legalize bitwise operation: 669*0b57cec5SDimitry Andric // A = BinOp<Ty> B, C 670*0b57cec5SDimitry Andric // into: 671*0b57cec5SDimitry Andric // B1, ..., BN = G_UNMERGE_VALUES B 672*0b57cec5SDimitry Andric // C1, ..., CN = G_UNMERGE_VALUES C 673*0b57cec5SDimitry Andric // A1 = BinOp<Ty/N> B1, C2 674*0b57cec5SDimitry Andric // ... 675*0b57cec5SDimitry Andric // AN = BinOp<Ty/N> BN, CN 676*0b57cec5SDimitry Andric // A = G_MERGE_VALUES A1, ..., AN 677*0b57cec5SDimitry Andric return narrowScalarBasic(MI, TypeIdx, NarrowTy); 678*0b57cec5SDimitry Andric } 679*0b57cec5SDimitry Andric case TargetOpcode::G_SHL: 680*0b57cec5SDimitry Andric case TargetOpcode::G_LSHR: 681*0b57cec5SDimitry Andric case TargetOpcode::G_ASHR: 682*0b57cec5SDimitry Andric return narrowScalarShift(MI, TypeIdx, NarrowTy); 683*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ: 684*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ_ZERO_UNDEF: 685*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ: 686*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ_ZERO_UNDEF: 687*0b57cec5SDimitry Andric case TargetOpcode::G_CTPOP: 688*0b57cec5SDimitry Andric if (TypeIdx != 0) 689*0b57cec5SDimitry Andric return UnableToLegalize; // TODO 690*0b57cec5SDimitry Andric 691*0b57cec5SDimitry Andric Observer.changingInstr(MI); 692*0b57cec5SDimitry Andric narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 693*0b57cec5SDimitry Andric 
Observer.changedInstr(MI); 694*0b57cec5SDimitry Andric return Legalized; 695*0b57cec5SDimitry Andric case TargetOpcode::G_INTTOPTR: 696*0b57cec5SDimitry Andric if (TypeIdx != 1) 697*0b57cec5SDimitry Andric return UnableToLegalize; 698*0b57cec5SDimitry Andric 699*0b57cec5SDimitry Andric Observer.changingInstr(MI); 700*0b57cec5SDimitry Andric narrowScalarSrc(MI, NarrowTy, 1); 701*0b57cec5SDimitry Andric Observer.changedInstr(MI); 702*0b57cec5SDimitry Andric return Legalized; 703*0b57cec5SDimitry Andric case TargetOpcode::G_PTRTOINT: 704*0b57cec5SDimitry Andric if (TypeIdx != 0) 705*0b57cec5SDimitry Andric return UnableToLegalize; 706*0b57cec5SDimitry Andric 707*0b57cec5SDimitry Andric Observer.changingInstr(MI); 708*0b57cec5SDimitry Andric narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 709*0b57cec5SDimitry Andric Observer.changedInstr(MI); 710*0b57cec5SDimitry Andric return Legalized; 711*0b57cec5SDimitry Andric case TargetOpcode::G_PHI: { 712*0b57cec5SDimitry Andric unsigned NumParts = SizeOp0 / NarrowSize; 713*0b57cec5SDimitry Andric SmallVector<Register, 2> DstRegs; 714*0b57cec5SDimitry Andric SmallVector<SmallVector<Register, 2>, 2> SrcRegs; 715*0b57cec5SDimitry Andric DstRegs.resize(NumParts); 716*0b57cec5SDimitry Andric SrcRegs.resize(MI.getNumOperands() / 2); 717*0b57cec5SDimitry Andric Observer.changingInstr(MI); 718*0b57cec5SDimitry Andric for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { 719*0b57cec5SDimitry Andric MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); 720*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 721*0b57cec5SDimitry Andric extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts, 722*0b57cec5SDimitry Andric SrcRegs[i / 2]); 723*0b57cec5SDimitry Andric } 724*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 725*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MBB, MI); 726*0b57cec5SDimitry Andric for (unsigned i = 0; i < NumParts; ++i) { 727*0b57cec5SDimitry Andric 
DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy); 728*0b57cec5SDimitry Andric MachineInstrBuilder MIB = 729*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]); 730*0b57cec5SDimitry Andric for (unsigned j = 1; j < MI.getNumOperands(); j += 2) 731*0b57cec5SDimitry Andric MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); 732*0b57cec5SDimitry Andric } 733*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 734*0b57cec5SDimitry Andric MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 735*0b57cec5SDimitry Andric Observer.changedInstr(MI); 736*0b57cec5SDimitry Andric MI.eraseFromParent(); 737*0b57cec5SDimitry Andric return Legalized; 738*0b57cec5SDimitry Andric } 739*0b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT_VECTOR_ELT: 740*0b57cec5SDimitry Andric case TargetOpcode::G_INSERT_VECTOR_ELT: { 741*0b57cec5SDimitry Andric if (TypeIdx != 2) 742*0b57cec5SDimitry Andric return UnableToLegalize; 743*0b57cec5SDimitry Andric 744*0b57cec5SDimitry Andric int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 
2 : 3; 745*0b57cec5SDimitry Andric Observer.changingInstr(MI); 746*0b57cec5SDimitry Andric narrowScalarSrc(MI, NarrowTy, OpIdx); 747*0b57cec5SDimitry Andric Observer.changedInstr(MI); 748*0b57cec5SDimitry Andric return Legalized; 749*0b57cec5SDimitry Andric } 750*0b57cec5SDimitry Andric case TargetOpcode::G_ICMP: { 751*0b57cec5SDimitry Andric uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); 752*0b57cec5SDimitry Andric if (NarrowSize * 2 != SrcSize) 753*0b57cec5SDimitry Andric return UnableToLegalize; 754*0b57cec5SDimitry Andric 755*0b57cec5SDimitry Andric Observer.changingInstr(MI); 756*0b57cec5SDimitry Andric Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); 757*0b57cec5SDimitry Andric Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); 758*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg()); 759*0b57cec5SDimitry Andric 760*0b57cec5SDimitry Andric Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); 761*0b57cec5SDimitry Andric Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); 762*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg()); 763*0b57cec5SDimitry Andric 764*0b57cec5SDimitry Andric CmpInst::Predicate Pred = 765*0b57cec5SDimitry Andric static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 766*0b57cec5SDimitry Andric 767*0b57cec5SDimitry Andric if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { 768*0b57cec5SDimitry Andric MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); 769*0b57cec5SDimitry Andric MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); 770*0b57cec5SDimitry Andric MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); 771*0b57cec5SDimitry Andric MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); 772*0b57cec5SDimitry Andric MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); 773*0b57cec5SDimitry Andric } else { 
774*0b57cec5SDimitry Andric const LLT s1 = LLT::scalar(1); 775*0b57cec5SDimitry Andric MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); 776*0b57cec5SDimitry Andric MachineInstrBuilder CmpHEQ = 777*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); 778*0b57cec5SDimitry Andric MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( 779*0b57cec5SDimitry Andric ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); 780*0b57cec5SDimitry Andric MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); 781*0b57cec5SDimitry Andric } 782*0b57cec5SDimitry Andric Observer.changedInstr(MI); 783*0b57cec5SDimitry Andric MI.eraseFromParent(); 784*0b57cec5SDimitry Andric return Legalized; 785*0b57cec5SDimitry Andric } 786*0b57cec5SDimitry Andric } 787*0b57cec5SDimitry Andric } 788*0b57cec5SDimitry Andric 789*0b57cec5SDimitry Andric void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, 790*0b57cec5SDimitry Andric unsigned OpIdx, unsigned ExtOpcode) { 791*0b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 792*0b57cec5SDimitry Andric auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()}); 793*0b57cec5SDimitry Andric MO.setReg(ExtB->getOperand(0).getReg()); 794*0b57cec5SDimitry Andric } 795*0b57cec5SDimitry Andric 796*0b57cec5SDimitry Andric void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, 797*0b57cec5SDimitry Andric unsigned OpIdx) { 798*0b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 799*0b57cec5SDimitry Andric auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, 800*0b57cec5SDimitry Andric {MO.getReg()}); 801*0b57cec5SDimitry Andric MO.setReg(ExtB->getOperand(0).getReg()); 802*0b57cec5SDimitry Andric } 803*0b57cec5SDimitry Andric 804*0b57cec5SDimitry Andric void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, 805*0b57cec5SDimitry Andric unsigned OpIdx, unsigned TruncOpcode) { 806*0b57cec5SDimitry 
Andric MachineOperand &MO = MI.getOperand(OpIdx); 807*0b57cec5SDimitry Andric Register DstExt = MRI.createGenericVirtualRegister(WideTy); 808*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 809*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); 810*0b57cec5SDimitry Andric MO.setReg(DstExt); 811*0b57cec5SDimitry Andric } 812*0b57cec5SDimitry Andric 813*0b57cec5SDimitry Andric void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, 814*0b57cec5SDimitry Andric unsigned OpIdx, unsigned ExtOpcode) { 815*0b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 816*0b57cec5SDimitry Andric Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); 817*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 818*0b57cec5SDimitry Andric MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); 819*0b57cec5SDimitry Andric MO.setReg(DstTrunc); 820*0b57cec5SDimitry Andric } 821*0b57cec5SDimitry Andric 822*0b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, 823*0b57cec5SDimitry Andric unsigned OpIdx) { 824*0b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 825*0b57cec5SDimitry Andric Register DstExt = MRI.createGenericVirtualRegister(WideTy); 826*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 827*0b57cec5SDimitry Andric MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); 828*0b57cec5SDimitry Andric MO.setReg(DstExt); 829*0b57cec5SDimitry Andric } 830*0b57cec5SDimitry Andric 831*0b57cec5SDimitry Andric void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, 832*0b57cec5SDimitry Andric unsigned OpIdx) { 833*0b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 834*0b57cec5SDimitry Andric 835*0b57cec5SDimitry Andric LLT OldTy = MRI.getType(MO.getReg()); 836*0b57cec5SDimitry Andric unsigned 
OldElts = OldTy.getNumElements(); 837*0b57cec5SDimitry Andric unsigned NewElts = MoreTy.getNumElements(); 838*0b57cec5SDimitry Andric 839*0b57cec5SDimitry Andric unsigned NumParts = NewElts / OldElts; 840*0b57cec5SDimitry Andric 841*0b57cec5SDimitry Andric // Use concat_vectors if the result is a multiple of the number of elements. 842*0b57cec5SDimitry Andric if (NumParts * OldElts == NewElts) { 843*0b57cec5SDimitry Andric SmallVector<Register, 8> Parts; 844*0b57cec5SDimitry Andric Parts.push_back(MO.getReg()); 845*0b57cec5SDimitry Andric 846*0b57cec5SDimitry Andric Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); 847*0b57cec5SDimitry Andric for (unsigned I = 1; I != NumParts; ++I) 848*0b57cec5SDimitry Andric Parts.push_back(ImpDef); 849*0b57cec5SDimitry Andric 850*0b57cec5SDimitry Andric auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); 851*0b57cec5SDimitry Andric MO.setReg(Concat.getReg(0)); 852*0b57cec5SDimitry Andric return; 853*0b57cec5SDimitry Andric } 854*0b57cec5SDimitry Andric 855*0b57cec5SDimitry Andric Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); 856*0b57cec5SDimitry Andric Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); 857*0b57cec5SDimitry Andric MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); 858*0b57cec5SDimitry Andric MO.setReg(MoreReg); 859*0b57cec5SDimitry Andric } 860*0b57cec5SDimitry Andric 861*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 862*0b57cec5SDimitry Andric LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, 863*0b57cec5SDimitry Andric LLT WideTy) { 864*0b57cec5SDimitry Andric if (TypeIdx != 1) 865*0b57cec5SDimitry Andric return UnableToLegalize; 866*0b57cec5SDimitry Andric 867*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 868*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 869*0b57cec5SDimitry Andric if (DstTy.isVector()) 870*0b57cec5SDimitry Andric return UnableToLegalize; 871*0b57cec5SDimitry Andric 
872*0b57cec5SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 873*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(Src1); 874*0b57cec5SDimitry Andric const int DstSize = DstTy.getSizeInBits(); 875*0b57cec5SDimitry Andric const int SrcSize = SrcTy.getSizeInBits(); 876*0b57cec5SDimitry Andric const int WideSize = WideTy.getSizeInBits(); 877*0b57cec5SDimitry Andric const int NumMerge = (DstSize + WideSize - 1) / WideSize; 878*0b57cec5SDimitry Andric 879*0b57cec5SDimitry Andric unsigned NumOps = MI.getNumOperands(); 880*0b57cec5SDimitry Andric unsigned NumSrc = MI.getNumOperands() - 1; 881*0b57cec5SDimitry Andric unsigned PartSize = DstTy.getSizeInBits() / NumSrc; 882*0b57cec5SDimitry Andric 883*0b57cec5SDimitry Andric if (WideSize >= DstSize) { 884*0b57cec5SDimitry Andric // Directly pack the bits in the target type. 885*0b57cec5SDimitry Andric Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); 886*0b57cec5SDimitry Andric 887*0b57cec5SDimitry Andric for (unsigned I = 2; I != NumOps; ++I) { 888*0b57cec5SDimitry Andric const unsigned Offset = (I - 1) * PartSize; 889*0b57cec5SDimitry Andric 890*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(I).getReg(); 891*0b57cec5SDimitry Andric assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); 892*0b57cec5SDimitry Andric 893*0b57cec5SDimitry Andric auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); 894*0b57cec5SDimitry Andric 895*0b57cec5SDimitry Andric Register NextResult = I + 1 == NumOps && WideSize == DstSize ? 
DstReg : 896*0b57cec5SDimitry Andric MRI.createGenericVirtualRegister(WideTy); 897*0b57cec5SDimitry Andric 898*0b57cec5SDimitry Andric auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); 899*0b57cec5SDimitry Andric auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); 900*0b57cec5SDimitry Andric MIRBuilder.buildOr(NextResult, ResultReg, Shl); 901*0b57cec5SDimitry Andric ResultReg = NextResult; 902*0b57cec5SDimitry Andric } 903*0b57cec5SDimitry Andric 904*0b57cec5SDimitry Andric if (WideSize > DstSize) 905*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(DstReg, ResultReg); 906*0b57cec5SDimitry Andric 907*0b57cec5SDimitry Andric MI.eraseFromParent(); 908*0b57cec5SDimitry Andric return Legalized; 909*0b57cec5SDimitry Andric } 910*0b57cec5SDimitry Andric 911*0b57cec5SDimitry Andric // Unmerge the original values to the GCD type, and recombine to the next 912*0b57cec5SDimitry Andric // multiple greater than the original type. 913*0b57cec5SDimitry Andric // 914*0b57cec5SDimitry Andric // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6 915*0b57cec5SDimitry Andric // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0 916*0b57cec5SDimitry Andric // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1 917*0b57cec5SDimitry Andric // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2 918*0b57cec5SDimitry Andric // %10:_(s6) = G_MERGE_VALUES %4, %5, %6 919*0b57cec5SDimitry Andric // %11:_(s6) = G_MERGE_VALUES %7, %8, %9 920*0b57cec5SDimitry Andric // %12:_(s12) = G_MERGE_VALUES %10, %11 921*0b57cec5SDimitry Andric // 922*0b57cec5SDimitry Andric // Padding with undef if necessary: 923*0b57cec5SDimitry Andric // 924*0b57cec5SDimitry Andric // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6 925*0b57cec5SDimitry Andric // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0 926*0b57cec5SDimitry Andric // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1 927*0b57cec5SDimitry Andric // %7:_(s2) = G_IMPLICIT_DEF 928*0b57cec5SDimitry Andric // %8:_(s6) = G_MERGE_VALUES %3, %4, %5 929*0b57cec5SDimitry Andric // 
%9:_(s6) = G_MERGE_VALUES %6, %7, %7 930*0b57cec5SDimitry Andric // %10:_(s12) = G_MERGE_VALUES %8, %9 931*0b57cec5SDimitry Andric 932*0b57cec5SDimitry Andric const int GCD = greatestCommonDivisor(SrcSize, WideSize); 933*0b57cec5SDimitry Andric LLT GCDTy = LLT::scalar(GCD); 934*0b57cec5SDimitry Andric 935*0b57cec5SDimitry Andric SmallVector<Register, 8> Parts; 936*0b57cec5SDimitry Andric SmallVector<Register, 8> NewMergeRegs; 937*0b57cec5SDimitry Andric SmallVector<Register, 8> Unmerges; 938*0b57cec5SDimitry Andric LLT WideDstTy = LLT::scalar(NumMerge * WideSize); 939*0b57cec5SDimitry Andric 940*0b57cec5SDimitry Andric // Decompose the original operands if they don't evenly divide. 941*0b57cec5SDimitry Andric for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { 942*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(I).getReg(); 943*0b57cec5SDimitry Andric if (GCD == SrcSize) { 944*0b57cec5SDimitry Andric Unmerges.push_back(SrcReg); 945*0b57cec5SDimitry Andric } else { 946*0b57cec5SDimitry Andric auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); 947*0b57cec5SDimitry Andric for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J) 948*0b57cec5SDimitry Andric Unmerges.push_back(Unmerge.getReg(J)); 949*0b57cec5SDimitry Andric } 950*0b57cec5SDimitry Andric } 951*0b57cec5SDimitry Andric 952*0b57cec5SDimitry Andric // Pad with undef to the next size that is a multiple of the requested size. 953*0b57cec5SDimitry Andric if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) { 954*0b57cec5SDimitry Andric Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0); 955*0b57cec5SDimitry Andric for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I) 956*0b57cec5SDimitry Andric Unmerges.push_back(UndefReg); 957*0b57cec5SDimitry Andric } 958*0b57cec5SDimitry Andric 959*0b57cec5SDimitry Andric const int PartsPerGCD = WideSize / GCD; 960*0b57cec5SDimitry Andric 961*0b57cec5SDimitry Andric // Build merges of each piece. 
962*0b57cec5SDimitry Andric ArrayRef<Register> Slicer(Unmerges); 963*0b57cec5SDimitry Andric for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { 964*0b57cec5SDimitry Andric auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); 965*0b57cec5SDimitry Andric NewMergeRegs.push_back(Merge.getReg(0)); 966*0b57cec5SDimitry Andric } 967*0b57cec5SDimitry Andric 968*0b57cec5SDimitry Andric // A truncate may be necessary if the requested type doesn't evenly divide the 969*0b57cec5SDimitry Andric // original result type. 970*0b57cec5SDimitry Andric if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) { 971*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, NewMergeRegs); 972*0b57cec5SDimitry Andric } else { 973*0b57cec5SDimitry Andric auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); 974*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); 975*0b57cec5SDimitry Andric } 976*0b57cec5SDimitry Andric 977*0b57cec5SDimitry Andric MI.eraseFromParent(); 978*0b57cec5SDimitry Andric return Legalized; 979*0b57cec5SDimitry Andric } 980*0b57cec5SDimitry Andric 981*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 982*0b57cec5SDimitry Andric LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, 983*0b57cec5SDimitry Andric LLT WideTy) { 984*0b57cec5SDimitry Andric if (TypeIdx != 0) 985*0b57cec5SDimitry Andric return UnableToLegalize; 986*0b57cec5SDimitry Andric 987*0b57cec5SDimitry Andric unsigned NumDst = MI.getNumOperands() - 1; 988*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(NumDst).getReg(); 989*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 990*0b57cec5SDimitry Andric if (!SrcTy.isScalar()) 991*0b57cec5SDimitry Andric return UnableToLegalize; 992*0b57cec5SDimitry Andric 993*0b57cec5SDimitry Andric Register Dst0Reg = MI.getOperand(0).getReg(); 994*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(Dst0Reg); 995*0b57cec5SDimitry Andric if 
(!DstTy.isScalar()) 996*0b57cec5SDimitry Andric return UnableToLegalize; 997*0b57cec5SDimitry Andric 998*0b57cec5SDimitry Andric unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); 999*0b57cec5SDimitry Andric LLT NewSrcTy = LLT::scalar(NewSrcSize); 1000*0b57cec5SDimitry Andric unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); 1001*0b57cec5SDimitry Andric 1002*0b57cec5SDimitry Andric auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); 1003*0b57cec5SDimitry Andric 1004*0b57cec5SDimitry Andric for (unsigned I = 1; I != NumDst; ++I) { 1005*0b57cec5SDimitry Andric auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); 1006*0b57cec5SDimitry Andric auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); 1007*0b57cec5SDimitry Andric WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); 1008*0b57cec5SDimitry Andric } 1009*0b57cec5SDimitry Andric 1010*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1011*0b57cec5SDimitry Andric 1012*0b57cec5SDimitry Andric MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); 1013*0b57cec5SDimitry Andric for (unsigned I = 0; I != NumDst; ++I) 1014*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, I); 1015*0b57cec5SDimitry Andric 1016*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1017*0b57cec5SDimitry Andric 1018*0b57cec5SDimitry Andric return Legalized; 1019*0b57cec5SDimitry Andric } 1020*0b57cec5SDimitry Andric 1021*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1022*0b57cec5SDimitry Andric LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, 1023*0b57cec5SDimitry Andric LLT WideTy) { 1024*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1025*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1026*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 1027*0b57cec5SDimitry Andric 1028*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1029*0b57cec5SDimitry Andric unsigned Offset = MI.getOperand(2).getImm(); 
1030*0b57cec5SDimitry Andric 1031*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1032*0b57cec5SDimitry Andric if (SrcTy.isVector() || DstTy.isVector()) 1033*0b57cec5SDimitry Andric return UnableToLegalize; 1034*0b57cec5SDimitry Andric 1035*0b57cec5SDimitry Andric SrcOp Src(SrcReg); 1036*0b57cec5SDimitry Andric if (SrcTy.isPointer()) { 1037*0b57cec5SDimitry Andric // Extracts from pointers can be handled only if they are really just 1038*0b57cec5SDimitry Andric // simple integers. 1039*0b57cec5SDimitry Andric const DataLayout &DL = MIRBuilder.getDataLayout(); 1040*0b57cec5SDimitry Andric if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) 1041*0b57cec5SDimitry Andric return UnableToLegalize; 1042*0b57cec5SDimitry Andric 1043*0b57cec5SDimitry Andric LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); 1044*0b57cec5SDimitry Andric Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); 1045*0b57cec5SDimitry Andric SrcTy = SrcAsIntTy; 1046*0b57cec5SDimitry Andric } 1047*0b57cec5SDimitry Andric 1048*0b57cec5SDimitry Andric if (DstTy.isPointer()) 1049*0b57cec5SDimitry Andric return UnableToLegalize; 1050*0b57cec5SDimitry Andric 1051*0b57cec5SDimitry Andric if (Offset == 0) { 1052*0b57cec5SDimitry Andric // Avoid a shift in the degenerate case. 1053*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(DstReg, 1054*0b57cec5SDimitry Andric MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); 1055*0b57cec5SDimitry Andric MI.eraseFromParent(); 1056*0b57cec5SDimitry Andric return Legalized; 1057*0b57cec5SDimitry Andric } 1058*0b57cec5SDimitry Andric 1059*0b57cec5SDimitry Andric // Do a shift in the source type. 
1060*0b57cec5SDimitry Andric LLT ShiftTy = SrcTy; 1061*0b57cec5SDimitry Andric if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { 1062*0b57cec5SDimitry Andric Src = MIRBuilder.buildAnyExt(WideTy, Src); 1063*0b57cec5SDimitry Andric ShiftTy = WideTy; 1064*0b57cec5SDimitry Andric } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) 1065*0b57cec5SDimitry Andric return UnableToLegalize; 1066*0b57cec5SDimitry Andric 1067*0b57cec5SDimitry Andric auto LShr = MIRBuilder.buildLShr( 1068*0b57cec5SDimitry Andric ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); 1069*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(DstReg, LShr); 1070*0b57cec5SDimitry Andric MI.eraseFromParent(); 1071*0b57cec5SDimitry Andric return Legalized; 1072*0b57cec5SDimitry Andric } 1073*0b57cec5SDimitry Andric 1074*0b57cec5SDimitry Andric if (SrcTy.isScalar()) { 1075*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1076*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1077*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1078*0b57cec5SDimitry Andric return Legalized; 1079*0b57cec5SDimitry Andric } 1080*0b57cec5SDimitry Andric 1081*0b57cec5SDimitry Andric if (!SrcTy.isVector()) 1082*0b57cec5SDimitry Andric return UnableToLegalize; 1083*0b57cec5SDimitry Andric 1084*0b57cec5SDimitry Andric if (DstTy != SrcTy.getElementType()) 1085*0b57cec5SDimitry Andric return UnableToLegalize; 1086*0b57cec5SDimitry Andric 1087*0b57cec5SDimitry Andric if (Offset % SrcTy.getScalarSizeInBits() != 0) 1088*0b57cec5SDimitry Andric return UnableToLegalize; 1089*0b57cec5SDimitry Andric 1090*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1091*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1092*0b57cec5SDimitry Andric 1093*0b57cec5SDimitry Andric MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * 1094*0b57cec5SDimitry Andric Offset); 1095*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy.getScalarType(), 0); 
1096*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1097*0b57cec5SDimitry Andric return Legalized; 1098*0b57cec5SDimitry Andric } 1099*0b57cec5SDimitry Andric 1100*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1101*0b57cec5SDimitry Andric LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, 1102*0b57cec5SDimitry Andric LLT WideTy) { 1103*0b57cec5SDimitry Andric if (TypeIdx != 0) 1104*0b57cec5SDimitry Andric return UnableToLegalize; 1105*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1106*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1107*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1108*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1109*0b57cec5SDimitry Andric return Legalized; 1110*0b57cec5SDimitry Andric } 1111*0b57cec5SDimitry Andric 1112*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1113*0b57cec5SDimitry Andric LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { 1114*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 1115*0b57cec5SDimitry Andric 1116*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 1117*0b57cec5SDimitry Andric default: 1118*0b57cec5SDimitry Andric return UnableToLegalize; 1119*0b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT: 1120*0b57cec5SDimitry Andric return widenScalarExtract(MI, TypeIdx, WideTy); 1121*0b57cec5SDimitry Andric case TargetOpcode::G_INSERT: 1122*0b57cec5SDimitry Andric return widenScalarInsert(MI, TypeIdx, WideTy); 1123*0b57cec5SDimitry Andric case TargetOpcode::G_MERGE_VALUES: 1124*0b57cec5SDimitry Andric return widenScalarMergeValues(MI, TypeIdx, WideTy); 1125*0b57cec5SDimitry Andric case TargetOpcode::G_UNMERGE_VALUES: 1126*0b57cec5SDimitry Andric return widenScalarUnmergeValues(MI, TypeIdx, WideTy); 1127*0b57cec5SDimitry Andric case TargetOpcode::G_UADDO: 1128*0b57cec5SDimitry Andric case TargetOpcode::G_USUBO: { 1129*0b57cec5SDimitry Andric if (TypeIdx == 1) 1130*0b57cec5SDimitry Andric return 
UnableToLegalize; // TODO 1131*0b57cec5SDimitry Andric auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 1132*0b57cec5SDimitry Andric {MI.getOperand(2).getReg()}); 1133*0b57cec5SDimitry Andric auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 1134*0b57cec5SDimitry Andric {MI.getOperand(3).getReg()}); 1135*0b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO 1136*0b57cec5SDimitry Andric ? TargetOpcode::G_ADD 1137*0b57cec5SDimitry Andric : TargetOpcode::G_SUB; 1138*0b57cec5SDimitry Andric // Do the arithmetic in the larger type. 1139*0b57cec5SDimitry Andric auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); 1140*0b57cec5SDimitry Andric LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); 1141*0b57cec5SDimitry Andric APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits()); 1142*0b57cec5SDimitry Andric auto AndOp = MIRBuilder.buildInstr( 1143*0b57cec5SDimitry Andric TargetOpcode::G_AND, {WideTy}, 1144*0b57cec5SDimitry Andric {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())}); 1145*0b57cec5SDimitry Andric // There is no overflow if the AndOp is the same as NewOp. 1146*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp, 1147*0b57cec5SDimitry Andric AndOp); 1148*0b57cec5SDimitry Andric // Now trunc the NewOp to the original result. 
1149*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp); 1150*0b57cec5SDimitry Andric MI.eraseFromParent(); 1151*0b57cec5SDimitry Andric return Legalized; 1152*0b57cec5SDimitry Andric } 1153*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ: 1154*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1155*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ: 1156*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1157*0b57cec5SDimitry Andric case TargetOpcode::G_CTPOP: { 1158*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1159*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1160*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0); 1161*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1162*0b57cec5SDimitry Andric return Legalized; 1163*0b57cec5SDimitry Andric } 1164*0b57cec5SDimitry Andric 1165*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1166*0b57cec5SDimitry Andric 1167*0b57cec5SDimitry Andric // First ZEXT the input. 1168*0b57cec5SDimitry Andric auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); 1169*0b57cec5SDimitry Andric LLT CurTy = MRI.getType(SrcReg); 1170*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_CTTZ) { 1171*0b57cec5SDimitry Andric // The count is the same in the larger type except if the original 1172*0b57cec5SDimitry Andric // value was zero. This can be handled by setting the bit just off 1173*0b57cec5SDimitry Andric // the top of the original type. 1174*0b57cec5SDimitry Andric auto TopBit = 1175*0b57cec5SDimitry Andric APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); 1176*0b57cec5SDimitry Andric MIBSrc = MIRBuilder.buildOr( 1177*0b57cec5SDimitry Andric WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); 1178*0b57cec5SDimitry Andric } 1179*0b57cec5SDimitry Andric 1180*0b57cec5SDimitry Andric // Perform the operation at the larger size. 
1181*0b57cec5SDimitry Andric auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); 1182*0b57cec5SDimitry Andric // This is already the correct result for CTPOP and CTTZs 1183*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_CTLZ || 1184*0b57cec5SDimitry Andric MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { 1185*0b57cec5SDimitry Andric // The correct result is NewOp - (Difference in widety and current ty). 1186*0b57cec5SDimitry Andric unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); 1187*0b57cec5SDimitry Andric MIBNewOp = MIRBuilder.buildInstr( 1188*0b57cec5SDimitry Andric TargetOpcode::G_SUB, {WideTy}, 1189*0b57cec5SDimitry Andric {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); 1190*0b57cec5SDimitry Andric } 1191*0b57cec5SDimitry Andric 1192*0b57cec5SDimitry Andric MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp); 1193*0b57cec5SDimitry Andric MI.eraseFromParent(); 1194*0b57cec5SDimitry Andric return Legalized; 1195*0b57cec5SDimitry Andric } 1196*0b57cec5SDimitry Andric case TargetOpcode::G_BSWAP: { 1197*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1198*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1199*0b57cec5SDimitry Andric 1200*0b57cec5SDimitry Andric Register ShrReg = MRI.createGenericVirtualRegister(WideTy); 1201*0b57cec5SDimitry Andric Register DstExt = MRI.createGenericVirtualRegister(WideTy); 1202*0b57cec5SDimitry Andric Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); 1203*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1204*0b57cec5SDimitry Andric 1205*0b57cec5SDimitry Andric MI.getOperand(0).setReg(DstExt); 1206*0b57cec5SDimitry Andric 1207*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 1208*0b57cec5SDimitry Andric 1209*0b57cec5SDimitry Andric LLT Ty = MRI.getType(DstReg); 1210*0b57cec5SDimitry Andric unsigned DiffBits = WideTy.getScalarSizeInBits() - 
Ty.getScalarSizeInBits(); 1211*0b57cec5SDimitry Andric MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); 1212*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_LSHR) 1213*0b57cec5SDimitry Andric .addDef(ShrReg) 1214*0b57cec5SDimitry Andric .addUse(DstExt) 1215*0b57cec5SDimitry Andric .addUse(ShiftAmtReg); 1216*0b57cec5SDimitry Andric 1217*0b57cec5SDimitry Andric MIRBuilder.buildTrunc(DstReg, ShrReg); 1218*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1219*0b57cec5SDimitry Andric return Legalized; 1220*0b57cec5SDimitry Andric } 1221*0b57cec5SDimitry Andric case TargetOpcode::G_ADD: 1222*0b57cec5SDimitry Andric case TargetOpcode::G_AND: 1223*0b57cec5SDimitry Andric case TargetOpcode::G_MUL: 1224*0b57cec5SDimitry Andric case TargetOpcode::G_OR: 1225*0b57cec5SDimitry Andric case TargetOpcode::G_XOR: 1226*0b57cec5SDimitry Andric case TargetOpcode::G_SUB: 1227*0b57cec5SDimitry Andric // Perform operation at larger width (any extension is fines here, high bits 1228*0b57cec5SDimitry Andric // don't affect the result) and then truncate the result back to the 1229*0b57cec5SDimitry Andric // original type. 
1230*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1231*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1232*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1233*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1234*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1235*0b57cec5SDimitry Andric return Legalized; 1236*0b57cec5SDimitry Andric 1237*0b57cec5SDimitry Andric case TargetOpcode::G_SHL: 1238*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1239*0b57cec5SDimitry Andric 1240*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1241*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1242*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1243*0b57cec5SDimitry Andric } else { 1244*0b57cec5SDimitry Andric assert(TypeIdx == 1); 1245*0b57cec5SDimitry Andric // The "number of bits to shift" operand must preserve its value as an 1246*0b57cec5SDimitry Andric // unsigned integer: 1247*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1248*0b57cec5SDimitry Andric } 1249*0b57cec5SDimitry Andric 1250*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1251*0b57cec5SDimitry Andric return Legalized; 1252*0b57cec5SDimitry Andric 1253*0b57cec5SDimitry Andric case TargetOpcode::G_SDIV: 1254*0b57cec5SDimitry Andric case TargetOpcode::G_SREM: 1255*0b57cec5SDimitry Andric case TargetOpcode::G_SMIN: 1256*0b57cec5SDimitry Andric case TargetOpcode::G_SMAX: 1257*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1258*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1259*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1260*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1261*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1262*0b57cec5SDimitry Andric return Legalized; 1263*0b57cec5SDimitry Andric 1264*0b57cec5SDimitry Andric case TargetOpcode::G_ASHR: 1265*0b57cec5SDimitry Andric case 
TargetOpcode::G_LSHR: 1266*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1267*0b57cec5SDimitry Andric 1268*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1269*0b57cec5SDimitry Andric unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ? 1270*0b57cec5SDimitry Andric TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; 1271*0b57cec5SDimitry Andric 1272*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, CvtOp); 1273*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1274*0b57cec5SDimitry Andric } else { 1275*0b57cec5SDimitry Andric assert(TypeIdx == 1); 1276*0b57cec5SDimitry Andric // The "number of bits to shift" operand must preserve its value as an 1277*0b57cec5SDimitry Andric // unsigned integer: 1278*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1279*0b57cec5SDimitry Andric } 1280*0b57cec5SDimitry Andric 1281*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1282*0b57cec5SDimitry Andric return Legalized; 1283*0b57cec5SDimitry Andric case TargetOpcode::G_UDIV: 1284*0b57cec5SDimitry Andric case TargetOpcode::G_UREM: 1285*0b57cec5SDimitry Andric case TargetOpcode::G_UMIN: 1286*0b57cec5SDimitry Andric case TargetOpcode::G_UMAX: 1287*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1288*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1289*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1290*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1291*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1292*0b57cec5SDimitry Andric return Legalized; 1293*0b57cec5SDimitry Andric 1294*0b57cec5SDimitry Andric case TargetOpcode::G_SELECT: 1295*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1296*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1297*0b57cec5SDimitry Andric // Perform operation at larger width (any extension is fine here, high 1298*0b57cec5SDimitry Andric // bits don't affect the result) and then truncate the result back to the 1299*0b57cec5SDimitry Andric // original 
type. 1300*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1301*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); 1302*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1303*0b57cec5SDimitry Andric } else { 1304*0b57cec5SDimitry Andric bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector(); 1305*0b57cec5SDimitry Andric // Explicit extension is required here since high bits affect the result. 1306*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false)); 1307*0b57cec5SDimitry Andric } 1308*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1309*0b57cec5SDimitry Andric return Legalized; 1310*0b57cec5SDimitry Andric 1311*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOSI: 1312*0b57cec5SDimitry Andric case TargetOpcode::G_FPTOUI: 1313*0b57cec5SDimitry Andric if (TypeIdx != 0) 1314*0b57cec5SDimitry Andric return UnableToLegalize; 1315*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1316*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1317*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1318*0b57cec5SDimitry Andric return Legalized; 1319*0b57cec5SDimitry Andric 1320*0b57cec5SDimitry Andric case TargetOpcode::G_SITOFP: 1321*0b57cec5SDimitry Andric if (TypeIdx != 1) 1322*0b57cec5SDimitry Andric return UnableToLegalize; 1323*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1324*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1325*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1326*0b57cec5SDimitry Andric return Legalized; 1327*0b57cec5SDimitry Andric 1328*0b57cec5SDimitry Andric case TargetOpcode::G_UITOFP: 1329*0b57cec5SDimitry Andric if (TypeIdx != 1) 1330*0b57cec5SDimitry Andric return UnableToLegalize; 1331*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1332*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1333*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1334*0b57cec5SDimitry 
Andric return Legalized; 1335*0b57cec5SDimitry Andric 1336*0b57cec5SDimitry Andric case TargetOpcode::G_LOAD: 1337*0b57cec5SDimitry Andric case TargetOpcode::G_SEXTLOAD: 1338*0b57cec5SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 1339*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1340*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1341*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1342*0b57cec5SDimitry Andric return Legalized; 1343*0b57cec5SDimitry Andric 1344*0b57cec5SDimitry Andric case TargetOpcode::G_STORE: { 1345*0b57cec5SDimitry Andric if (TypeIdx != 0) 1346*0b57cec5SDimitry Andric return UnableToLegalize; 1347*0b57cec5SDimitry Andric 1348*0b57cec5SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 1349*0b57cec5SDimitry Andric if (!isPowerOf2_32(Ty.getSizeInBits())) 1350*0b57cec5SDimitry Andric return UnableToLegalize; 1351*0b57cec5SDimitry Andric 1352*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1353*0b57cec5SDimitry Andric 1354*0b57cec5SDimitry Andric unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? 
1355*0b57cec5SDimitry Andric TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT; 1356*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 0, ExtType); 1357*0b57cec5SDimitry Andric 1358*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1359*0b57cec5SDimitry Andric return Legalized; 1360*0b57cec5SDimitry Andric } 1361*0b57cec5SDimitry Andric case TargetOpcode::G_CONSTANT: { 1362*0b57cec5SDimitry Andric MachineOperand &SrcMO = MI.getOperand(1); 1363*0b57cec5SDimitry Andric LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1364*0b57cec5SDimitry Andric const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); 1365*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1366*0b57cec5SDimitry Andric SrcMO.setCImm(ConstantInt::get(Ctx, Val)); 1367*0b57cec5SDimitry Andric 1368*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1369*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1370*0b57cec5SDimitry Andric return Legalized; 1371*0b57cec5SDimitry Andric } 1372*0b57cec5SDimitry Andric case TargetOpcode::G_FCONSTANT: { 1373*0b57cec5SDimitry Andric MachineOperand &SrcMO = MI.getOperand(1); 1374*0b57cec5SDimitry Andric LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1375*0b57cec5SDimitry Andric APFloat Val = SrcMO.getFPImm()->getValueAPF(); 1376*0b57cec5SDimitry Andric bool LosesInfo; 1377*0b57cec5SDimitry Andric switch (WideTy.getSizeInBits()) { 1378*0b57cec5SDimitry Andric case 32: 1379*0b57cec5SDimitry Andric Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 1380*0b57cec5SDimitry Andric &LosesInfo); 1381*0b57cec5SDimitry Andric break; 1382*0b57cec5SDimitry Andric case 64: 1383*0b57cec5SDimitry Andric Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, 1384*0b57cec5SDimitry Andric &LosesInfo); 1385*0b57cec5SDimitry Andric break; 1386*0b57cec5SDimitry Andric default: 1387*0b57cec5SDimitry Andric return UnableToLegalize; 1388*0b57cec5SDimitry Andric } 1389*0b57cec5SDimitry Andric 1390*0b57cec5SDimitry 
Andric assert(!LosesInfo && "extend should always be lossless"); 1391*0b57cec5SDimitry Andric 1392*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1393*0b57cec5SDimitry Andric SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); 1394*0b57cec5SDimitry Andric 1395*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 1396*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1397*0b57cec5SDimitry Andric return Legalized; 1398*0b57cec5SDimitry Andric } 1399*0b57cec5SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 1400*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1401*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1402*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1403*0b57cec5SDimitry Andric return Legalized; 1404*0b57cec5SDimitry Andric } 1405*0b57cec5SDimitry Andric case TargetOpcode::G_BRCOND: 1406*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1407*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false)); 1408*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1409*0b57cec5SDimitry Andric return Legalized; 1410*0b57cec5SDimitry Andric 1411*0b57cec5SDimitry Andric case TargetOpcode::G_FCMP: 1412*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1413*0b57cec5SDimitry Andric if (TypeIdx == 0) 1414*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1415*0b57cec5SDimitry Andric else { 1416*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); 1417*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); 1418*0b57cec5SDimitry Andric } 1419*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1420*0b57cec5SDimitry Andric return Legalized; 1421*0b57cec5SDimitry Andric 1422*0b57cec5SDimitry Andric case TargetOpcode::G_ICMP: 1423*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1424*0b57cec5SDimitry Andric if (TypeIdx == 0) 1425*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1426*0b57cec5SDimitry Andric else { 1427*0b57cec5SDimitry Andric 
unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( 1428*0b57cec5SDimitry Andric MI.getOperand(1).getPredicate())) 1429*0b57cec5SDimitry Andric ? TargetOpcode::G_SEXT 1430*0b57cec5SDimitry Andric : TargetOpcode::G_ZEXT; 1431*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, ExtOpcode); 1432*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 3, ExtOpcode); 1433*0b57cec5SDimitry Andric } 1434*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1435*0b57cec5SDimitry Andric return Legalized; 1436*0b57cec5SDimitry Andric 1437*0b57cec5SDimitry Andric case TargetOpcode::G_GEP: 1438*0b57cec5SDimitry Andric assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); 1439*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1440*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1441*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1442*0b57cec5SDimitry Andric return Legalized; 1443*0b57cec5SDimitry Andric 1444*0b57cec5SDimitry Andric case TargetOpcode::G_PHI: { 1445*0b57cec5SDimitry Andric assert(TypeIdx == 0 && "Expecting only Idx 0"); 1446*0b57cec5SDimitry Andric 1447*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1448*0b57cec5SDimitry Andric for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { 1449*0b57cec5SDimitry Andric MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 1450*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 1451*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); 1452*0b57cec5SDimitry Andric } 1453*0b57cec5SDimitry Andric 1454*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 1455*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 1456*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy); 1457*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1458*0b57cec5SDimitry Andric return Legalized; 1459*0b57cec5SDimitry Andric } 1460*0b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT_VECTOR_ELT: 
{ 1461*0b57cec5SDimitry Andric if (TypeIdx == 0) { 1462*0b57cec5SDimitry Andric Register VecReg = MI.getOperand(1).getReg(); 1463*0b57cec5SDimitry Andric LLT VecTy = MRI.getType(VecReg); 1464*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1465*0b57cec5SDimitry Andric 1466*0b57cec5SDimitry Andric widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), 1467*0b57cec5SDimitry Andric WideTy.getSizeInBits()), 1468*0b57cec5SDimitry Andric 1, TargetOpcode::G_SEXT); 1469*0b57cec5SDimitry Andric 1470*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0); 1471*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1472*0b57cec5SDimitry Andric return Legalized; 1473*0b57cec5SDimitry Andric } 1474*0b57cec5SDimitry Andric 1475*0b57cec5SDimitry Andric if (TypeIdx != 2) 1476*0b57cec5SDimitry Andric return UnableToLegalize; 1477*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1478*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1479*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1480*0b57cec5SDimitry Andric return Legalized; 1481*0b57cec5SDimitry Andric } 1482*0b57cec5SDimitry Andric case TargetOpcode::G_FADD: 1483*0b57cec5SDimitry Andric case TargetOpcode::G_FMUL: 1484*0b57cec5SDimitry Andric case TargetOpcode::G_FSUB: 1485*0b57cec5SDimitry Andric case TargetOpcode::G_FMA: 1486*0b57cec5SDimitry Andric case TargetOpcode::G_FNEG: 1487*0b57cec5SDimitry Andric case TargetOpcode::G_FABS: 1488*0b57cec5SDimitry Andric case TargetOpcode::G_FCANONICALIZE: 1489*0b57cec5SDimitry Andric case TargetOpcode::G_FMINNUM: 1490*0b57cec5SDimitry Andric case TargetOpcode::G_FMAXNUM: 1491*0b57cec5SDimitry Andric case TargetOpcode::G_FMINNUM_IEEE: 1492*0b57cec5SDimitry Andric case TargetOpcode::G_FMAXNUM_IEEE: 1493*0b57cec5SDimitry Andric case TargetOpcode::G_FMINIMUM: 1494*0b57cec5SDimitry Andric case TargetOpcode::G_FMAXIMUM: 1495*0b57cec5SDimitry Andric case TargetOpcode::G_FDIV: 1496*0b57cec5SDimitry Andric case TargetOpcode::G_FREM: 1497*0b57cec5SDimitry Andric 
case TargetOpcode::G_FCEIL: 1498*0b57cec5SDimitry Andric case TargetOpcode::G_FFLOOR: 1499*0b57cec5SDimitry Andric case TargetOpcode::G_FCOS: 1500*0b57cec5SDimitry Andric case TargetOpcode::G_FSIN: 1501*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG10: 1502*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG: 1503*0b57cec5SDimitry Andric case TargetOpcode::G_FLOG2: 1504*0b57cec5SDimitry Andric case TargetOpcode::G_FRINT: 1505*0b57cec5SDimitry Andric case TargetOpcode::G_FNEARBYINT: 1506*0b57cec5SDimitry Andric case TargetOpcode::G_FSQRT: 1507*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP: 1508*0b57cec5SDimitry Andric case TargetOpcode::G_FEXP2: 1509*0b57cec5SDimitry Andric case TargetOpcode::G_FPOW: 1510*0b57cec5SDimitry Andric case TargetOpcode::G_INTRINSIC_TRUNC: 1511*0b57cec5SDimitry Andric case TargetOpcode::G_INTRINSIC_ROUND: 1512*0b57cec5SDimitry Andric assert(TypeIdx == 0); 1513*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1514*0b57cec5SDimitry Andric 1515*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) 1516*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT); 1517*0b57cec5SDimitry Andric 1518*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 1519*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1520*0b57cec5SDimitry Andric return Legalized; 1521*0b57cec5SDimitry Andric case TargetOpcode::G_INTTOPTR: 1522*0b57cec5SDimitry Andric if (TypeIdx != 1) 1523*0b57cec5SDimitry Andric return UnableToLegalize; 1524*0b57cec5SDimitry Andric 1525*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1526*0b57cec5SDimitry Andric widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1527*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1528*0b57cec5SDimitry Andric return Legalized; 1529*0b57cec5SDimitry Andric case TargetOpcode::G_PTRTOINT: 1530*0b57cec5SDimitry Andric if (TypeIdx != 0) 1531*0b57cec5SDimitry Andric return UnableToLegalize; 1532*0b57cec5SDimitry Andric 
1533*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1534*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0); 1535*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1536*0b57cec5SDimitry Andric return Legalized; 1537*0b57cec5SDimitry Andric case TargetOpcode::G_BUILD_VECTOR: { 1538*0b57cec5SDimitry Andric Observer.changingInstr(MI); 1539*0b57cec5SDimitry Andric 1540*0b57cec5SDimitry Andric const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType(); 1541*0b57cec5SDimitry Andric for (int I = 1, E = MI.getNumOperands(); I != E; ++I) 1542*0b57cec5SDimitry Andric widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT); 1543*0b57cec5SDimitry Andric 1544*0b57cec5SDimitry Andric // Avoid changing the result vector type if the source element type was 1545*0b57cec5SDimitry Andric // requested. 1546*0b57cec5SDimitry Andric if (TypeIdx == 1) { 1547*0b57cec5SDimitry Andric auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 1548*0b57cec5SDimitry Andric MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); 1549*0b57cec5SDimitry Andric } else { 1550*0b57cec5SDimitry Andric widenScalarDst(MI, WideTy, 0); 1551*0b57cec5SDimitry Andric } 1552*0b57cec5SDimitry Andric 1553*0b57cec5SDimitry Andric Observer.changedInstr(MI); 1554*0b57cec5SDimitry Andric return Legalized; 1555*0b57cec5SDimitry Andric } 1556*0b57cec5SDimitry Andric } 1557*0b57cec5SDimitry Andric } 1558*0b57cec5SDimitry Andric 1559*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1560*0b57cec5SDimitry Andric LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 1561*0b57cec5SDimitry Andric using namespace TargetOpcode; 1562*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 1563*0b57cec5SDimitry Andric 1564*0b57cec5SDimitry Andric switch(MI.getOpcode()) { 1565*0b57cec5SDimitry Andric default: 1566*0b57cec5SDimitry Andric return UnableToLegalize; 1567*0b57cec5SDimitry Andric case TargetOpcode::G_SREM: 1568*0b57cec5SDimitry Andric case TargetOpcode::G_UREM: { 
1569*0b57cec5SDimitry Andric Register QuotReg = MRI.createGenericVirtualRegister(Ty); 1570*0b57cec5SDimitry Andric MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) 1571*0b57cec5SDimitry Andric .addDef(QuotReg) 1572*0b57cec5SDimitry Andric .addUse(MI.getOperand(1).getReg()) 1573*0b57cec5SDimitry Andric .addUse(MI.getOperand(2).getReg()); 1574*0b57cec5SDimitry Andric 1575*0b57cec5SDimitry Andric Register ProdReg = MRI.createGenericVirtualRegister(Ty); 1576*0b57cec5SDimitry Andric MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); 1577*0b57cec5SDimitry Andric MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), 1578*0b57cec5SDimitry Andric ProdReg); 1579*0b57cec5SDimitry Andric MI.eraseFromParent(); 1580*0b57cec5SDimitry Andric return Legalized; 1581*0b57cec5SDimitry Andric } 1582*0b57cec5SDimitry Andric case TargetOpcode::G_SMULO: 1583*0b57cec5SDimitry Andric case TargetOpcode::G_UMULO: { 1584*0b57cec5SDimitry Andric // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the 1585*0b57cec5SDimitry Andric // result. 1586*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1587*0b57cec5SDimitry Andric Register Overflow = MI.getOperand(1).getReg(); 1588*0b57cec5SDimitry Andric Register LHS = MI.getOperand(2).getReg(); 1589*0b57cec5SDimitry Andric Register RHS = MI.getOperand(3).getReg(); 1590*0b57cec5SDimitry Andric 1591*0b57cec5SDimitry Andric MIRBuilder.buildMul(Res, LHS, RHS); 1592*0b57cec5SDimitry Andric 1593*0b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO 1594*0b57cec5SDimitry Andric ? 
TargetOpcode::G_SMULH 1595*0b57cec5SDimitry Andric : TargetOpcode::G_UMULH; 1596*0b57cec5SDimitry Andric 1597*0b57cec5SDimitry Andric Register HiPart = MRI.createGenericVirtualRegister(Ty); 1598*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opcode) 1599*0b57cec5SDimitry Andric .addDef(HiPart) 1600*0b57cec5SDimitry Andric .addUse(LHS) 1601*0b57cec5SDimitry Andric .addUse(RHS); 1602*0b57cec5SDimitry Andric 1603*0b57cec5SDimitry Andric Register Zero = MRI.createGenericVirtualRegister(Ty); 1604*0b57cec5SDimitry Andric MIRBuilder.buildConstant(Zero, 0); 1605*0b57cec5SDimitry Andric 1606*0b57cec5SDimitry Andric // For *signed* multiply, overflow is detected by checking: 1607*0b57cec5SDimitry Andric // (hi != (lo >> bitwidth-1)) 1608*0b57cec5SDimitry Andric if (Opcode == TargetOpcode::G_SMULH) { 1609*0b57cec5SDimitry Andric Register Shifted = MRI.createGenericVirtualRegister(Ty); 1610*0b57cec5SDimitry Andric Register ShiftAmt = MRI.createGenericVirtualRegister(Ty); 1611*0b57cec5SDimitry Andric MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); 1612*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_ASHR) 1613*0b57cec5SDimitry Andric .addDef(Shifted) 1614*0b57cec5SDimitry Andric .addUse(Res) 1615*0b57cec5SDimitry Andric .addUse(ShiftAmt); 1616*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); 1617*0b57cec5SDimitry Andric } else { 1618*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); 1619*0b57cec5SDimitry Andric } 1620*0b57cec5SDimitry Andric MI.eraseFromParent(); 1621*0b57cec5SDimitry Andric return Legalized; 1622*0b57cec5SDimitry Andric } 1623*0b57cec5SDimitry Andric case TargetOpcode::G_FNEG: { 1624*0b57cec5SDimitry Andric // TODO: Handle vector types once we are able to 1625*0b57cec5SDimitry Andric // represent them. 
1626*0b57cec5SDimitry Andric if (Ty.isVector()) 1627*0b57cec5SDimitry Andric return UnableToLegalize; 1628*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1629*0b57cec5SDimitry Andric Type *ZeroTy; 1630*0b57cec5SDimitry Andric LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1631*0b57cec5SDimitry Andric switch (Ty.getSizeInBits()) { 1632*0b57cec5SDimitry Andric case 16: 1633*0b57cec5SDimitry Andric ZeroTy = Type::getHalfTy(Ctx); 1634*0b57cec5SDimitry Andric break; 1635*0b57cec5SDimitry Andric case 32: 1636*0b57cec5SDimitry Andric ZeroTy = Type::getFloatTy(Ctx); 1637*0b57cec5SDimitry Andric break; 1638*0b57cec5SDimitry Andric case 64: 1639*0b57cec5SDimitry Andric ZeroTy = Type::getDoubleTy(Ctx); 1640*0b57cec5SDimitry Andric break; 1641*0b57cec5SDimitry Andric case 128: 1642*0b57cec5SDimitry Andric ZeroTy = Type::getFP128Ty(Ctx); 1643*0b57cec5SDimitry Andric break; 1644*0b57cec5SDimitry Andric default: 1645*0b57cec5SDimitry Andric llvm_unreachable("unexpected floating-point type"); 1646*0b57cec5SDimitry Andric } 1647*0b57cec5SDimitry Andric ConstantFP &ZeroForNegation = 1648*0b57cec5SDimitry Andric *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); 1649*0b57cec5SDimitry Andric auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); 1650*0b57cec5SDimitry Andric Register SubByReg = MI.getOperand(1).getReg(); 1651*0b57cec5SDimitry Andric Register ZeroReg = Zero->getOperand(0).getReg(); 1652*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, 1653*0b57cec5SDimitry Andric MI.getFlags()); 1654*0b57cec5SDimitry Andric MI.eraseFromParent(); 1655*0b57cec5SDimitry Andric return Legalized; 1656*0b57cec5SDimitry Andric } 1657*0b57cec5SDimitry Andric case TargetOpcode::G_FSUB: { 1658*0b57cec5SDimitry Andric // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). 1659*0b57cec5SDimitry Andric // First, check if G_FNEG is marked as Lower. 
If so, we may 1660*0b57cec5SDimitry Andric // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. 1661*0b57cec5SDimitry Andric if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) 1662*0b57cec5SDimitry Andric return UnableToLegalize; 1663*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1664*0b57cec5SDimitry Andric Register LHS = MI.getOperand(1).getReg(); 1665*0b57cec5SDimitry Andric Register RHS = MI.getOperand(2).getReg(); 1666*0b57cec5SDimitry Andric Register Neg = MRI.createGenericVirtualRegister(Ty); 1667*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); 1668*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags()); 1669*0b57cec5SDimitry Andric MI.eraseFromParent(); 1670*0b57cec5SDimitry Andric return Legalized; 1671*0b57cec5SDimitry Andric } 1672*0b57cec5SDimitry Andric case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { 1673*0b57cec5SDimitry Andric Register OldValRes = MI.getOperand(0).getReg(); 1674*0b57cec5SDimitry Andric Register SuccessRes = MI.getOperand(1).getReg(); 1675*0b57cec5SDimitry Andric Register Addr = MI.getOperand(2).getReg(); 1676*0b57cec5SDimitry Andric Register CmpVal = MI.getOperand(3).getReg(); 1677*0b57cec5SDimitry Andric Register NewVal = MI.getOperand(4).getReg(); 1678*0b57cec5SDimitry Andric MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, 1679*0b57cec5SDimitry Andric **MI.memoperands_begin()); 1680*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); 1681*0b57cec5SDimitry Andric MI.eraseFromParent(); 1682*0b57cec5SDimitry Andric return Legalized; 1683*0b57cec5SDimitry Andric } 1684*0b57cec5SDimitry Andric case TargetOpcode::G_LOAD: 1685*0b57cec5SDimitry Andric case TargetOpcode::G_SEXTLOAD: 1686*0b57cec5SDimitry Andric case TargetOpcode::G_ZEXTLOAD: { 1687*0b57cec5SDimitry Andric // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT 
1688*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1689*0b57cec5SDimitry Andric Register PtrReg = MI.getOperand(1).getReg(); 1690*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1691*0b57cec5SDimitry Andric auto &MMO = **MI.memoperands_begin(); 1692*0b57cec5SDimitry Andric 1693*0b57cec5SDimitry Andric if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { 1694*0b57cec5SDimitry Andric // In the case of G_LOAD, this was a non-extending load already and we're 1695*0b57cec5SDimitry Andric // about to lower to the same instruction. 1696*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_LOAD) 1697*0b57cec5SDimitry Andric return UnableToLegalize; 1698*0b57cec5SDimitry Andric MIRBuilder.buildLoad(DstReg, PtrReg, MMO); 1699*0b57cec5SDimitry Andric MI.eraseFromParent(); 1700*0b57cec5SDimitry Andric return Legalized; 1701*0b57cec5SDimitry Andric } 1702*0b57cec5SDimitry Andric 1703*0b57cec5SDimitry Andric if (DstTy.isScalar()) { 1704*0b57cec5SDimitry Andric Register TmpReg = 1705*0b57cec5SDimitry Andric MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); 1706*0b57cec5SDimitry Andric MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 1707*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 1708*0b57cec5SDimitry Andric default: 1709*0b57cec5SDimitry Andric llvm_unreachable("Unexpected opcode"); 1710*0b57cec5SDimitry Andric case TargetOpcode::G_LOAD: 1711*0b57cec5SDimitry Andric MIRBuilder.buildAnyExt(DstReg, TmpReg); 1712*0b57cec5SDimitry Andric break; 1713*0b57cec5SDimitry Andric case TargetOpcode::G_SEXTLOAD: 1714*0b57cec5SDimitry Andric MIRBuilder.buildSExt(DstReg, TmpReg); 1715*0b57cec5SDimitry Andric break; 1716*0b57cec5SDimitry Andric case TargetOpcode::G_ZEXTLOAD: 1717*0b57cec5SDimitry Andric MIRBuilder.buildZExt(DstReg, TmpReg); 1718*0b57cec5SDimitry Andric break; 1719*0b57cec5SDimitry Andric } 1720*0b57cec5SDimitry Andric MI.eraseFromParent(); 1721*0b57cec5SDimitry Andric return Legalized; 1722*0b57cec5SDimitry 
Andric } 1723*0b57cec5SDimitry Andric 1724*0b57cec5SDimitry Andric return UnableToLegalize; 1725*0b57cec5SDimitry Andric } 1726*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1727*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1728*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ: 1729*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ: 1730*0b57cec5SDimitry Andric case TargetOpcode::G_CTPOP: 1731*0b57cec5SDimitry Andric return lowerBitCount(MI, TypeIdx, Ty); 1732*0b57cec5SDimitry Andric case G_UADDO: { 1733*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1734*0b57cec5SDimitry Andric Register CarryOut = MI.getOperand(1).getReg(); 1735*0b57cec5SDimitry Andric Register LHS = MI.getOperand(2).getReg(); 1736*0b57cec5SDimitry Andric Register RHS = MI.getOperand(3).getReg(); 1737*0b57cec5SDimitry Andric 1738*0b57cec5SDimitry Andric MIRBuilder.buildAdd(Res, LHS, RHS); 1739*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS); 1740*0b57cec5SDimitry Andric 1741*0b57cec5SDimitry Andric MI.eraseFromParent(); 1742*0b57cec5SDimitry Andric return Legalized; 1743*0b57cec5SDimitry Andric } 1744*0b57cec5SDimitry Andric case G_UADDE: { 1745*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1746*0b57cec5SDimitry Andric Register CarryOut = MI.getOperand(1).getReg(); 1747*0b57cec5SDimitry Andric Register LHS = MI.getOperand(2).getReg(); 1748*0b57cec5SDimitry Andric Register RHS = MI.getOperand(3).getReg(); 1749*0b57cec5SDimitry Andric Register CarryIn = MI.getOperand(4).getReg(); 1750*0b57cec5SDimitry Andric 1751*0b57cec5SDimitry Andric Register TmpRes = MRI.createGenericVirtualRegister(Ty); 1752*0b57cec5SDimitry Andric Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); 1753*0b57cec5SDimitry Andric 1754*0b57cec5SDimitry Andric MIRBuilder.buildAdd(TmpRes, LHS, RHS); 1755*0b57cec5SDimitry Andric MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); 1756*0b57cec5SDimitry Andric 
MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); 1757*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); 1758*0b57cec5SDimitry Andric 1759*0b57cec5SDimitry Andric MI.eraseFromParent(); 1760*0b57cec5SDimitry Andric return Legalized; 1761*0b57cec5SDimitry Andric } 1762*0b57cec5SDimitry Andric case G_USUBO: { 1763*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1764*0b57cec5SDimitry Andric Register BorrowOut = MI.getOperand(1).getReg(); 1765*0b57cec5SDimitry Andric Register LHS = MI.getOperand(2).getReg(); 1766*0b57cec5SDimitry Andric Register RHS = MI.getOperand(3).getReg(); 1767*0b57cec5SDimitry Andric 1768*0b57cec5SDimitry Andric MIRBuilder.buildSub(Res, LHS, RHS); 1769*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS); 1770*0b57cec5SDimitry Andric 1771*0b57cec5SDimitry Andric MI.eraseFromParent(); 1772*0b57cec5SDimitry Andric return Legalized; 1773*0b57cec5SDimitry Andric } 1774*0b57cec5SDimitry Andric case G_USUBE: { 1775*0b57cec5SDimitry Andric Register Res = MI.getOperand(0).getReg(); 1776*0b57cec5SDimitry Andric Register BorrowOut = MI.getOperand(1).getReg(); 1777*0b57cec5SDimitry Andric Register LHS = MI.getOperand(2).getReg(); 1778*0b57cec5SDimitry Andric Register RHS = MI.getOperand(3).getReg(); 1779*0b57cec5SDimitry Andric Register BorrowIn = MI.getOperand(4).getReg(); 1780*0b57cec5SDimitry Andric 1781*0b57cec5SDimitry Andric Register TmpRes = MRI.createGenericVirtualRegister(Ty); 1782*0b57cec5SDimitry Andric Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty); 1783*0b57cec5SDimitry Andric Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); 1784*0b57cec5SDimitry Andric Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); 1785*0b57cec5SDimitry Andric 1786*0b57cec5SDimitry Andric MIRBuilder.buildSub(TmpRes, LHS, RHS); 1787*0b57cec5SDimitry Andric MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn); 1788*0b57cec5SDimitry Andric 
MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); 1789*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS); 1790*0b57cec5SDimitry Andric MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS); 1791*0b57cec5SDimitry Andric MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); 1792*0b57cec5SDimitry Andric 1793*0b57cec5SDimitry Andric MI.eraseFromParent(); 1794*0b57cec5SDimitry Andric return Legalized; 1795*0b57cec5SDimitry Andric } 1796*0b57cec5SDimitry Andric case G_UITOFP: 1797*0b57cec5SDimitry Andric return lowerUITOFP(MI, TypeIdx, Ty); 1798*0b57cec5SDimitry Andric case G_SITOFP: 1799*0b57cec5SDimitry Andric return lowerSITOFP(MI, TypeIdx, Ty); 1800*0b57cec5SDimitry Andric case G_SMIN: 1801*0b57cec5SDimitry Andric case G_SMAX: 1802*0b57cec5SDimitry Andric case G_UMIN: 1803*0b57cec5SDimitry Andric case G_UMAX: 1804*0b57cec5SDimitry Andric return lowerMinMax(MI, TypeIdx, Ty); 1805*0b57cec5SDimitry Andric case G_FCOPYSIGN: 1806*0b57cec5SDimitry Andric return lowerFCopySign(MI, TypeIdx, Ty); 1807*0b57cec5SDimitry Andric case G_FMINNUM: 1808*0b57cec5SDimitry Andric case G_FMAXNUM: 1809*0b57cec5SDimitry Andric return lowerFMinNumMaxNum(MI); 1810*0b57cec5SDimitry Andric } 1811*0b57cec5SDimitry Andric } 1812*0b57cec5SDimitry Andric 1813*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( 1814*0b57cec5SDimitry Andric MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { 1815*0b57cec5SDimitry Andric SmallVector<Register, 2> DstRegs; 1816*0b57cec5SDimitry Andric 1817*0b57cec5SDimitry Andric unsigned NarrowSize = NarrowTy.getSizeInBits(); 1818*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1819*0b57cec5SDimitry Andric unsigned Size = MRI.getType(DstReg).getSizeInBits(); 1820*0b57cec5SDimitry Andric int NumParts = Size / NarrowSize; 1821*0b57cec5SDimitry Andric // FIXME: Don't know how to handle the situation where the small vectors 
1822*0b57cec5SDimitry Andric // aren't all the same size yet. 1823*0b57cec5SDimitry Andric if (Size % NarrowSize != 0) 1824*0b57cec5SDimitry Andric return UnableToLegalize; 1825*0b57cec5SDimitry Andric 1826*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) { 1827*0b57cec5SDimitry Andric Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 1828*0b57cec5SDimitry Andric MIRBuilder.buildUndef(TmpReg); 1829*0b57cec5SDimitry Andric DstRegs.push_back(TmpReg); 1830*0b57cec5SDimitry Andric } 1831*0b57cec5SDimitry Andric 1832*0b57cec5SDimitry Andric if (NarrowTy.isVector()) 1833*0b57cec5SDimitry Andric MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1834*0b57cec5SDimitry Andric else 1835*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 1836*0b57cec5SDimitry Andric 1837*0b57cec5SDimitry Andric MI.eraseFromParent(); 1838*0b57cec5SDimitry Andric return Legalized; 1839*0b57cec5SDimitry Andric } 1840*0b57cec5SDimitry Andric 1841*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1842*0b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, 1843*0b57cec5SDimitry Andric LLT NarrowTy) { 1844*0b57cec5SDimitry Andric const unsigned Opc = MI.getOpcode(); 1845*0b57cec5SDimitry Andric const unsigned NumOps = MI.getNumOperands() - 1; 1846*0b57cec5SDimitry Andric const unsigned NarrowSize = NarrowTy.getSizeInBits(); 1847*0b57cec5SDimitry Andric const Register DstReg = MI.getOperand(0).getReg(); 1848*0b57cec5SDimitry Andric const unsigned Flags = MI.getFlags(); 1849*0b57cec5SDimitry Andric const LLT DstTy = MRI.getType(DstReg); 1850*0b57cec5SDimitry Andric const unsigned Size = DstTy.getSizeInBits(); 1851*0b57cec5SDimitry Andric const int NumParts = Size / NarrowSize; 1852*0b57cec5SDimitry Andric const LLT EltTy = DstTy.getElementType(); 1853*0b57cec5SDimitry Andric const unsigned EltSize = EltTy.getSizeInBits(); 1854*0b57cec5SDimitry Andric const unsigned BitsForNumParts = NarrowSize * NumParts; 
1855*0b57cec5SDimitry Andric 1856*0b57cec5SDimitry Andric // Check if we have any leftovers. If we do, then only handle the case where 1857*0b57cec5SDimitry Andric // the leftover is one element. 1858*0b57cec5SDimitry Andric if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) 1859*0b57cec5SDimitry Andric return UnableToLegalize; 1860*0b57cec5SDimitry Andric 1861*0b57cec5SDimitry Andric if (BitsForNumParts != Size) { 1862*0b57cec5SDimitry Andric Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy); 1863*0b57cec5SDimitry Andric MIRBuilder.buildUndef(AccumDstReg); 1864*0b57cec5SDimitry Andric 1865*0b57cec5SDimitry Andric // Handle the pieces which evenly divide into the requested type with 1866*0b57cec5SDimitry Andric // extract/op/insert sequence. 1867*0b57cec5SDimitry Andric for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { 1868*0b57cec5SDimitry Andric SmallVector<SrcOp, 4> SrcOps; 1869*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1870*0b57cec5SDimitry Andric Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); 1871*0b57cec5SDimitry Andric MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); 1872*0b57cec5SDimitry Andric SrcOps.push_back(PartOpReg); 1873*0b57cec5SDimitry Andric } 1874*0b57cec5SDimitry Andric 1875*0b57cec5SDimitry Andric Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); 1876*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1877*0b57cec5SDimitry Andric 1878*0b57cec5SDimitry Andric Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy); 1879*0b57cec5SDimitry Andric MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); 1880*0b57cec5SDimitry Andric AccumDstReg = PartInsertReg; 1881*0b57cec5SDimitry Andric } 1882*0b57cec5SDimitry Andric 1883*0b57cec5SDimitry Andric // Handle the remaining element sized leftover piece. 
1884*0b57cec5SDimitry Andric SmallVector<SrcOp, 4> SrcOps; 1885*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1886*0b57cec5SDimitry Andric Register PartOpReg = MRI.createGenericVirtualRegister(EltTy); 1887*0b57cec5SDimitry Andric MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), 1888*0b57cec5SDimitry Andric BitsForNumParts); 1889*0b57cec5SDimitry Andric SrcOps.push_back(PartOpReg); 1890*0b57cec5SDimitry Andric } 1891*0b57cec5SDimitry Andric 1892*0b57cec5SDimitry Andric Register PartDstReg = MRI.createGenericVirtualRegister(EltTy); 1893*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1894*0b57cec5SDimitry Andric MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); 1895*0b57cec5SDimitry Andric MI.eraseFromParent(); 1896*0b57cec5SDimitry Andric 1897*0b57cec5SDimitry Andric return Legalized; 1898*0b57cec5SDimitry Andric } 1899*0b57cec5SDimitry Andric 1900*0b57cec5SDimitry Andric SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; 1901*0b57cec5SDimitry Andric 1902*0b57cec5SDimitry Andric extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); 1903*0b57cec5SDimitry Andric 1904*0b57cec5SDimitry Andric if (NumOps >= 2) 1905*0b57cec5SDimitry Andric extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); 1906*0b57cec5SDimitry Andric 1907*0b57cec5SDimitry Andric if (NumOps >= 3) 1908*0b57cec5SDimitry Andric extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); 1909*0b57cec5SDimitry Andric 1910*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) { 1911*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1912*0b57cec5SDimitry Andric 1913*0b57cec5SDimitry Andric if (NumOps == 1) 1914*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); 1915*0b57cec5SDimitry Andric else if (NumOps == 2) { 1916*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opc, 
{DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); 1917*0b57cec5SDimitry Andric } else if (NumOps == 3) { 1918*0b57cec5SDimitry Andric MIRBuilder.buildInstr(Opc, {DstReg}, 1919*0b57cec5SDimitry Andric {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); 1920*0b57cec5SDimitry Andric } 1921*0b57cec5SDimitry Andric 1922*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 1923*0b57cec5SDimitry Andric } 1924*0b57cec5SDimitry Andric 1925*0b57cec5SDimitry Andric if (NarrowTy.isVector()) 1926*0b57cec5SDimitry Andric MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1927*0b57cec5SDimitry Andric else 1928*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 1929*0b57cec5SDimitry Andric 1930*0b57cec5SDimitry Andric MI.eraseFromParent(); 1931*0b57cec5SDimitry Andric return Legalized; 1932*0b57cec5SDimitry Andric } 1933*0b57cec5SDimitry Andric 1934*0b57cec5SDimitry Andric // Handle splitting vector operations which need to have the same number of 1935*0b57cec5SDimitry Andric // elements in each type index, but each type index may have a different element 1936*0b57cec5SDimitry Andric // type. 1937*0b57cec5SDimitry Andric // 1938*0b57cec5SDimitry Andric // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> -> 1939*0b57cec5SDimitry Andric // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 1940*0b57cec5SDimitry Andric // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 1941*0b57cec5SDimitry Andric // 1942*0b57cec5SDimitry Andric // Also handles some irregular breakdown cases, e.g. 1943*0b57cec5SDimitry Andric // e.g. 
<3 x s64> = G_SHL <3 x s64>, <3 x s32> -> 1944*0b57cec5SDimitry Andric // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 1945*0b57cec5SDimitry Andric // s64 = G_SHL s64, s32 1946*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 1947*0b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorMultiEltType( 1948*0b57cec5SDimitry Andric MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { 1949*0b57cec5SDimitry Andric if (TypeIdx != 0) 1950*0b57cec5SDimitry Andric return UnableToLegalize; 1951*0b57cec5SDimitry Andric 1952*0b57cec5SDimitry Andric const LLT NarrowTy0 = NarrowTyArg; 1953*0b57cec5SDimitry Andric const unsigned NewNumElts = 1954*0b57cec5SDimitry Andric NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1; 1955*0b57cec5SDimitry Andric 1956*0b57cec5SDimitry Andric const Register DstReg = MI.getOperand(0).getReg(); 1957*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 1958*0b57cec5SDimitry Andric LLT LeftoverTy0; 1959*0b57cec5SDimitry Andric 1960*0b57cec5SDimitry Andric // All of the operands need to have the same number of elements, so if we can 1961*0b57cec5SDimitry Andric // determine a type breakdown for the result type, we can for all of the 1962*0b57cec5SDimitry Andric // source types. 
1963*0b57cec5SDimitry Andric int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; 1964*0b57cec5SDimitry Andric if (NumParts < 0) 1965*0b57cec5SDimitry Andric return UnableToLegalize; 1966*0b57cec5SDimitry Andric 1967*0b57cec5SDimitry Andric SmallVector<MachineInstrBuilder, 4> NewInsts; 1968*0b57cec5SDimitry Andric 1969*0b57cec5SDimitry Andric SmallVector<Register, 4> DstRegs, LeftoverDstRegs; 1970*0b57cec5SDimitry Andric SmallVector<Register, 4> PartRegs, LeftoverRegs; 1971*0b57cec5SDimitry Andric 1972*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1973*0b57cec5SDimitry Andric LLT LeftoverTy; 1974*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(I).getReg(); 1975*0b57cec5SDimitry Andric LLT SrcTyI = MRI.getType(SrcReg); 1976*0b57cec5SDimitry Andric LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); 1977*0b57cec5SDimitry Andric LLT LeftoverTyI; 1978*0b57cec5SDimitry Andric 1979*0b57cec5SDimitry Andric // Split this operand into the requested typed registers, and any leftover 1980*0b57cec5SDimitry Andric // required to reproduce the original type. 1981*0b57cec5SDimitry Andric if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, 1982*0b57cec5SDimitry Andric LeftoverRegs)) 1983*0b57cec5SDimitry Andric return UnableToLegalize; 1984*0b57cec5SDimitry Andric 1985*0b57cec5SDimitry Andric if (I == 1) { 1986*0b57cec5SDimitry Andric // For the first operand, create an instruction for each part and setup 1987*0b57cec5SDimitry Andric // the result. 
1988*0b57cec5SDimitry Andric for (Register PartReg : PartRegs) { 1989*0b57cec5SDimitry Andric Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); 1990*0b57cec5SDimitry Andric NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 1991*0b57cec5SDimitry Andric .addDef(PartDstReg) 1992*0b57cec5SDimitry Andric .addUse(PartReg)); 1993*0b57cec5SDimitry Andric DstRegs.push_back(PartDstReg); 1994*0b57cec5SDimitry Andric } 1995*0b57cec5SDimitry Andric 1996*0b57cec5SDimitry Andric for (Register LeftoverReg : LeftoverRegs) { 1997*0b57cec5SDimitry Andric Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); 1998*0b57cec5SDimitry Andric NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 1999*0b57cec5SDimitry Andric .addDef(PartDstReg) 2000*0b57cec5SDimitry Andric .addUse(LeftoverReg)); 2001*0b57cec5SDimitry Andric LeftoverDstRegs.push_back(PartDstReg); 2002*0b57cec5SDimitry Andric } 2003*0b57cec5SDimitry Andric } else { 2004*0b57cec5SDimitry Andric assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); 2005*0b57cec5SDimitry Andric 2006*0b57cec5SDimitry Andric // Add the newly created operand splits to the existing instructions. The 2007*0b57cec5SDimitry Andric // odd-sized pieces are ordered after the requested NarrowTyArg sized 2008*0b57cec5SDimitry Andric // pieces. 
2009*0b57cec5SDimitry Andric unsigned InstCount = 0; 2010*0b57cec5SDimitry Andric for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) 2011*0b57cec5SDimitry Andric NewInsts[InstCount++].addUse(PartRegs[J]); 2012*0b57cec5SDimitry Andric for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) 2013*0b57cec5SDimitry Andric NewInsts[InstCount++].addUse(LeftoverRegs[J]); 2014*0b57cec5SDimitry Andric } 2015*0b57cec5SDimitry Andric 2016*0b57cec5SDimitry Andric PartRegs.clear(); 2017*0b57cec5SDimitry Andric LeftoverRegs.clear(); 2018*0b57cec5SDimitry Andric } 2019*0b57cec5SDimitry Andric 2020*0b57cec5SDimitry Andric // Insert the newly built operations and rebuild the result register. 2021*0b57cec5SDimitry Andric for (auto &MIB : NewInsts) 2022*0b57cec5SDimitry Andric MIRBuilder.insertInstr(MIB); 2023*0b57cec5SDimitry Andric 2024*0b57cec5SDimitry Andric insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); 2025*0b57cec5SDimitry Andric 2026*0b57cec5SDimitry Andric MI.eraseFromParent(); 2027*0b57cec5SDimitry Andric return Legalized; 2028*0b57cec5SDimitry Andric } 2029*0b57cec5SDimitry Andric 2030*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2031*0b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 2032*0b57cec5SDimitry Andric LLT NarrowTy) { 2033*0b57cec5SDimitry Andric if (TypeIdx != 0) 2034*0b57cec5SDimitry Andric return UnableToLegalize; 2035*0b57cec5SDimitry Andric 2036*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2037*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2038*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2039*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(SrcReg); 2040*0b57cec5SDimitry Andric 2041*0b57cec5SDimitry Andric LLT NarrowTy0 = NarrowTy; 2042*0b57cec5SDimitry Andric LLT NarrowTy1; 2043*0b57cec5SDimitry Andric unsigned NumParts; 2044*0b57cec5SDimitry Andric 2045*0b57cec5SDimitry Andric if 
(NarrowTy.isVector()) { 2046*0b57cec5SDimitry Andric // Uneven breakdown not handled. 2047*0b57cec5SDimitry Andric NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); 2048*0b57cec5SDimitry Andric if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) 2049*0b57cec5SDimitry Andric return UnableToLegalize; 2050*0b57cec5SDimitry Andric 2051*0b57cec5SDimitry Andric NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); 2052*0b57cec5SDimitry Andric } else { 2053*0b57cec5SDimitry Andric NumParts = DstTy.getNumElements(); 2054*0b57cec5SDimitry Andric NarrowTy1 = SrcTy.getElementType(); 2055*0b57cec5SDimitry Andric } 2056*0b57cec5SDimitry Andric 2057*0b57cec5SDimitry Andric SmallVector<Register, 4> SrcRegs, DstRegs; 2058*0b57cec5SDimitry Andric extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); 2059*0b57cec5SDimitry Andric 2060*0b57cec5SDimitry Andric for (unsigned I = 0; I < NumParts; ++I) { 2061*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 2062*0b57cec5SDimitry Andric MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) 2063*0b57cec5SDimitry Andric .addDef(DstReg) 2064*0b57cec5SDimitry Andric .addUse(SrcRegs[I]); 2065*0b57cec5SDimitry Andric 2066*0b57cec5SDimitry Andric NewInst->setFlags(MI.getFlags()); 2067*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 2068*0b57cec5SDimitry Andric } 2069*0b57cec5SDimitry Andric 2070*0b57cec5SDimitry Andric if (NarrowTy.isVector()) 2071*0b57cec5SDimitry Andric MIRBuilder.buildConcatVectors(DstReg, DstRegs); 2072*0b57cec5SDimitry Andric else 2073*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 2074*0b57cec5SDimitry Andric 2075*0b57cec5SDimitry Andric MI.eraseFromParent(); 2076*0b57cec5SDimitry Andric return Legalized; 2077*0b57cec5SDimitry Andric } 2078*0b57cec5SDimitry Andric 2079*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2080*0b57cec5SDimitry Andric LegalizerHelper::fewerElementsVectorCmp(MachineInstr 
&MI, unsigned TypeIdx, 2081*0b57cec5SDimitry Andric LLT NarrowTy) { 2082*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2083*0b57cec5SDimitry Andric Register Src0Reg = MI.getOperand(2).getReg(); 2084*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2085*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(Src0Reg); 2086*0b57cec5SDimitry Andric 2087*0b57cec5SDimitry Andric unsigned NumParts; 2088*0b57cec5SDimitry Andric LLT NarrowTy0, NarrowTy1; 2089*0b57cec5SDimitry Andric 2090*0b57cec5SDimitry Andric if (TypeIdx == 0) { 2091*0b57cec5SDimitry Andric unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 2092*0b57cec5SDimitry Andric unsigned OldElts = DstTy.getNumElements(); 2093*0b57cec5SDimitry Andric 2094*0b57cec5SDimitry Andric NarrowTy0 = NarrowTy; 2095*0b57cec5SDimitry Andric NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); 2096*0b57cec5SDimitry Andric NarrowTy1 = NarrowTy.isVector() ? 2097*0b57cec5SDimitry Andric LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : 2098*0b57cec5SDimitry Andric SrcTy.getElementType(); 2099*0b57cec5SDimitry Andric 2100*0b57cec5SDimitry Andric } else { 2101*0b57cec5SDimitry Andric unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 2102*0b57cec5SDimitry Andric unsigned OldElts = SrcTy.getNumElements(); 2103*0b57cec5SDimitry Andric 2104*0b57cec5SDimitry Andric NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : 2105*0b57cec5SDimitry Andric NarrowTy.getNumElements(); 2106*0b57cec5SDimitry Andric NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), 2107*0b57cec5SDimitry Andric DstTy.getScalarSizeInBits()); 2108*0b57cec5SDimitry Andric NarrowTy1 = NarrowTy; 2109*0b57cec5SDimitry Andric } 2110*0b57cec5SDimitry Andric 2111*0b57cec5SDimitry Andric // FIXME: Don't know how to handle the situation where the small vectors 2112*0b57cec5SDimitry Andric // aren't all the same size yet. 
2113*0b57cec5SDimitry Andric if (NarrowTy1.isVector() && 2114*0b57cec5SDimitry Andric NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) 2115*0b57cec5SDimitry Andric return UnableToLegalize; 2116*0b57cec5SDimitry Andric 2117*0b57cec5SDimitry Andric CmpInst::Predicate Pred 2118*0b57cec5SDimitry Andric = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 2119*0b57cec5SDimitry Andric 2120*0b57cec5SDimitry Andric SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; 2121*0b57cec5SDimitry Andric extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); 2122*0b57cec5SDimitry Andric extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); 2123*0b57cec5SDimitry Andric 2124*0b57cec5SDimitry Andric for (unsigned I = 0; I < NumParts; ++I) { 2125*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 2126*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 2127*0b57cec5SDimitry Andric 2128*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_ICMP) 2129*0b57cec5SDimitry Andric MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); 2130*0b57cec5SDimitry Andric else { 2131*0b57cec5SDimitry Andric MachineInstr *NewCmp 2132*0b57cec5SDimitry Andric = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); 2133*0b57cec5SDimitry Andric NewCmp->setFlags(MI.getFlags()); 2134*0b57cec5SDimitry Andric } 2135*0b57cec5SDimitry Andric } 2136*0b57cec5SDimitry Andric 2137*0b57cec5SDimitry Andric if (NarrowTy1.isVector()) 2138*0b57cec5SDimitry Andric MIRBuilder.buildConcatVectors(DstReg, DstRegs); 2139*0b57cec5SDimitry Andric else 2140*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 2141*0b57cec5SDimitry Andric 2142*0b57cec5SDimitry Andric MI.eraseFromParent(); 2143*0b57cec5SDimitry Andric return Legalized; 2144*0b57cec5SDimitry Andric } 2145*0b57cec5SDimitry Andric 2146*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2147*0b57cec5SDimitry Andric 
/// Split a vector select into narrower selects.
///
/// Operand 1 of \p MI is the condition, operands 2 and 3 the selected values.
/// With \p TypeIdx == 0, \p NarrowTy is the narrowed value type; a vector
/// condition is split into the same number of pieces, while a scalar
/// condition is reused for every piece. With \p TypeIdx != 0 the condition
/// must be a vector and only a scalar \p NarrowTy is handled.
///
/// Returns UnableToLegalize for breakdowns that are not handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  const bool HasVectorCond = CondTy.isVector();

  assert(TypeIdx == 0 || CondTy.isVector());

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx != 0) {
    NumParts = CondTy.getNumElements();

    // TODO: Handle vector breakdowns of the condition, including uneven ones.
    if (NarrowTy.isVector())
      return UnableToLegalize;

    NarrowTy0 = DstTy.getElementType();
    NarrowTy1 = NarrowTy;
  } else {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    const unsigned Size = DstTy.getSizeInBits();
    const unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (HasVectorCond) {
      const unsigned CondElts = CondTy.getNumElements();
      NarrowTy1 = CondElts == NumParts
                      ? CondTy.getElementType()
                      : LLT::vector(CondElts / NumParts,
                                    CondTy.getScalarSizeInBits());
    }
  }

  SmallVector<Register, 2> DstRegs, CondParts, TrueParts, FalseParts;
  if (HasVectorCond)
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, CondParts);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, TrueParts);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, FalseParts);

  for (unsigned Part = 0; Part < NumParts; ++Part) {
    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);

    // A scalar condition applies to every piece unchanged.
    Register PartCond = CondReg;
    if (HasVectorCond)
      PartCond = CondParts[Part];

    MIRBuilder.buildSelect(PartDstReg, PartCond, TrueParts[Part],
                           FalseParts[Part]);
    DstRegs.push_back(PartDstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2228*0b57cec5SDimitry Andric int NumParts, NumLeftover; 2229*0b57cec5SDimitry Andric std::tie(NumParts, NumLeftover) 2230*0b57cec5SDimitry Andric = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); 2231*0b57cec5SDimitry Andric if (NumParts < 0) 2232*0b57cec5SDimitry Andric return UnableToLegalize; 2233*0b57cec5SDimitry Andric 2234*0b57cec5SDimitry Andric SmallVector<Register, 4> DstRegs, LeftoverDstRegs; 2235*0b57cec5SDimitry Andric SmallVector<MachineInstrBuilder, 4> NewInsts; 2236*0b57cec5SDimitry Andric 2237*0b57cec5SDimitry Andric const int TotalNumParts = NumParts + NumLeftover; 2238*0b57cec5SDimitry Andric 2239*0b57cec5SDimitry Andric // Insert the new phis in the result block first. 2240*0b57cec5SDimitry Andric for (int I = 0; I != TotalNumParts; ++I) { 2241*0b57cec5SDimitry Andric LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; 2242*0b57cec5SDimitry Andric Register PartDstReg = MRI.createGenericVirtualRegister(Ty); 2243*0b57cec5SDimitry Andric NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) 2244*0b57cec5SDimitry Andric .addDef(PartDstReg)); 2245*0b57cec5SDimitry Andric if (I < NumParts) 2246*0b57cec5SDimitry Andric DstRegs.push_back(PartDstReg); 2247*0b57cec5SDimitry Andric else 2248*0b57cec5SDimitry Andric LeftoverDstRegs.push_back(PartDstReg); 2249*0b57cec5SDimitry Andric } 2250*0b57cec5SDimitry Andric 2251*0b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 2252*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); 2253*0b57cec5SDimitry Andric insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); 2254*0b57cec5SDimitry Andric 2255*0b57cec5SDimitry Andric SmallVector<Register, 4> PartRegs, LeftoverRegs; 2256*0b57cec5SDimitry Andric 2257*0b57cec5SDimitry Andric // Insert code to extract the incoming values in each predecessor block. 
2258*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 2259*0b57cec5SDimitry Andric PartRegs.clear(); 2260*0b57cec5SDimitry Andric LeftoverRegs.clear(); 2261*0b57cec5SDimitry Andric 2262*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(I).getReg(); 2263*0b57cec5SDimitry Andric MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 2264*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 2265*0b57cec5SDimitry Andric 2266*0b57cec5SDimitry Andric LLT Unused; 2267*0b57cec5SDimitry Andric if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, 2268*0b57cec5SDimitry Andric LeftoverRegs)) 2269*0b57cec5SDimitry Andric return UnableToLegalize; 2270*0b57cec5SDimitry Andric 2271*0b57cec5SDimitry Andric // Add the newly created operand splits to the existing instructions. The 2272*0b57cec5SDimitry Andric // odd-sized pieces are ordered after the requested NarrowTyArg sized 2273*0b57cec5SDimitry Andric // pieces. 2274*0b57cec5SDimitry Andric for (int J = 0; J != TotalNumParts; ++J) { 2275*0b57cec5SDimitry Andric MachineInstrBuilder MIB = NewInsts[J]; 2276*0b57cec5SDimitry Andric MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); 2277*0b57cec5SDimitry Andric MIB.addMBB(&OpMBB); 2278*0b57cec5SDimitry Andric } 2279*0b57cec5SDimitry Andric } 2280*0b57cec5SDimitry Andric 2281*0b57cec5SDimitry Andric MI.eraseFromParent(); 2282*0b57cec5SDimitry Andric return Legalized; 2283*0b57cec5SDimitry Andric } 2284*0b57cec5SDimitry Andric 2285*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2286*0b57cec5SDimitry Andric LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, 2287*0b57cec5SDimitry Andric LLT NarrowTy) { 2288*0b57cec5SDimitry Andric // FIXME: Don't know how to handle secondary types yet. 
2289*0b57cec5SDimitry Andric if (TypeIdx != 0) 2290*0b57cec5SDimitry Andric return UnableToLegalize; 2291*0b57cec5SDimitry Andric 2292*0b57cec5SDimitry Andric MachineMemOperand *MMO = *MI.memoperands_begin(); 2293*0b57cec5SDimitry Andric 2294*0b57cec5SDimitry Andric // This implementation doesn't work for atomics. Give up instead of doing 2295*0b57cec5SDimitry Andric // something invalid. 2296*0b57cec5SDimitry Andric if (MMO->getOrdering() != AtomicOrdering::NotAtomic || 2297*0b57cec5SDimitry Andric MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) 2298*0b57cec5SDimitry Andric return UnableToLegalize; 2299*0b57cec5SDimitry Andric 2300*0b57cec5SDimitry Andric bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 2301*0b57cec5SDimitry Andric Register ValReg = MI.getOperand(0).getReg(); 2302*0b57cec5SDimitry Andric Register AddrReg = MI.getOperand(1).getReg(); 2303*0b57cec5SDimitry Andric LLT ValTy = MRI.getType(ValReg); 2304*0b57cec5SDimitry Andric 2305*0b57cec5SDimitry Andric int NumParts = -1; 2306*0b57cec5SDimitry Andric int NumLeftover = -1; 2307*0b57cec5SDimitry Andric LLT LeftoverTy; 2308*0b57cec5SDimitry Andric SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs; 2309*0b57cec5SDimitry Andric if (IsLoad) { 2310*0b57cec5SDimitry Andric std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); 2311*0b57cec5SDimitry Andric } else { 2312*0b57cec5SDimitry Andric if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, 2313*0b57cec5SDimitry Andric NarrowLeftoverRegs)) { 2314*0b57cec5SDimitry Andric NumParts = NarrowRegs.size(); 2315*0b57cec5SDimitry Andric NumLeftover = NarrowLeftoverRegs.size(); 2316*0b57cec5SDimitry Andric } 2317*0b57cec5SDimitry Andric } 2318*0b57cec5SDimitry Andric 2319*0b57cec5SDimitry Andric if (NumParts == -1) 2320*0b57cec5SDimitry Andric return UnableToLegalize; 2321*0b57cec5SDimitry Andric 2322*0b57cec5SDimitry Andric const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); 
2323*0b57cec5SDimitry Andric 2324*0b57cec5SDimitry Andric unsigned TotalSize = ValTy.getSizeInBits(); 2325*0b57cec5SDimitry Andric 2326*0b57cec5SDimitry Andric // Split the load/store into PartTy sized pieces starting at Offset. If this 2327*0b57cec5SDimitry Andric // is a load, return the new registers in ValRegs. For a store, each elements 2328*0b57cec5SDimitry Andric // of ValRegs should be PartTy. Returns the next offset that needs to be 2329*0b57cec5SDimitry Andric // handled. 2330*0b57cec5SDimitry Andric auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs, 2331*0b57cec5SDimitry Andric unsigned Offset) -> unsigned { 2332*0b57cec5SDimitry Andric MachineFunction &MF = MIRBuilder.getMF(); 2333*0b57cec5SDimitry Andric unsigned PartSize = PartTy.getSizeInBits(); 2334*0b57cec5SDimitry Andric for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; 2335*0b57cec5SDimitry Andric Offset += PartSize, ++Idx) { 2336*0b57cec5SDimitry Andric unsigned ByteSize = PartSize / 8; 2337*0b57cec5SDimitry Andric unsigned ByteOffset = Offset / 8; 2338*0b57cec5SDimitry Andric Register NewAddrReg; 2339*0b57cec5SDimitry Andric 2340*0b57cec5SDimitry Andric MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); 2341*0b57cec5SDimitry Andric 2342*0b57cec5SDimitry Andric MachineMemOperand *NewMMO = 2343*0b57cec5SDimitry Andric MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); 2344*0b57cec5SDimitry Andric 2345*0b57cec5SDimitry Andric if (IsLoad) { 2346*0b57cec5SDimitry Andric Register Dst = MRI.createGenericVirtualRegister(PartTy); 2347*0b57cec5SDimitry Andric ValRegs.push_back(Dst); 2348*0b57cec5SDimitry Andric MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO); 2349*0b57cec5SDimitry Andric } else { 2350*0b57cec5SDimitry Andric MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); 2351*0b57cec5SDimitry Andric } 2352*0b57cec5SDimitry Andric } 2353*0b57cec5SDimitry Andric 2354*0b57cec5SDimitry Andric return Offset; 2355*0b57cec5SDimitry Andric 
}; 2356*0b57cec5SDimitry Andric 2357*0b57cec5SDimitry Andric unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); 2358*0b57cec5SDimitry Andric 2359*0b57cec5SDimitry Andric // Handle the rest of the register if this isn't an even type breakdown. 2360*0b57cec5SDimitry Andric if (LeftoverTy.isValid()) 2361*0b57cec5SDimitry Andric splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); 2362*0b57cec5SDimitry Andric 2363*0b57cec5SDimitry Andric if (IsLoad) { 2364*0b57cec5SDimitry Andric insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, 2365*0b57cec5SDimitry Andric LeftoverTy, NarrowLeftoverRegs); 2366*0b57cec5SDimitry Andric } 2367*0b57cec5SDimitry Andric 2368*0b57cec5SDimitry Andric MI.eraseFromParent(); 2369*0b57cec5SDimitry Andric return Legalized; 2370*0b57cec5SDimitry Andric } 2371*0b57cec5SDimitry Andric 2372*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2373*0b57cec5SDimitry Andric LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 2374*0b57cec5SDimitry Andric LLT NarrowTy) { 2375*0b57cec5SDimitry Andric using namespace TargetOpcode; 2376*0b57cec5SDimitry Andric 2377*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 2378*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 2379*0b57cec5SDimitry Andric case G_IMPLICIT_DEF: 2380*0b57cec5SDimitry Andric return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); 2381*0b57cec5SDimitry Andric case G_AND: 2382*0b57cec5SDimitry Andric case G_OR: 2383*0b57cec5SDimitry Andric case G_XOR: 2384*0b57cec5SDimitry Andric case G_ADD: 2385*0b57cec5SDimitry Andric case G_SUB: 2386*0b57cec5SDimitry Andric case G_MUL: 2387*0b57cec5SDimitry Andric case G_SMULH: 2388*0b57cec5SDimitry Andric case G_UMULH: 2389*0b57cec5SDimitry Andric case G_FADD: 2390*0b57cec5SDimitry Andric case G_FMUL: 2391*0b57cec5SDimitry Andric case G_FSUB: 2392*0b57cec5SDimitry Andric case G_FNEG: 2393*0b57cec5SDimitry Andric case G_FABS: 2394*0b57cec5SDimitry Andric case G_FCANONICALIZE: 2395*0b57cec5SDimitry 
Andric case G_FDIV: 2396*0b57cec5SDimitry Andric case G_FREM: 2397*0b57cec5SDimitry Andric case G_FMA: 2398*0b57cec5SDimitry Andric case G_FPOW: 2399*0b57cec5SDimitry Andric case G_FEXP: 2400*0b57cec5SDimitry Andric case G_FEXP2: 2401*0b57cec5SDimitry Andric case G_FLOG: 2402*0b57cec5SDimitry Andric case G_FLOG2: 2403*0b57cec5SDimitry Andric case G_FLOG10: 2404*0b57cec5SDimitry Andric case G_FNEARBYINT: 2405*0b57cec5SDimitry Andric case G_FCEIL: 2406*0b57cec5SDimitry Andric case G_FFLOOR: 2407*0b57cec5SDimitry Andric case G_FRINT: 2408*0b57cec5SDimitry Andric case G_INTRINSIC_ROUND: 2409*0b57cec5SDimitry Andric case G_INTRINSIC_TRUNC: 2410*0b57cec5SDimitry Andric case G_FCOS: 2411*0b57cec5SDimitry Andric case G_FSIN: 2412*0b57cec5SDimitry Andric case G_FSQRT: 2413*0b57cec5SDimitry Andric case G_BSWAP: 2414*0b57cec5SDimitry Andric case G_SDIV: 2415*0b57cec5SDimitry Andric case G_SMIN: 2416*0b57cec5SDimitry Andric case G_SMAX: 2417*0b57cec5SDimitry Andric case G_UMIN: 2418*0b57cec5SDimitry Andric case G_UMAX: 2419*0b57cec5SDimitry Andric case G_FMINNUM: 2420*0b57cec5SDimitry Andric case G_FMAXNUM: 2421*0b57cec5SDimitry Andric case G_FMINNUM_IEEE: 2422*0b57cec5SDimitry Andric case G_FMAXNUM_IEEE: 2423*0b57cec5SDimitry Andric case G_FMINIMUM: 2424*0b57cec5SDimitry Andric case G_FMAXIMUM: 2425*0b57cec5SDimitry Andric return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); 2426*0b57cec5SDimitry Andric case G_SHL: 2427*0b57cec5SDimitry Andric case G_LSHR: 2428*0b57cec5SDimitry Andric case G_ASHR: 2429*0b57cec5SDimitry Andric case G_CTLZ: 2430*0b57cec5SDimitry Andric case G_CTLZ_ZERO_UNDEF: 2431*0b57cec5SDimitry Andric case G_CTTZ: 2432*0b57cec5SDimitry Andric case G_CTTZ_ZERO_UNDEF: 2433*0b57cec5SDimitry Andric case G_CTPOP: 2434*0b57cec5SDimitry Andric case G_FCOPYSIGN: 2435*0b57cec5SDimitry Andric return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); 2436*0b57cec5SDimitry Andric case G_ZEXT: 2437*0b57cec5SDimitry Andric case G_SEXT: 2438*0b57cec5SDimitry Andric 
case G_ANYEXT: 2439*0b57cec5SDimitry Andric case G_FPEXT: 2440*0b57cec5SDimitry Andric case G_FPTRUNC: 2441*0b57cec5SDimitry Andric case G_SITOFP: 2442*0b57cec5SDimitry Andric case G_UITOFP: 2443*0b57cec5SDimitry Andric case G_FPTOSI: 2444*0b57cec5SDimitry Andric case G_FPTOUI: 2445*0b57cec5SDimitry Andric case G_INTTOPTR: 2446*0b57cec5SDimitry Andric case G_PTRTOINT: 2447*0b57cec5SDimitry Andric case G_ADDRSPACE_CAST: 2448*0b57cec5SDimitry Andric return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); 2449*0b57cec5SDimitry Andric case G_ICMP: 2450*0b57cec5SDimitry Andric case G_FCMP: 2451*0b57cec5SDimitry Andric return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); 2452*0b57cec5SDimitry Andric case G_SELECT: 2453*0b57cec5SDimitry Andric return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); 2454*0b57cec5SDimitry Andric case G_PHI: 2455*0b57cec5SDimitry Andric return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); 2456*0b57cec5SDimitry Andric case G_LOAD: 2457*0b57cec5SDimitry Andric case G_STORE: 2458*0b57cec5SDimitry Andric return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 2459*0b57cec5SDimitry Andric default: 2460*0b57cec5SDimitry Andric return UnableToLegalize; 2461*0b57cec5SDimitry Andric } 2462*0b57cec5SDimitry Andric } 2463*0b57cec5SDimitry Andric 2464*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2465*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, 2466*0b57cec5SDimitry Andric const LLT HalfTy, const LLT AmtTy) { 2467*0b57cec5SDimitry Andric 2468*0b57cec5SDimitry Andric Register InL = MRI.createGenericVirtualRegister(HalfTy); 2469*0b57cec5SDimitry Andric Register InH = MRI.createGenericVirtualRegister(HalfTy); 2470*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2471*0b57cec5SDimitry Andric 2472*0b57cec5SDimitry Andric if (Amt.isNullValue()) { 2473*0b57cec5SDimitry Andric MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH}); 
2474*0b57cec5SDimitry Andric MI.eraseFromParent(); 2475*0b57cec5SDimitry Andric return Legalized; 2476*0b57cec5SDimitry Andric } 2477*0b57cec5SDimitry Andric 2478*0b57cec5SDimitry Andric LLT NVT = HalfTy; 2479*0b57cec5SDimitry Andric unsigned NVTBits = HalfTy.getSizeInBits(); 2480*0b57cec5SDimitry Andric unsigned VTBits = 2 * NVTBits; 2481*0b57cec5SDimitry Andric 2482*0b57cec5SDimitry Andric SrcOp Lo(Register(0)), Hi(Register(0)); 2483*0b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_SHL) { 2484*0b57cec5SDimitry Andric if (Amt.ugt(VTBits)) { 2485*0b57cec5SDimitry Andric Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 2486*0b57cec5SDimitry Andric } else if (Amt.ugt(NVTBits)) { 2487*0b57cec5SDimitry Andric Lo = MIRBuilder.buildConstant(NVT, 0); 2488*0b57cec5SDimitry Andric Hi = MIRBuilder.buildShl(NVT, InL, 2489*0b57cec5SDimitry Andric MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 2490*0b57cec5SDimitry Andric } else if (Amt == NVTBits) { 2491*0b57cec5SDimitry Andric Lo = MIRBuilder.buildConstant(NVT, 0); 2492*0b57cec5SDimitry Andric Hi = InL; 2493*0b57cec5SDimitry Andric } else { 2494*0b57cec5SDimitry Andric Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt)); 2495*0b57cec5SDimitry Andric auto OrLHS = 2496*0b57cec5SDimitry Andric MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)); 2497*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildLShr( 2498*0b57cec5SDimitry Andric NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2499*0b57cec5SDimitry Andric Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2500*0b57cec5SDimitry Andric } 2501*0b57cec5SDimitry Andric } else if (MI.getOpcode() == TargetOpcode::G_LSHR) { 2502*0b57cec5SDimitry Andric if (Amt.ugt(VTBits)) { 2503*0b57cec5SDimitry Andric Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 2504*0b57cec5SDimitry Andric } else if (Amt.ugt(NVTBits)) { 2505*0b57cec5SDimitry Andric Lo = MIRBuilder.buildLShr(NVT, InH, 2506*0b57cec5SDimitry Andric MIRBuilder.buildConstant(AmtTy, 
Amt - NVTBits)); 2507*0b57cec5SDimitry Andric Hi = MIRBuilder.buildConstant(NVT, 0); 2508*0b57cec5SDimitry Andric } else if (Amt == NVTBits) { 2509*0b57cec5SDimitry Andric Lo = InH; 2510*0b57cec5SDimitry Andric Hi = MIRBuilder.buildConstant(NVT, 0); 2511*0b57cec5SDimitry Andric } else { 2512*0b57cec5SDimitry Andric auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); 2513*0b57cec5SDimitry Andric 2514*0b57cec5SDimitry Andric auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 2515*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildShl( 2516*0b57cec5SDimitry Andric NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2517*0b57cec5SDimitry Andric 2518*0b57cec5SDimitry Andric Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2519*0b57cec5SDimitry Andric Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst); 2520*0b57cec5SDimitry Andric } 2521*0b57cec5SDimitry Andric } else { 2522*0b57cec5SDimitry Andric if (Amt.ugt(VTBits)) { 2523*0b57cec5SDimitry Andric Hi = Lo = MIRBuilder.buildAShr( 2524*0b57cec5SDimitry Andric NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2525*0b57cec5SDimitry Andric } else if (Amt.ugt(NVTBits)) { 2526*0b57cec5SDimitry Andric Lo = MIRBuilder.buildAShr(NVT, InH, 2527*0b57cec5SDimitry Andric MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 2528*0b57cec5SDimitry Andric Hi = MIRBuilder.buildAShr(NVT, InH, 2529*0b57cec5SDimitry Andric MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2530*0b57cec5SDimitry Andric } else if (Amt == NVTBits) { 2531*0b57cec5SDimitry Andric Lo = InH; 2532*0b57cec5SDimitry Andric Hi = MIRBuilder.buildAShr(NVT, InH, 2533*0b57cec5SDimitry Andric MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2534*0b57cec5SDimitry Andric } else { 2535*0b57cec5SDimitry Andric auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); 2536*0b57cec5SDimitry Andric 2537*0b57cec5SDimitry Andric auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 2538*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildShl( 
2539*0b57cec5SDimitry Andric NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2540*0b57cec5SDimitry Andric 2541*0b57cec5SDimitry Andric Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2542*0b57cec5SDimitry Andric Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst); 2543*0b57cec5SDimitry Andric } 2544*0b57cec5SDimitry Andric } 2545*0b57cec5SDimitry Andric 2546*0b57cec5SDimitry Andric MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()}); 2547*0b57cec5SDimitry Andric MI.eraseFromParent(); 2548*0b57cec5SDimitry Andric 2549*0b57cec5SDimitry Andric return Legalized; 2550*0b57cec5SDimitry Andric } 2551*0b57cec5SDimitry Andric 2552*0b57cec5SDimitry Andric // TODO: Optimize if constant shift amount. 2553*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2554*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, 2555*0b57cec5SDimitry Andric LLT RequestedTy) { 2556*0b57cec5SDimitry Andric if (TypeIdx == 1) { 2557*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2558*0b57cec5SDimitry Andric narrowScalarSrc(MI, RequestedTy, 2); 2559*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2560*0b57cec5SDimitry Andric return Legalized; 2561*0b57cec5SDimitry Andric } 2562*0b57cec5SDimitry Andric 2563*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2564*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 2565*0b57cec5SDimitry Andric if (DstTy.isVector()) 2566*0b57cec5SDimitry Andric return UnableToLegalize; 2567*0b57cec5SDimitry Andric 2568*0b57cec5SDimitry Andric Register Amt = MI.getOperand(2).getReg(); 2569*0b57cec5SDimitry Andric LLT ShiftAmtTy = MRI.getType(Amt); 2570*0b57cec5SDimitry Andric const unsigned DstEltSize = DstTy.getScalarSizeInBits(); 2571*0b57cec5SDimitry Andric if (DstEltSize % 2 != 0) 2572*0b57cec5SDimitry Andric return UnableToLegalize; 2573*0b57cec5SDimitry Andric 2574*0b57cec5SDimitry Andric // Ignore the input type. 
We can only go to exactly half the size of the 2575*0b57cec5SDimitry Andric // input. If that isn't small enough, the resulting pieces will be further 2576*0b57cec5SDimitry Andric // legalized. 2577*0b57cec5SDimitry Andric const unsigned NewBitSize = DstEltSize / 2; 2578*0b57cec5SDimitry Andric const LLT HalfTy = LLT::scalar(NewBitSize); 2579*0b57cec5SDimitry Andric const LLT CondTy = LLT::scalar(1); 2580*0b57cec5SDimitry Andric 2581*0b57cec5SDimitry Andric if (const MachineInstr *KShiftAmt = 2582*0b57cec5SDimitry Andric getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { 2583*0b57cec5SDimitry Andric return narrowScalarShiftByConstant( 2584*0b57cec5SDimitry Andric MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); 2585*0b57cec5SDimitry Andric } 2586*0b57cec5SDimitry Andric 2587*0b57cec5SDimitry Andric // TODO: Expand with known bits. 2588*0b57cec5SDimitry Andric 2589*0b57cec5SDimitry Andric // Handle the fully general expansion by an unknown amount. 2590*0b57cec5SDimitry Andric auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); 2591*0b57cec5SDimitry Andric 2592*0b57cec5SDimitry Andric Register InL = MRI.createGenericVirtualRegister(HalfTy); 2593*0b57cec5SDimitry Andric Register InH = MRI.createGenericVirtualRegister(HalfTy); 2594*0b57cec5SDimitry Andric MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2595*0b57cec5SDimitry Andric 2596*0b57cec5SDimitry Andric auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); 2597*0b57cec5SDimitry Andric auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); 2598*0b57cec5SDimitry Andric 2599*0b57cec5SDimitry Andric auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); 2600*0b57cec5SDimitry Andric auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); 2601*0b57cec5SDimitry Andric auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); 2602*0b57cec5SDimitry Andric 2603*0b57cec5SDimitry Andric Register ResultRegs[2]; 
2604*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 2605*0b57cec5SDimitry Andric case TargetOpcode::G_SHL: { 2606*0b57cec5SDimitry Andric // Short: ShAmt < NewBitSize 2607*0b57cec5SDimitry Andric auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2608*0b57cec5SDimitry Andric 2609*0b57cec5SDimitry Andric auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2610*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); 2611*0b57cec5SDimitry Andric auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2612*0b57cec5SDimitry Andric 2613*0b57cec5SDimitry Andric // Long: ShAmt >= NewBitSize 2614*0b57cec5SDimitry Andric auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. 2615*0b57cec5SDimitry Andric auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. 2616*0b57cec5SDimitry Andric 2617*0b57cec5SDimitry Andric auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); 2618*0b57cec5SDimitry Andric auto Hi = MIRBuilder.buildSelect( 2619*0b57cec5SDimitry Andric HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); 2620*0b57cec5SDimitry Andric 2621*0b57cec5SDimitry Andric ResultRegs[0] = Lo.getReg(0); 2622*0b57cec5SDimitry Andric ResultRegs[1] = Hi.getReg(0); 2623*0b57cec5SDimitry Andric break; 2624*0b57cec5SDimitry Andric } 2625*0b57cec5SDimitry Andric case TargetOpcode::G_LSHR: { 2626*0b57cec5SDimitry Andric // Short: ShAmt < NewBitSize 2627*0b57cec5SDimitry Andric auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); 2628*0b57cec5SDimitry Andric 2629*0b57cec5SDimitry Andric auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2630*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); 2631*0b57cec5SDimitry Andric auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2632*0b57cec5SDimitry Andric 2633*0b57cec5SDimitry Andric // Long: ShAmt >= NewBitSize 2634*0b57cec5SDimitry Andric auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. 
2635*0b57cec5SDimitry Andric auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 2636*0b57cec5SDimitry Andric 2637*0b57cec5SDimitry Andric auto Lo = MIRBuilder.buildSelect( 2638*0b57cec5SDimitry Andric HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2639*0b57cec5SDimitry Andric auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2640*0b57cec5SDimitry Andric 2641*0b57cec5SDimitry Andric ResultRegs[0] = Lo.getReg(0); 2642*0b57cec5SDimitry Andric ResultRegs[1] = Hi.getReg(0); 2643*0b57cec5SDimitry Andric break; 2644*0b57cec5SDimitry Andric } 2645*0b57cec5SDimitry Andric case TargetOpcode::G_ASHR: { 2646*0b57cec5SDimitry Andric // Short: ShAmt < NewBitSize 2647*0b57cec5SDimitry Andric auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); 2648*0b57cec5SDimitry Andric 2649*0b57cec5SDimitry Andric auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2650*0b57cec5SDimitry Andric auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); 2651*0b57cec5SDimitry Andric auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2652*0b57cec5SDimitry Andric 2653*0b57cec5SDimitry Andric // Long: ShAmt >= NewBitSize 2654*0b57cec5SDimitry Andric 2655*0b57cec5SDimitry Andric // Sign of Hi part. 2656*0b57cec5SDimitry Andric auto HiL = MIRBuilder.buildAShr( 2657*0b57cec5SDimitry Andric HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); 2658*0b57cec5SDimitry Andric 2659*0b57cec5SDimitry Andric auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 
2660*0b57cec5SDimitry Andric 2661*0b57cec5SDimitry Andric auto Lo = MIRBuilder.buildSelect( 2662*0b57cec5SDimitry Andric HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2663*0b57cec5SDimitry Andric 2664*0b57cec5SDimitry Andric auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2665*0b57cec5SDimitry Andric 2666*0b57cec5SDimitry Andric ResultRegs[0] = Lo.getReg(0); 2667*0b57cec5SDimitry Andric ResultRegs[1] = Hi.getReg(0); 2668*0b57cec5SDimitry Andric break; 2669*0b57cec5SDimitry Andric } 2670*0b57cec5SDimitry Andric default: 2671*0b57cec5SDimitry Andric llvm_unreachable("not a shift"); 2672*0b57cec5SDimitry Andric } 2673*0b57cec5SDimitry Andric 2674*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, ResultRegs); 2675*0b57cec5SDimitry Andric MI.eraseFromParent(); 2676*0b57cec5SDimitry Andric return Legalized; 2677*0b57cec5SDimitry Andric } 2678*0b57cec5SDimitry Andric 2679*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2680*0b57cec5SDimitry Andric LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, 2681*0b57cec5SDimitry Andric LLT MoreTy) { 2682*0b57cec5SDimitry Andric assert(TypeIdx == 0 && "Expecting only Idx 0"); 2683*0b57cec5SDimitry Andric 2684*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2685*0b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 2686*0b57cec5SDimitry Andric MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 2687*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 2688*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, I); 2689*0b57cec5SDimitry Andric } 2690*0b57cec5SDimitry Andric 2691*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 2692*0b57cec5SDimitry Andric MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 2693*0b57cec5SDimitry Andric moreElementsVectorDst(MI, MoreTy, 0); 2694*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2695*0b57cec5SDimitry Andric return 
Legalized; 2696*0b57cec5SDimitry Andric } 2697*0b57cec5SDimitry Andric 2698*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2699*0b57cec5SDimitry Andric LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, 2700*0b57cec5SDimitry Andric LLT MoreTy) { 2701*0b57cec5SDimitry Andric MIRBuilder.setInstr(MI); 2702*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 2703*0b57cec5SDimitry Andric switch (Opc) { 2704*0b57cec5SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: { 2705*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2706*0b57cec5SDimitry Andric moreElementsVectorDst(MI, MoreTy, 0); 2707*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2708*0b57cec5SDimitry Andric return Legalized; 2709*0b57cec5SDimitry Andric } 2710*0b57cec5SDimitry Andric case TargetOpcode::G_AND: 2711*0b57cec5SDimitry Andric case TargetOpcode::G_OR: 2712*0b57cec5SDimitry Andric case TargetOpcode::G_XOR: 2713*0b57cec5SDimitry Andric case TargetOpcode::G_SMIN: 2714*0b57cec5SDimitry Andric case TargetOpcode::G_SMAX: 2715*0b57cec5SDimitry Andric case TargetOpcode::G_UMIN: 2716*0b57cec5SDimitry Andric case TargetOpcode::G_UMAX: { 2717*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2718*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 1); 2719*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 2); 2720*0b57cec5SDimitry Andric moreElementsVectorDst(MI, MoreTy, 0); 2721*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2722*0b57cec5SDimitry Andric return Legalized; 2723*0b57cec5SDimitry Andric } 2724*0b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT: 2725*0b57cec5SDimitry Andric if (TypeIdx != 1) 2726*0b57cec5SDimitry Andric return UnableToLegalize; 2727*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2728*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 1); 2729*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2730*0b57cec5SDimitry Andric return Legalized; 2731*0b57cec5SDimitry Andric case TargetOpcode::G_INSERT: 
2732*0b57cec5SDimitry Andric if (TypeIdx != 0) 2733*0b57cec5SDimitry Andric return UnableToLegalize; 2734*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2735*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 1); 2736*0b57cec5SDimitry Andric moreElementsVectorDst(MI, MoreTy, 0); 2737*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2738*0b57cec5SDimitry Andric return Legalized; 2739*0b57cec5SDimitry Andric case TargetOpcode::G_SELECT: 2740*0b57cec5SDimitry Andric if (TypeIdx != 0) 2741*0b57cec5SDimitry Andric return UnableToLegalize; 2742*0b57cec5SDimitry Andric if (MRI.getType(MI.getOperand(1).getReg()).isVector()) 2743*0b57cec5SDimitry Andric return UnableToLegalize; 2744*0b57cec5SDimitry Andric 2745*0b57cec5SDimitry Andric Observer.changingInstr(MI); 2746*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 2); 2747*0b57cec5SDimitry Andric moreElementsVectorSrc(MI, MoreTy, 3); 2748*0b57cec5SDimitry Andric moreElementsVectorDst(MI, MoreTy, 0); 2749*0b57cec5SDimitry Andric Observer.changedInstr(MI); 2750*0b57cec5SDimitry Andric return Legalized; 2751*0b57cec5SDimitry Andric case TargetOpcode::G_PHI: 2752*0b57cec5SDimitry Andric return moreElementsVectorPhi(MI, TypeIdx, MoreTy); 2753*0b57cec5SDimitry Andric default: 2754*0b57cec5SDimitry Andric return UnableToLegalize; 2755*0b57cec5SDimitry Andric } 2756*0b57cec5SDimitry Andric } 2757*0b57cec5SDimitry Andric 2758*0b57cec5SDimitry Andric void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, 2759*0b57cec5SDimitry Andric ArrayRef<Register> Src1Regs, 2760*0b57cec5SDimitry Andric ArrayRef<Register> Src2Regs, 2761*0b57cec5SDimitry Andric LLT NarrowTy) { 2762*0b57cec5SDimitry Andric MachineIRBuilder &B = MIRBuilder; 2763*0b57cec5SDimitry Andric unsigned SrcParts = Src1Regs.size(); 2764*0b57cec5SDimitry Andric unsigned DstParts = DstRegs.size(); 2765*0b57cec5SDimitry Andric 2766*0b57cec5SDimitry Andric unsigned DstIdx = 0; // Low bits of the result. 
2767*0b57cec5SDimitry Andric Register FactorSum = 2768*0b57cec5SDimitry Andric B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0); 2769*0b57cec5SDimitry Andric DstRegs[DstIdx] = FactorSum; 2770*0b57cec5SDimitry Andric 2771*0b57cec5SDimitry Andric unsigned CarrySumPrevDstIdx; 2772*0b57cec5SDimitry Andric SmallVector<Register, 4> Factors; 2773*0b57cec5SDimitry Andric 2774*0b57cec5SDimitry Andric for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { 2775*0b57cec5SDimitry Andric // Collect low parts of muls for DstIdx. 2776*0b57cec5SDimitry Andric for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1; 2777*0b57cec5SDimitry Andric i <= std::min(DstIdx, SrcParts - 1); ++i) { 2778*0b57cec5SDimitry Andric MachineInstrBuilder Mul = 2779*0b57cec5SDimitry Andric B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]); 2780*0b57cec5SDimitry Andric Factors.push_back(Mul.getReg(0)); 2781*0b57cec5SDimitry Andric } 2782*0b57cec5SDimitry Andric // Collect high parts of muls from previous DstIdx. 2783*0b57cec5SDimitry Andric for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts; 2784*0b57cec5SDimitry Andric i <= std::min(DstIdx - 1, SrcParts - 1); ++i) { 2785*0b57cec5SDimitry Andric MachineInstrBuilder Umulh = 2786*0b57cec5SDimitry Andric B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]); 2787*0b57cec5SDimitry Andric Factors.push_back(Umulh.getReg(0)); 2788*0b57cec5SDimitry Andric } 2789*0b57cec5SDimitry Andric // Add CarrySum from additons calculated for previous DstIdx. 2790*0b57cec5SDimitry Andric if (DstIdx != 1) { 2791*0b57cec5SDimitry Andric Factors.push_back(CarrySumPrevDstIdx); 2792*0b57cec5SDimitry Andric } 2793*0b57cec5SDimitry Andric 2794*0b57cec5SDimitry Andric Register CarrySum; 2795*0b57cec5SDimitry Andric // Add all factors and accumulate all carries into CarrySum. 
2796*0b57cec5SDimitry Andric if (DstIdx != DstParts - 1) { 2797*0b57cec5SDimitry Andric MachineInstrBuilder Uaddo = 2798*0b57cec5SDimitry Andric B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); 2799*0b57cec5SDimitry Andric FactorSum = Uaddo.getReg(0); 2800*0b57cec5SDimitry Andric CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); 2801*0b57cec5SDimitry Andric for (unsigned i = 2; i < Factors.size(); ++i) { 2802*0b57cec5SDimitry Andric MachineInstrBuilder Uaddo = 2803*0b57cec5SDimitry Andric B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); 2804*0b57cec5SDimitry Andric FactorSum = Uaddo.getReg(0); 2805*0b57cec5SDimitry Andric MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); 2806*0b57cec5SDimitry Andric CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); 2807*0b57cec5SDimitry Andric } 2808*0b57cec5SDimitry Andric } else { 2809*0b57cec5SDimitry Andric // Since value for the next index is not calculated, neither is CarrySum. 2810*0b57cec5SDimitry Andric FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0); 2811*0b57cec5SDimitry Andric for (unsigned i = 2; i < Factors.size(); ++i) 2812*0b57cec5SDimitry Andric FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0); 2813*0b57cec5SDimitry Andric } 2814*0b57cec5SDimitry Andric 2815*0b57cec5SDimitry Andric CarrySumPrevDstIdx = CarrySum; 2816*0b57cec5SDimitry Andric DstRegs[DstIdx] = FactorSum; 2817*0b57cec5SDimitry Andric Factors.clear(); 2818*0b57cec5SDimitry Andric } 2819*0b57cec5SDimitry Andric } 2820*0b57cec5SDimitry Andric 2821*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2822*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { 2823*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2824*0b57cec5SDimitry Andric Register Src1 = MI.getOperand(1).getReg(); 2825*0b57cec5SDimitry Andric Register Src2 = MI.getOperand(2).getReg(); 2826*0b57cec5SDimitry Andric 
2827*0b57cec5SDimitry Andric LLT Ty = MRI.getType(DstReg); 2828*0b57cec5SDimitry Andric if (Ty.isVector()) 2829*0b57cec5SDimitry Andric return UnableToLegalize; 2830*0b57cec5SDimitry Andric 2831*0b57cec5SDimitry Andric unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); 2832*0b57cec5SDimitry Andric unsigned DstSize = Ty.getSizeInBits(); 2833*0b57cec5SDimitry Andric unsigned NarrowSize = NarrowTy.getSizeInBits(); 2834*0b57cec5SDimitry Andric if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) 2835*0b57cec5SDimitry Andric return UnableToLegalize; 2836*0b57cec5SDimitry Andric 2837*0b57cec5SDimitry Andric unsigned NumDstParts = DstSize / NarrowSize; 2838*0b57cec5SDimitry Andric unsigned NumSrcParts = SrcSize / NarrowSize; 2839*0b57cec5SDimitry Andric bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; 2840*0b57cec5SDimitry Andric unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); 2841*0b57cec5SDimitry Andric 2842*0b57cec5SDimitry Andric SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs; 2843*0b57cec5SDimitry Andric extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); 2844*0b57cec5SDimitry Andric extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); 2845*0b57cec5SDimitry Andric DstTmpRegs.resize(DstTmpParts); 2846*0b57cec5SDimitry Andric multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy); 2847*0b57cec5SDimitry Andric 2848*0b57cec5SDimitry Andric // Take only high half of registers if this is high mul. 2849*0b57cec5SDimitry Andric ArrayRef<Register> DstRegs( 2850*0b57cec5SDimitry Andric IsMulHigh ? 
&DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts); 2851*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, DstRegs); 2852*0b57cec5SDimitry Andric MI.eraseFromParent(); 2853*0b57cec5SDimitry Andric return Legalized; 2854*0b57cec5SDimitry Andric } 2855*0b57cec5SDimitry Andric 2856*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2857*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, 2858*0b57cec5SDimitry Andric LLT NarrowTy) { 2859*0b57cec5SDimitry Andric if (TypeIdx != 1) 2860*0b57cec5SDimitry Andric return UnableToLegalize; 2861*0b57cec5SDimitry Andric 2862*0b57cec5SDimitry Andric uint64_t NarrowSize = NarrowTy.getSizeInBits(); 2863*0b57cec5SDimitry Andric 2864*0b57cec5SDimitry Andric int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 2865*0b57cec5SDimitry Andric // FIXME: add support for when SizeOp1 isn't an exact multiple of 2866*0b57cec5SDimitry Andric // NarrowSize. 2867*0b57cec5SDimitry Andric if (SizeOp1 % NarrowSize != 0) 2868*0b57cec5SDimitry Andric return UnableToLegalize; 2869*0b57cec5SDimitry Andric int NumParts = SizeOp1 / NarrowSize; 2870*0b57cec5SDimitry Andric 2871*0b57cec5SDimitry Andric SmallVector<Register, 2> SrcRegs, DstRegs; 2872*0b57cec5SDimitry Andric SmallVector<uint64_t, 2> Indexes; 2873*0b57cec5SDimitry Andric extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 2874*0b57cec5SDimitry Andric 2875*0b57cec5SDimitry Andric Register OpReg = MI.getOperand(0).getReg(); 2876*0b57cec5SDimitry Andric uint64_t OpStart = MI.getOperand(2).getImm(); 2877*0b57cec5SDimitry Andric uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 2878*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) { 2879*0b57cec5SDimitry Andric unsigned SrcStart = i * NarrowSize; 2880*0b57cec5SDimitry Andric 2881*0b57cec5SDimitry Andric if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { 2882*0b57cec5SDimitry Andric // No part of the extract uses 
this subregister, ignore it. 2883*0b57cec5SDimitry Andric continue; 2884*0b57cec5SDimitry Andric } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 2885*0b57cec5SDimitry Andric // The entire subregister is extracted, forward the value. 2886*0b57cec5SDimitry Andric DstRegs.push_back(SrcRegs[i]); 2887*0b57cec5SDimitry Andric continue; 2888*0b57cec5SDimitry Andric } 2889*0b57cec5SDimitry Andric 2890*0b57cec5SDimitry Andric // OpSegStart is where this destination segment would start in OpReg if it 2891*0b57cec5SDimitry Andric // extended infinitely in both directions. 2892*0b57cec5SDimitry Andric int64_t ExtractOffset; 2893*0b57cec5SDimitry Andric uint64_t SegSize; 2894*0b57cec5SDimitry Andric if (OpStart < SrcStart) { 2895*0b57cec5SDimitry Andric ExtractOffset = 0; 2896*0b57cec5SDimitry Andric SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); 2897*0b57cec5SDimitry Andric } else { 2898*0b57cec5SDimitry Andric ExtractOffset = OpStart - SrcStart; 2899*0b57cec5SDimitry Andric SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); 2900*0b57cec5SDimitry Andric } 2901*0b57cec5SDimitry Andric 2902*0b57cec5SDimitry Andric Register SegReg = SrcRegs[i]; 2903*0b57cec5SDimitry Andric if (ExtractOffset != 0 || SegSize != NarrowSize) { 2904*0b57cec5SDimitry Andric // A genuine extract is needed. 
2905*0b57cec5SDimitry Andric SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 2906*0b57cec5SDimitry Andric MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); 2907*0b57cec5SDimitry Andric } 2908*0b57cec5SDimitry Andric 2909*0b57cec5SDimitry Andric DstRegs.push_back(SegReg); 2910*0b57cec5SDimitry Andric } 2911*0b57cec5SDimitry Andric 2912*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2913*0b57cec5SDimitry Andric if(MRI.getType(DstReg).isVector()) 2914*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 2915*0b57cec5SDimitry Andric else 2916*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, DstRegs); 2917*0b57cec5SDimitry Andric MI.eraseFromParent(); 2918*0b57cec5SDimitry Andric return Legalized; 2919*0b57cec5SDimitry Andric } 2920*0b57cec5SDimitry Andric 2921*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2922*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, 2923*0b57cec5SDimitry Andric LLT NarrowTy) { 2924*0b57cec5SDimitry Andric // FIXME: Don't know how to handle secondary types yet. 2925*0b57cec5SDimitry Andric if (TypeIdx != 0) 2926*0b57cec5SDimitry Andric return UnableToLegalize; 2927*0b57cec5SDimitry Andric 2928*0b57cec5SDimitry Andric uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 2929*0b57cec5SDimitry Andric uint64_t NarrowSize = NarrowTy.getSizeInBits(); 2930*0b57cec5SDimitry Andric 2931*0b57cec5SDimitry Andric // FIXME: add support for when SizeOp0 isn't an exact multiple of 2932*0b57cec5SDimitry Andric // NarrowSize. 
2933*0b57cec5SDimitry Andric if (SizeOp0 % NarrowSize != 0) 2934*0b57cec5SDimitry Andric return UnableToLegalize; 2935*0b57cec5SDimitry Andric 2936*0b57cec5SDimitry Andric int NumParts = SizeOp0 / NarrowSize; 2937*0b57cec5SDimitry Andric 2938*0b57cec5SDimitry Andric SmallVector<Register, 2> SrcRegs, DstRegs; 2939*0b57cec5SDimitry Andric SmallVector<uint64_t, 2> Indexes; 2940*0b57cec5SDimitry Andric extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 2941*0b57cec5SDimitry Andric 2942*0b57cec5SDimitry Andric Register OpReg = MI.getOperand(2).getReg(); 2943*0b57cec5SDimitry Andric uint64_t OpStart = MI.getOperand(3).getImm(); 2944*0b57cec5SDimitry Andric uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 2945*0b57cec5SDimitry Andric for (int i = 0; i < NumParts; ++i) { 2946*0b57cec5SDimitry Andric unsigned DstStart = i * NarrowSize; 2947*0b57cec5SDimitry Andric 2948*0b57cec5SDimitry Andric if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { 2949*0b57cec5SDimitry Andric // No part of the insert affects this subregister, forward the original. 2950*0b57cec5SDimitry Andric DstRegs.push_back(SrcRegs[i]); 2951*0b57cec5SDimitry Andric continue; 2952*0b57cec5SDimitry Andric } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 2953*0b57cec5SDimitry Andric // The entire subregister is defined by this insert, forward the new 2954*0b57cec5SDimitry Andric // value. 2955*0b57cec5SDimitry Andric DstRegs.push_back(OpReg); 2956*0b57cec5SDimitry Andric continue; 2957*0b57cec5SDimitry Andric } 2958*0b57cec5SDimitry Andric 2959*0b57cec5SDimitry Andric // OpSegStart is where this destination segment would start in OpReg if it 2960*0b57cec5SDimitry Andric // extended infinitely in both directions. 
2961*0b57cec5SDimitry Andric int64_t ExtractOffset, InsertOffset; 2962*0b57cec5SDimitry Andric uint64_t SegSize; 2963*0b57cec5SDimitry Andric if (OpStart < DstStart) { 2964*0b57cec5SDimitry Andric InsertOffset = 0; 2965*0b57cec5SDimitry Andric ExtractOffset = DstStart - OpStart; 2966*0b57cec5SDimitry Andric SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); 2967*0b57cec5SDimitry Andric } else { 2968*0b57cec5SDimitry Andric InsertOffset = OpStart - DstStart; 2969*0b57cec5SDimitry Andric ExtractOffset = 0; 2970*0b57cec5SDimitry Andric SegSize = 2971*0b57cec5SDimitry Andric std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); 2972*0b57cec5SDimitry Andric } 2973*0b57cec5SDimitry Andric 2974*0b57cec5SDimitry Andric Register SegReg = OpReg; 2975*0b57cec5SDimitry Andric if (ExtractOffset != 0 || SegSize != OpSize) { 2976*0b57cec5SDimitry Andric // A genuine extract is needed. 2977*0b57cec5SDimitry Andric SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 2978*0b57cec5SDimitry Andric MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); 2979*0b57cec5SDimitry Andric } 2980*0b57cec5SDimitry Andric 2981*0b57cec5SDimitry Andric Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 2982*0b57cec5SDimitry Andric MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); 2983*0b57cec5SDimitry Andric DstRegs.push_back(DstReg); 2984*0b57cec5SDimitry Andric } 2985*0b57cec5SDimitry Andric 2986*0b57cec5SDimitry Andric assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); 2987*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 2988*0b57cec5SDimitry Andric if(MRI.getType(DstReg).isVector()) 2989*0b57cec5SDimitry Andric MIRBuilder.buildBuildVector(DstReg, DstRegs); 2990*0b57cec5SDimitry Andric else 2991*0b57cec5SDimitry Andric MIRBuilder.buildMerge(DstReg, DstRegs); 2992*0b57cec5SDimitry Andric MI.eraseFromParent(); 2993*0b57cec5SDimitry Andric return Legalized; 2994*0b57cec5SDimitry Andric } 
2995*0b57cec5SDimitry Andric 2996*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 2997*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, 2998*0b57cec5SDimitry Andric LLT NarrowTy) { 2999*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3000*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 3001*0b57cec5SDimitry Andric 3002*0b57cec5SDimitry Andric assert(MI.getNumOperands() == 3 && TypeIdx == 0); 3003*0b57cec5SDimitry Andric 3004*0b57cec5SDimitry Andric SmallVector<Register, 4> DstRegs, DstLeftoverRegs; 3005*0b57cec5SDimitry Andric SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs; 3006*0b57cec5SDimitry Andric SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; 3007*0b57cec5SDimitry Andric LLT LeftoverTy; 3008*0b57cec5SDimitry Andric if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy, 3009*0b57cec5SDimitry Andric Src0Regs, Src0LeftoverRegs)) 3010*0b57cec5SDimitry Andric return UnableToLegalize; 3011*0b57cec5SDimitry Andric 3012*0b57cec5SDimitry Andric LLT Unused; 3013*0b57cec5SDimitry Andric if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused, 3014*0b57cec5SDimitry Andric Src1Regs, Src1LeftoverRegs)) 3015*0b57cec5SDimitry Andric llvm_unreachable("inconsistent extractParts result"); 3016*0b57cec5SDimitry Andric 3017*0b57cec5SDimitry Andric for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 3018*0b57cec5SDimitry Andric auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, 3019*0b57cec5SDimitry Andric {Src0Regs[I], Src1Regs[I]}); 3020*0b57cec5SDimitry Andric DstRegs.push_back(Inst->getOperand(0).getReg()); 3021*0b57cec5SDimitry Andric } 3022*0b57cec5SDimitry Andric 3023*0b57cec5SDimitry Andric for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 3024*0b57cec5SDimitry Andric auto Inst = MIRBuilder.buildInstr( 3025*0b57cec5SDimitry Andric MI.getOpcode(), 3026*0b57cec5SDimitry Andric {LeftoverTy}, {Src0LeftoverRegs[I], 
Src1LeftoverRegs[I]}); 3027*0b57cec5SDimitry Andric DstLeftoverRegs.push_back(Inst->getOperand(0).getReg()); 3028*0b57cec5SDimitry Andric } 3029*0b57cec5SDimitry Andric 3030*0b57cec5SDimitry Andric insertParts(DstReg, DstTy, NarrowTy, DstRegs, 3031*0b57cec5SDimitry Andric LeftoverTy, DstLeftoverRegs); 3032*0b57cec5SDimitry Andric 3033*0b57cec5SDimitry Andric MI.eraseFromParent(); 3034*0b57cec5SDimitry Andric return Legalized; 3035*0b57cec5SDimitry Andric } 3036*0b57cec5SDimitry Andric 3037*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3038*0b57cec5SDimitry Andric LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, 3039*0b57cec5SDimitry Andric LLT NarrowTy) { 3040*0b57cec5SDimitry Andric if (TypeIdx != 0) 3041*0b57cec5SDimitry Andric return UnableToLegalize; 3042*0b57cec5SDimitry Andric 3043*0b57cec5SDimitry Andric Register CondReg = MI.getOperand(1).getReg(); 3044*0b57cec5SDimitry Andric LLT CondTy = MRI.getType(CondReg); 3045*0b57cec5SDimitry Andric if (CondTy.isVector()) // TODO: Handle vselect 3046*0b57cec5SDimitry Andric return UnableToLegalize; 3047*0b57cec5SDimitry Andric 3048*0b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3049*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(DstReg); 3050*0b57cec5SDimitry Andric 3051*0b57cec5SDimitry Andric SmallVector<Register, 4> DstRegs, DstLeftoverRegs; 3052*0b57cec5SDimitry Andric SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; 3053*0b57cec5SDimitry Andric SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs; 3054*0b57cec5SDimitry Andric LLT LeftoverTy; 3055*0b57cec5SDimitry Andric if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, 3056*0b57cec5SDimitry Andric Src1Regs, Src1LeftoverRegs)) 3057*0b57cec5SDimitry Andric return UnableToLegalize; 3058*0b57cec5SDimitry Andric 3059*0b57cec5SDimitry Andric LLT Unused; 3060*0b57cec5SDimitry Andric if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, 3061*0b57cec5SDimitry Andric 
Src2Regs, Src2LeftoverRegs)) 3062*0b57cec5SDimitry Andric llvm_unreachable("inconsistent extractParts result"); 3063*0b57cec5SDimitry Andric 3064*0b57cec5SDimitry Andric for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 3065*0b57cec5SDimitry Andric auto Select = MIRBuilder.buildSelect(NarrowTy, 3066*0b57cec5SDimitry Andric CondReg, Src1Regs[I], Src2Regs[I]); 3067*0b57cec5SDimitry Andric DstRegs.push_back(Select->getOperand(0).getReg()); 3068*0b57cec5SDimitry Andric } 3069*0b57cec5SDimitry Andric 3070*0b57cec5SDimitry Andric for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 3071*0b57cec5SDimitry Andric auto Select = MIRBuilder.buildSelect( 3072*0b57cec5SDimitry Andric LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); 3073*0b57cec5SDimitry Andric DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); 3074*0b57cec5SDimitry Andric } 3075*0b57cec5SDimitry Andric 3076*0b57cec5SDimitry Andric insertParts(DstReg, DstTy, NarrowTy, DstRegs, 3077*0b57cec5SDimitry Andric LeftoverTy, DstLeftoverRegs); 3078*0b57cec5SDimitry Andric 3079*0b57cec5SDimitry Andric MI.eraseFromParent(); 3080*0b57cec5SDimitry Andric return Legalized; 3081*0b57cec5SDimitry Andric } 3082*0b57cec5SDimitry Andric 3083*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3084*0b57cec5SDimitry Andric LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3085*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 3086*0b57cec5SDimitry Andric auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 3087*0b57cec5SDimitry Andric auto isSupported = [this](const LegalityQuery &Q) { 3088*0b57cec5SDimitry Andric auto QAction = LI.getAction(Q).Action; 3089*0b57cec5SDimitry Andric return QAction == Legal || QAction == Libcall || QAction == Custom; 3090*0b57cec5SDimitry Andric }; 3091*0b57cec5SDimitry Andric switch (Opc) { 3092*0b57cec5SDimitry Andric default: 3093*0b57cec5SDimitry Andric return UnableToLegalize; 3094*0b57cec5SDimitry Andric case 
TargetOpcode::G_CTLZ_ZERO_UNDEF: { 3095*0b57cec5SDimitry Andric // This trivially expands to CTLZ. 3096*0b57cec5SDimitry Andric Observer.changingInstr(MI); 3097*0b57cec5SDimitry Andric MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 3098*0b57cec5SDimitry Andric Observer.changedInstr(MI); 3099*0b57cec5SDimitry Andric return Legalized; 3100*0b57cec5SDimitry Andric } 3101*0b57cec5SDimitry Andric case TargetOpcode::G_CTLZ: { 3102*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 3103*0b57cec5SDimitry Andric unsigned Len = Ty.getSizeInBits(); 3104*0b57cec5SDimitry Andric if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { 3105*0b57cec5SDimitry Andric // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 3106*0b57cec5SDimitry Andric auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, 3107*0b57cec5SDimitry Andric {Ty}, {SrcReg}); 3108*0b57cec5SDimitry Andric auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 3109*0b57cec5SDimitry Andric auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 3110*0b57cec5SDimitry Andric auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 3111*0b57cec5SDimitry Andric SrcReg, MIBZero); 3112*0b57cec5SDimitry Andric MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 3113*0b57cec5SDimitry Andric MIBCtlzZU); 3114*0b57cec5SDimitry Andric MI.eraseFromParent(); 3115*0b57cec5SDimitry Andric return Legalized; 3116*0b57cec5SDimitry Andric } 3117*0b57cec5SDimitry Andric // for now, we do this: 3118*0b57cec5SDimitry Andric // NewLen = NextPowerOf2(Len); 3119*0b57cec5SDimitry Andric // x = x | (x >> 1); 3120*0b57cec5SDimitry Andric // x = x | (x >> 2); 3121*0b57cec5SDimitry Andric // ... 
3122*0b57cec5SDimitry Andric // x = x | (x >>16); 3123*0b57cec5SDimitry Andric // x = x | (x >>32); // for 64-bit input 3124*0b57cec5SDimitry Andric // Upto NewLen/2 3125*0b57cec5SDimitry Andric // return Len - popcount(x); 3126*0b57cec5SDimitry Andric // 3127*0b57cec5SDimitry Andric // Ref: "Hacker's Delight" by Henry Warren 3128*0b57cec5SDimitry Andric Register Op = SrcReg; 3129*0b57cec5SDimitry Andric unsigned NewLen = PowerOf2Ceil(Len); 3130*0b57cec5SDimitry Andric for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 3131*0b57cec5SDimitry Andric auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 3132*0b57cec5SDimitry Andric auto MIBOp = MIRBuilder.buildInstr( 3133*0b57cec5SDimitry Andric TargetOpcode::G_OR, {Ty}, 3134*0b57cec5SDimitry Andric {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, 3135*0b57cec5SDimitry Andric {Op, MIBShiftAmt})}); 3136*0b57cec5SDimitry Andric Op = MIBOp->getOperand(0).getReg(); 3137*0b57cec5SDimitry Andric } 3138*0b57cec5SDimitry Andric auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); 3139*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 3140*0b57cec5SDimitry Andric {MIRBuilder.buildConstant(Ty, Len), MIBPop}); 3141*0b57cec5SDimitry Andric MI.eraseFromParent(); 3142*0b57cec5SDimitry Andric return Legalized; 3143*0b57cec5SDimitry Andric } 3144*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 3145*0b57cec5SDimitry Andric // This trivially expands to CTTZ. 
3146*0b57cec5SDimitry Andric Observer.changingInstr(MI); 3147*0b57cec5SDimitry Andric MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 3148*0b57cec5SDimitry Andric Observer.changedInstr(MI); 3149*0b57cec5SDimitry Andric return Legalized; 3150*0b57cec5SDimitry Andric } 3151*0b57cec5SDimitry Andric case TargetOpcode::G_CTTZ: { 3152*0b57cec5SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 3153*0b57cec5SDimitry Andric unsigned Len = Ty.getSizeInBits(); 3154*0b57cec5SDimitry Andric if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { 3155*0b57cec5SDimitry Andric // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 3156*0b57cec5SDimitry Andric // zero. 3157*0b57cec5SDimitry Andric auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, 3158*0b57cec5SDimitry Andric {Ty}, {SrcReg}); 3159*0b57cec5SDimitry Andric auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 3160*0b57cec5SDimitry Andric auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 3161*0b57cec5SDimitry Andric auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 3162*0b57cec5SDimitry Andric SrcReg, MIBZero); 3163*0b57cec5SDimitry Andric MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 3164*0b57cec5SDimitry Andric MIBCttzZU); 3165*0b57cec5SDimitry Andric MI.eraseFromParent(); 3166*0b57cec5SDimitry Andric return Legalized; 3167*0b57cec5SDimitry Andric } 3168*0b57cec5SDimitry Andric // for now, we use: { return popcount(~x & (x - 1)); } 3169*0b57cec5SDimitry Andric // unless the target has ctlz but not ctpop, in which case we use: 3170*0b57cec5SDimitry Andric // { return 32 - nlz(~x & (x-1)); } 3171*0b57cec5SDimitry Andric // Ref: "Hacker's Delight" by Henry Warren 3172*0b57cec5SDimitry Andric auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 3173*0b57cec5SDimitry Andric auto MIBNot = 3174*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); 3175*0b57cec5SDimitry Andric auto MIBTmp = 
MIRBuilder.buildInstr( 3176*0b57cec5SDimitry Andric TargetOpcode::G_AND, {Ty}, 3177*0b57cec5SDimitry Andric {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, 3178*0b57cec5SDimitry Andric {SrcReg, MIBCstNeg1})}); 3179*0b57cec5SDimitry Andric if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && 3180*0b57cec5SDimitry Andric isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { 3181*0b57cec5SDimitry Andric auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 3182*0b57cec5SDimitry Andric MIRBuilder.buildInstr( 3183*0b57cec5SDimitry Andric TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 3184*0b57cec5SDimitry Andric {MIBCstLen, 3185*0b57cec5SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); 3186*0b57cec5SDimitry Andric MI.eraseFromParent(); 3187*0b57cec5SDimitry Andric return Legalized; 3188*0b57cec5SDimitry Andric } 3189*0b57cec5SDimitry Andric MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 3190*0b57cec5SDimitry Andric MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 3191*0b57cec5SDimitry Andric return Legalized; 3192*0b57cec5SDimitry Andric } 3193*0b57cec5SDimitry Andric } 3194*0b57cec5SDimitry Andric } 3195*0b57cec5SDimitry Andric 3196*0b57cec5SDimitry Andric // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float 3197*0b57cec5SDimitry Andric // representation. 
3198*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3199*0b57cec5SDimitry Andric LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { 3200*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3201*0b57cec5SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3202*0b57cec5SDimitry Andric const LLT S64 = LLT::scalar(64); 3203*0b57cec5SDimitry Andric const LLT S32 = LLT::scalar(32); 3204*0b57cec5SDimitry Andric const LLT S1 = LLT::scalar(1); 3205*0b57cec5SDimitry Andric 3206*0b57cec5SDimitry Andric assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); 3207*0b57cec5SDimitry Andric 3208*0b57cec5SDimitry Andric // unsigned cul2f(ulong u) { 3209*0b57cec5SDimitry Andric // uint lz = clz(u); 3210*0b57cec5SDimitry Andric // uint e = (u != 0) ? 127U + 63U - lz : 0; 3211*0b57cec5SDimitry Andric // u = (u << lz) & 0x7fffffffffffffffUL; 3212*0b57cec5SDimitry Andric // ulong t = u & 0xffffffffffUL; 3213*0b57cec5SDimitry Andric // uint v = (e << 23) | (uint)(u >> 40); 3214*0b57cec5SDimitry Andric // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? 
v & 1U : 0U); 3215*0b57cec5SDimitry Andric // return as_float(v + r); 3216*0b57cec5SDimitry Andric // } 3217*0b57cec5SDimitry Andric 3218*0b57cec5SDimitry Andric auto Zero32 = MIRBuilder.buildConstant(S32, 0); 3219*0b57cec5SDimitry Andric auto Zero64 = MIRBuilder.buildConstant(S64, 0); 3220*0b57cec5SDimitry Andric 3221*0b57cec5SDimitry Andric auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src); 3222*0b57cec5SDimitry Andric 3223*0b57cec5SDimitry Andric auto K = MIRBuilder.buildConstant(S32, 127U + 63U); 3224*0b57cec5SDimitry Andric auto Sub = MIRBuilder.buildSub(S32, K, LZ); 3225*0b57cec5SDimitry Andric 3226*0b57cec5SDimitry Andric auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64); 3227*0b57cec5SDimitry Andric auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32); 3228*0b57cec5SDimitry Andric 3229*0b57cec5SDimitry Andric auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1); 3230*0b57cec5SDimitry Andric auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ); 3231*0b57cec5SDimitry Andric 3232*0b57cec5SDimitry Andric auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0); 3233*0b57cec5SDimitry Andric 3234*0b57cec5SDimitry Andric auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL); 3235*0b57cec5SDimitry Andric auto T = MIRBuilder.buildAnd(S64, U, Mask1); 3236*0b57cec5SDimitry Andric 3237*0b57cec5SDimitry Andric auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40)); 3238*0b57cec5SDimitry Andric auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23)); 3239*0b57cec5SDimitry Andric auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl)); 3240*0b57cec5SDimitry Andric 3241*0b57cec5SDimitry Andric auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL); 3242*0b57cec5SDimitry Andric auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C); 3243*0b57cec5SDimitry Andric auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C); 3244*0b57cec5SDimitry Andric auto One = 
MIRBuilder.buildConstant(S32, 1); 3245*0b57cec5SDimitry Andric 3246*0b57cec5SDimitry Andric auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One); 3247*0b57cec5SDimitry Andric auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32); 3248*0b57cec5SDimitry Andric auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); 3249*0b57cec5SDimitry Andric MIRBuilder.buildAdd(Dst, V, R); 3250*0b57cec5SDimitry Andric 3251*0b57cec5SDimitry Andric return Legalized; 3252*0b57cec5SDimitry Andric } 3253*0b57cec5SDimitry Andric 3254*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3255*0b57cec5SDimitry Andric LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3256*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3257*0b57cec5SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3258*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(Dst); 3259*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(Src); 3260*0b57cec5SDimitry Andric 3261*0b57cec5SDimitry Andric if (SrcTy != LLT::scalar(64)) 3262*0b57cec5SDimitry Andric return UnableToLegalize; 3263*0b57cec5SDimitry Andric 3264*0b57cec5SDimitry Andric if (DstTy == LLT::scalar(32)) { 3265*0b57cec5SDimitry Andric // TODO: SelectionDAG has several alternative expansions to port which may 3266*0b57cec5SDimitry Andric // be more reasonble depending on the available instructions. If a target 3267*0b57cec5SDimitry Andric // has sitofp, does not have CTLZ, or can efficiently use f64 as an 3268*0b57cec5SDimitry Andric // intermediate type, this is probably worse. 
3269*0b57cec5SDimitry Andric return lowerU64ToF32BitOps(MI); 3270*0b57cec5SDimitry Andric } 3271*0b57cec5SDimitry Andric 3272*0b57cec5SDimitry Andric return UnableToLegalize; 3273*0b57cec5SDimitry Andric } 3274*0b57cec5SDimitry Andric 3275*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3276*0b57cec5SDimitry Andric LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3277*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3278*0b57cec5SDimitry Andric Register Src = MI.getOperand(1).getReg(); 3279*0b57cec5SDimitry Andric LLT DstTy = MRI.getType(Dst); 3280*0b57cec5SDimitry Andric LLT SrcTy = MRI.getType(Src); 3281*0b57cec5SDimitry Andric 3282*0b57cec5SDimitry Andric const LLT S64 = LLT::scalar(64); 3283*0b57cec5SDimitry Andric const LLT S32 = LLT::scalar(32); 3284*0b57cec5SDimitry Andric const LLT S1 = LLT::scalar(1); 3285*0b57cec5SDimitry Andric 3286*0b57cec5SDimitry Andric if (SrcTy != S64) 3287*0b57cec5SDimitry Andric return UnableToLegalize; 3288*0b57cec5SDimitry Andric 3289*0b57cec5SDimitry Andric if (DstTy == S32) { 3290*0b57cec5SDimitry Andric // signed cl2f(long l) { 3291*0b57cec5SDimitry Andric // long s = l >> 63; 3292*0b57cec5SDimitry Andric // float r = cul2f((l + s) ^ s); 3293*0b57cec5SDimitry Andric // return s ? 
-r : r; 3294*0b57cec5SDimitry Andric // } 3295*0b57cec5SDimitry Andric Register L = Src; 3296*0b57cec5SDimitry Andric auto SignBit = MIRBuilder.buildConstant(S64, 63); 3297*0b57cec5SDimitry Andric auto S = MIRBuilder.buildAShr(S64, L, SignBit); 3298*0b57cec5SDimitry Andric 3299*0b57cec5SDimitry Andric auto LPlusS = MIRBuilder.buildAdd(S64, L, S); 3300*0b57cec5SDimitry Andric auto Xor = MIRBuilder.buildXor(S64, LPlusS, S); 3301*0b57cec5SDimitry Andric auto R = MIRBuilder.buildUITOFP(S32, Xor); 3302*0b57cec5SDimitry Andric 3303*0b57cec5SDimitry Andric auto RNeg = MIRBuilder.buildFNeg(S32, R); 3304*0b57cec5SDimitry Andric auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, 3305*0b57cec5SDimitry Andric MIRBuilder.buildConstant(S64, 0)); 3306*0b57cec5SDimitry Andric MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); 3307*0b57cec5SDimitry Andric return Legalized; 3308*0b57cec5SDimitry Andric } 3309*0b57cec5SDimitry Andric 3310*0b57cec5SDimitry Andric return UnableToLegalize; 3311*0b57cec5SDimitry Andric } 3312*0b57cec5SDimitry Andric 3313*0b57cec5SDimitry Andric static CmpInst::Predicate minMaxToCompare(unsigned Opc) { 3314*0b57cec5SDimitry Andric switch (Opc) { 3315*0b57cec5SDimitry Andric case TargetOpcode::G_SMIN: 3316*0b57cec5SDimitry Andric return CmpInst::ICMP_SLT; 3317*0b57cec5SDimitry Andric case TargetOpcode::G_SMAX: 3318*0b57cec5SDimitry Andric return CmpInst::ICMP_SGT; 3319*0b57cec5SDimitry Andric case TargetOpcode::G_UMIN: 3320*0b57cec5SDimitry Andric return CmpInst::ICMP_ULT; 3321*0b57cec5SDimitry Andric case TargetOpcode::G_UMAX: 3322*0b57cec5SDimitry Andric return CmpInst::ICMP_UGT; 3323*0b57cec5SDimitry Andric default: 3324*0b57cec5SDimitry Andric llvm_unreachable("not in integer min/max"); 3325*0b57cec5SDimitry Andric } 3326*0b57cec5SDimitry Andric } 3327*0b57cec5SDimitry Andric 3328*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3329*0b57cec5SDimitry Andric LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 
3330*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3331*0b57cec5SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3332*0b57cec5SDimitry Andric Register Src1 = MI.getOperand(2).getReg(); 3333*0b57cec5SDimitry Andric 3334*0b57cec5SDimitry Andric const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); 3335*0b57cec5SDimitry Andric LLT CmpType = MRI.getType(Dst).changeElementSize(1); 3336*0b57cec5SDimitry Andric 3337*0b57cec5SDimitry Andric auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1); 3338*0b57cec5SDimitry Andric MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1); 3339*0b57cec5SDimitry Andric 3340*0b57cec5SDimitry Andric MI.eraseFromParent(); 3341*0b57cec5SDimitry Andric return Legalized; 3342*0b57cec5SDimitry Andric } 3343*0b57cec5SDimitry Andric 3344*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3345*0b57cec5SDimitry Andric LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3346*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3347*0b57cec5SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3348*0b57cec5SDimitry Andric Register Src1 = MI.getOperand(2).getReg(); 3349*0b57cec5SDimitry Andric 3350*0b57cec5SDimitry Andric const LLT Src0Ty = MRI.getType(Src0); 3351*0b57cec5SDimitry Andric const LLT Src1Ty = MRI.getType(Src1); 3352*0b57cec5SDimitry Andric 3353*0b57cec5SDimitry Andric const int Src0Size = Src0Ty.getScalarSizeInBits(); 3354*0b57cec5SDimitry Andric const int Src1Size = Src1Ty.getScalarSizeInBits(); 3355*0b57cec5SDimitry Andric 3356*0b57cec5SDimitry Andric auto SignBitMask = MIRBuilder.buildConstant( 3357*0b57cec5SDimitry Andric Src0Ty, APInt::getSignMask(Src0Size)); 3358*0b57cec5SDimitry Andric 3359*0b57cec5SDimitry Andric auto NotSignBitMask = MIRBuilder.buildConstant( 3360*0b57cec5SDimitry Andric Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); 3361*0b57cec5SDimitry Andric 3362*0b57cec5SDimitry Andric auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, 
NotSignBitMask); 3363*0b57cec5SDimitry Andric MachineInstr *Or; 3364*0b57cec5SDimitry Andric 3365*0b57cec5SDimitry Andric if (Src0Ty == Src1Ty) { 3366*0b57cec5SDimitry Andric auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask); 3367*0b57cec5SDimitry Andric Or = MIRBuilder.buildOr(Dst, And0, And1); 3368*0b57cec5SDimitry Andric } else if (Src0Size > Src1Size) { 3369*0b57cec5SDimitry Andric auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); 3370*0b57cec5SDimitry Andric auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); 3371*0b57cec5SDimitry Andric auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); 3372*0b57cec5SDimitry Andric auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask); 3373*0b57cec5SDimitry Andric Or = MIRBuilder.buildOr(Dst, And0, And1); 3374*0b57cec5SDimitry Andric } else { 3375*0b57cec5SDimitry Andric auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); 3376*0b57cec5SDimitry Andric auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); 3377*0b57cec5SDimitry Andric auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); 3378*0b57cec5SDimitry Andric auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask); 3379*0b57cec5SDimitry Andric Or = MIRBuilder.buildOr(Dst, And0, And1); 3380*0b57cec5SDimitry Andric } 3381*0b57cec5SDimitry Andric 3382*0b57cec5SDimitry Andric // Be careful about setting nsz/nnan/ninf on every instruction, since the 3383*0b57cec5SDimitry Andric // constants are a nan and -0.0, but the final result should preserve 3384*0b57cec5SDimitry Andric // everything. 
3385*0b57cec5SDimitry Andric if (unsigned Flags = MI.getFlags()) 3386*0b57cec5SDimitry Andric Or->setFlags(Flags); 3387*0b57cec5SDimitry Andric 3388*0b57cec5SDimitry Andric MI.eraseFromParent(); 3389*0b57cec5SDimitry Andric return Legalized; 3390*0b57cec5SDimitry Andric } 3391*0b57cec5SDimitry Andric 3392*0b57cec5SDimitry Andric LegalizerHelper::LegalizeResult 3393*0b57cec5SDimitry Andric LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { 3394*0b57cec5SDimitry Andric unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? 3395*0b57cec5SDimitry Andric TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE; 3396*0b57cec5SDimitry Andric 3397*0b57cec5SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 3398*0b57cec5SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 3399*0b57cec5SDimitry Andric Register Src1 = MI.getOperand(2).getReg(); 3400*0b57cec5SDimitry Andric LLT Ty = MRI.getType(Dst); 3401*0b57cec5SDimitry Andric 3402*0b57cec5SDimitry Andric if (!MI.getFlag(MachineInstr::FmNoNans)) { 3403*0b57cec5SDimitry Andric // Insert canonicalizes if it's possible we need to quiet to get correct 3404*0b57cec5SDimitry Andric // sNaN behavior. 3405*0b57cec5SDimitry Andric 3406*0b57cec5SDimitry Andric // Note this must be done here, and not as an optimization combine in the 3407*0b57cec5SDimitry Andric // absence of a dedicate quiet-snan instruction as we're using an 3408*0b57cec5SDimitry Andric // omni-purpose G_FCANONICALIZE. 
3409*0b57cec5SDimitry Andric if (!isKnownNeverSNaN(Src0, MRI)) 3410*0b57cec5SDimitry Andric Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0); 3411*0b57cec5SDimitry Andric 3412*0b57cec5SDimitry Andric if (!isKnownNeverSNaN(Src1, MRI)) 3413*0b57cec5SDimitry Andric Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0); 3414*0b57cec5SDimitry Andric } 3415*0b57cec5SDimitry Andric 3416*0b57cec5SDimitry Andric // If there are no nans, it's safe to simply replace this with the non-IEEE 3417*0b57cec5SDimitry Andric // version. 3418*0b57cec5SDimitry Andric MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags()); 3419*0b57cec5SDimitry Andric MI.eraseFromParent(); 3420*0b57cec5SDimitry Andric return Legalized; 3421*0b57cec5SDimitry Andric } 3422