xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This implements the TargetLowering class.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "llvm/CodeGen/TargetLowering.h"
14  #include "llvm/ADT/STLExtras.h"
15  #include "llvm/Analysis/VectorUtils.h"
16  #include "llvm/CodeGen/CallingConvLower.h"
17  #include "llvm/CodeGen/CodeGenCommonISel.h"
18  #include "llvm/CodeGen/MachineFrameInfo.h"
19  #include "llvm/CodeGen/MachineFunction.h"
20  #include "llvm/CodeGen/MachineJumpTableInfo.h"
21  #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22  #include "llvm/CodeGen/MachineRegisterInfo.h"
23  #include "llvm/CodeGen/SelectionDAG.h"
24  #include "llvm/CodeGen/TargetRegisterInfo.h"
25  #include "llvm/IR/DataLayout.h"
26  #include "llvm/IR/DerivedTypes.h"
27  #include "llvm/IR/GlobalVariable.h"
28  #include "llvm/IR/LLVMContext.h"
29  #include "llvm/MC/MCAsmInfo.h"
30  #include "llvm/MC/MCExpr.h"
31  #include "llvm/Support/DivisionByConstantInfo.h"
32  #include "llvm/Support/ErrorHandling.h"
33  #include "llvm/Support/KnownBits.h"
34  #include "llvm/Support/MathExtras.h"
35  #include "llvm/Target/TargetMachine.h"
36  #include <cctype>
37  using namespace llvm;
38  
/// NOTE: The TargetMachine owns TLOF.
/// All real initialization happens in the TargetLoweringBase constructor;
/// this derived class adds no state of its own.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42  
/// Return a human-readable name for the given target-specific DAG node
/// opcode, used when printing/debugging the DAG. The base implementation
/// knows no target opcodes, so it returns null; targets override this.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46  
/// Whether the current compilation is position independent; simply forwards
/// to the owning TargetMachine's PIC setting.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50  
51  /// Check whether a given call node is in tail position within its function. If
52  /// so, it sets Chain to the input chain of the tail call.
isInTailCallPosition(SelectionDAG & DAG,SDNode * Node,SDValue & Chain) const53  bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                            SDValue &Chain) const {
55    const Function &F = DAG.getMachineFunction().getFunction();
56  
57    // First, check if tail calls have been disabled in this function.
58    if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59      return false;
60  
61    // Conservatively require the attributes of the call to match those of
62    // the return. Ignore following attributes because they don't affect the
63    // call sequence.
64    AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65    for (const auto &Attr :
66         {Attribute::Alignment, Attribute::Dereferenceable,
67          Attribute::DereferenceableOrNull, Attribute::NoAlias,
68          Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69      CallerAttrs.removeAttribute(Attr);
70  
71    if (CallerAttrs.hasAttributes())
72      return false;
73  
74    // It's not safe to eliminate the sign / zero extension of the return value.
75    if (CallerAttrs.contains(Attribute::ZExt) ||
76        CallerAttrs.contains(Attribute::SExt))
77      return false;
78  
79    // Check if the only use is a function return node.
80    return isUsedByReturnOnly(Node, Chain);
81  }
82  
parametersInCSRMatch(const MachineRegisterInfo & MRI,const uint32_t * CallerPreservedMask,const SmallVectorImpl<CCValAssign> & ArgLocs,const SmallVectorImpl<SDValue> & OutVals) const83  bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
84      const uint32_t *CallerPreservedMask,
85      const SmallVectorImpl<CCValAssign> &ArgLocs,
86      const SmallVectorImpl<SDValue> &OutVals) const {
87    for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88      const CCValAssign &ArgLoc = ArgLocs[I];
89      if (!ArgLoc.isRegLoc())
90        continue;
91      MCRegister Reg = ArgLoc.getLocReg();
92      // Only look at callee saved registers.
93      if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94        continue;
95      // Check that we pass the value used for the caller.
96      // (We look for a CopyFromReg reading a virtual register that is used
97      //  for the function live-in value of register Reg)
98      SDValue Value = OutVals[I];
99      if (Value->getOpcode() == ISD::AssertZext)
100        Value = Value.getOperand(0);
101      if (Value->getOpcode() != ISD::CopyFromReg)
102        return false;
103      Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104      if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105        return false;
106    }
107    return true;
108  }
109  
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of argument ArgIdx into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The pointee-type-carrying ABI attributes are mutually exclusive, so at
  // most one of the blocks below sets IndirectType.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stack alignment wins; otherwise fall back to the
    // parameter's own alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
142  
143  /// Generate a libcall taking the given operands as arguments and returning a
144  /// result of type RetVT.
145  std::pair<SDValue, SDValue>
makeLibCall(SelectionDAG & DAG,RTLIB::Libcall LC,EVT RetVT,ArrayRef<SDValue> Ops,MakeLibCallOptions CallOptions,const SDLoc & dl,SDValue InChain) const146  TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
147                              ArrayRef<SDValue> Ops,
148                              MakeLibCallOptions CallOptions,
149                              const SDLoc &dl,
150                              SDValue InChain) const {
151    if (!InChain)
152      InChain = DAG.getEntryNode();
153  
154    TargetLowering::ArgListTy Args;
155    Args.reserve(Ops.size());
156  
157    TargetLowering::ArgListEntry Entry;
158    for (unsigned i = 0; i < Ops.size(); ++i) {
159      SDValue NewOp = Ops[i];
160      Entry.Node = NewOp;
161      Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
162      Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
163                                                   CallOptions.IsSExt);
164      Entry.IsZExt = !Entry.IsSExt;
165  
166      if (CallOptions.IsSoften &&
167          !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
168        Entry.IsSExt = Entry.IsZExt = false;
169      }
170      Args.push_back(Entry);
171    }
172  
173    if (LC == RTLIB::UNKNOWN_LIBCALL)
174      report_fatal_error("Unsupported library call operation!");
175    SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
176                                           getPointerTy(DAG.getDataLayout()));
177  
178    Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
179    TargetLowering::CallLoweringInfo CLI(DAG);
180    bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
181    bool zeroExtend = !signExtend;
182  
183    if (CallOptions.IsSoften &&
184        !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
185      signExtend = zeroExtend = false;
186    }
187  
188    CLI.setDebugLoc(dl)
189        .setChain(InChain)
190        .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
191        .setNoReturn(CallOptions.DoesNotReturn)
192        .setDiscardResult(!CallOptions.IsReturnValueUsed)
193        .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
194        .setSExtResult(signExtend)
195        .setZExtResult(zeroExtend);
196    return LowerCallTo(CLI);
197  }
198  
/// Choose the sequence of value types (appended to MemOps) used to lower the
/// memory operation Op, staying within Limit individual load/store pairs and
/// respecting alignment. Returns false if no lowering within Limit exists or
/// the alignment constraints cannot be met.
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Reject a fixed-destination-alignment memcpy whose source is less aligned
  // than its destination, unless Limit is unbounded (~0u).
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type for the whole operation first;
  // MVT::Other means "no preference".
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit operations of type VT, shrinking VT whenever it no longer
  // fits in the remaining Size.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until one is safe to use; i8 is always
        // an acceptable floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
288  
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
/// Convenience overload for callers that don't need to thread a chain
/// through the generated libcall(s); delegates to the chain-taking variant
/// with an empty chain.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
300  
/// Soften a floating-point comparison of type VT into one or two integer
/// comparison libcalls. On return, NewLHS/NewRHS/CCCode describe the
/// equivalent integer setcc (NewRHS may be null when NewLHS is already the
/// final boolean), and Chain — if the caller threads one — is advanced past
/// the generated call(s).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 is only used for
  // predicates (SETONE/SETUEQ) that need two calls combined.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO, for which a libcall exists.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ: combine the unordered check with an equality call.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the pre-softening operand/return types so extension decisions use
  // the original FP types rather than the softened integer ones.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The comparison becomes: libcall-result <CC> 0.
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call predicate: evaluate the first comparison as a setcc, make the
    // second libcall, then combine with AND (inverted) or OR.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Join both call chains so neither libcall can be dropped or reordered.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // Signal the caller that NewLHS already holds the final boolean result.
    NewRHS = SDValue();
  }
}
440  
441  /// Return the entry encoding for a jump table in the current function. The
442  /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
getJumpTableEncoding() const443  unsigned TargetLowering::getJumpTableEncoding() const {
444    // In non-pic modes, just use the address of a block.
445    if (!isPositionIndependent())
446      return MachineJumpTableInfo::EK_BlockAddress;
447  
448    // In PIC mode, if the target supports a GPRel32 directive, use it.
449    if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
450      return MachineJumpTableInfo::EK_GPRel32BlockAddress;
451  
452    // Otherwise, use a label difference.
453    return MachineJumpTableInfo::EK_LabelDifference32;
454  }
455  
getPICJumpTableRelocBase(SDValue Table,SelectionDAG & DAG) const456  SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
457                                                   SelectionDAG &DAG) const {
458    // If our PIC model is GP relative, use the global offset table as the base.
459    unsigned JTEncoding = getJumpTableEncoding();
460  
461    if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
462        (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
463      return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
464  
465    return Table;
466  }
467  
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
476  
expandIndirectJTBranch(const SDLoc & dl,SDValue Value,SDValue Addr,int JTI,SelectionDAG & DAG) const477  SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
478                                                 SDValue Addr, int JTI,
479                                                 SelectionDAG &DAG) const {
480    SDValue Chain = Value;
481    // Jump table debug info is only needed if CodeView is enabled.
482    if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
483      Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
484    }
485    return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
486  }
487  
488  bool
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const489  TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
490    const TargetMachine &TM = getTargetMachine();
491    const GlobalValue *GV = GA->getGlobal();
492  
493    // If the address is not even local to this DSO we will have to load it from
494    // a got and then add the offset.
495    if (!TM.shouldAssumeDSOLocal(GV))
496      return false;
497  
498    // If the code is position independent we will have to add a base register.
499    if (isPositionIndependent())
500      return false;
501  
502    // Otherwise we can do it.
503    return true;
504  }
505  
506  //===----------------------------------------------------------------------===//
507  //  Optimization Methods
508  //===----------------------------------------------------------------------===//
509  
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only a constant (non-opaque) RHS can be shrunk.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // If the constant has bits outside the demanded set, rebuild the node
    // with the constant masked down to just the demanded bits.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
559  
ShrinkDemandedConstant(SDValue Op,const APInt & DemandedBits,TargetLoweringOpt & TLO) const560  bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
561                                              const APInt &DemandedBits,
562                                              TargetLoweringOpt &TLO) const {
563    EVT VT = Op.getValueType();
564    APInt DemandedElts = VT.isVector()
565                             ? APInt::getAllOnes(VT.getVectorNumElements())
566                             : APInt(1, 1);
567    return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
568  }
569  
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      // Truncate both operands, perform the operation in the narrow type,
      // then widen the result back with ANY_EXTEND — the high bits are not
      // demanded, so their value doesn't matter.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
619  
SimplifyDemandedBits(SDValue Op,const APInt & DemandedBits,DAGCombinerInfo & DCI) const620  bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
621                                            DAGCombinerInfo &DCI) const {
622    SelectionDAG &DAG = DCI.DAG;
623    TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
624                          !DCI.isBeforeLegalizeOps());
625    KnownBits Known;
626  
627    bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
628    if (Simplified) {
629      DCI.AddToWorklist(Op.getNode());
630      DCI.CommitTargetLoweringOpt(TLO);
631    }
632    return Simplified;
633  }
634  
SimplifyDemandedBits(SDValue Op,const APInt & DemandedBits,const APInt & DemandedElts,DAGCombinerInfo & DCI) const635  bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
636                                            const APInt &DemandedElts,
637                                            DAGCombinerInfo &DCI) const {
638    SelectionDAG &DAG = DCI.DAG;
639    TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
640                          !DCI.isBeforeLegalizeOps());
641    KnownBits Known;
642  
643    bool Simplified =
644        SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
645    if (Simplified) {
646      DCI.AddToWorklist(Op.getNode());
647      DCI.CommitTargetLoweringOpt(TLO);
648    }
649    return Simplified;
650  }
651  
SimplifyDemandedBits(SDValue Op,const APInt & DemandedBits,KnownBits & Known,TargetLoweringOpt & TLO,unsigned Depth,bool AssumeSingleUse) const652  bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
653                                            KnownBits &Known,
654                                            TargetLoweringOpt &TLO,
655                                            unsigned Depth,
656                                            bool AssumeSingleUse) const {
657    EVT VT = Op.getValueType();
658  
659    // Since the number of lanes in a scalable vector is unknown at compile time,
660    // we track one bit which is implicitly broadcast to all lanes.  This means
661    // that all lanes in a scalable vector are considered demanded.
662    APInt DemandedElts = VT.isFixedLengthVector()
663                             ? APInt::getAllOnes(VT.getVectorNumElements())
664                             : APInt(1, 1);
665    return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
666                                AssumeSingleUse);
667  }
668  
669  // TODO: Under what circumstances can we create nodes? Constant folding?
/// Attempt to bypass \p Op entirely: return an already-existing value that
/// computes the same result for the bits in \p DemandedBits of the lanes in
/// \p DemandedElts, or an empty SDValue if no such value is found. Unlike
/// SimplifyDemandedBits this never rewrites \p Op in place (safe on multi-use
/// nodes); the only nodes it may create are UNDEFs and bitcasts, plus
/// whatever the target hook in the default case chooses to build.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    // Look through chains of bitcasts; a no-op bitcast can be bypassed
    // outright.
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    // Same element width: the demanded bits/elts masks carry over unchanged.
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // Narrow src elements -> wide dst elements: each demanded dst element
    // covers Scale consecutive src elements; translate the demanded bits of
    // a dst element into demanded bits/elements of the source.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        // On big-endian targets the sub-element order within a dst element is
        // reversed.
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // Wide src elements -> narrow dst elements: Scale dst elements map onto a
    // single src element; each demanded dst element demands its slice of bits
    // from that src element.
    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::FREEZE: {
    // A freeze of a value that is provably neither undef nor poison is a
    // no-op; use the operand directly.
    SDValue N0 = Op.getOperand(0);
    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
                                             /*PoisonOnly=*/false))
      return N0;
    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.  These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      // Only the bits at and above the lowest demanded bit matter; after
      // shifting left by up to ShAmt, the source must still have enough sign
      // bits to cover all of them.
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      // Undef mask entries and undemanded lanes don't constrain either
      // identity.
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    // Give targets a chance to handle their own nodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
929  
SimplifyMultipleUseDemandedBits(SDValue Op,const APInt & DemandedBits,SelectionDAG & DAG,unsigned Depth) const930  SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
931      SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
932      unsigned Depth) const {
933    EVT VT = Op.getValueType();
934    // Since the number of lanes in a scalable vector is unknown at compile time,
935    // we track one bit which is implicitly broadcast to all lanes.  This means
936    // that all lanes in a scalable vector are considered demanded.
937    APInt DemandedElts = VT.isFixedLengthVector()
938                             ? APInt::getAllOnes(VT.getVectorNumElements())
939                             : APInt(1, 1);
940    return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
941                                           Depth);
942  }
943  
SimplifyMultipleUseDemandedVectorElts(SDValue Op,const APInt & DemandedElts,SelectionDAG & DAG,unsigned Depth) const944  SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
945      SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
946      unsigned Depth) const {
947    APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
948    return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
949                                           Depth);
950  }
951  
952  // Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
953  //      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
/// Try to rewrite a logical/arithmetic shift-right-by-one of an add into an
/// ISD::AVGFLOOR*/AVGCEIL* node, i.e.
///   ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1), or
///   ext(avgceil(A, B))  from shr(add(ext(A), ext(B), 1), 1).
/// The sign/zero bits of the add operands determine whether the signed or
/// unsigned variant is used and how narrow the average can be computed.
/// Returns the replacement value, or an empty SDValue if no profitable/legal
/// transform exists.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  // Given an inner add A = add(Op1, Op2) and the other outer operand Op3,
  // match the +1 in either position of the inner add. On success the two
  // averaged values are recorded in ExtOpA/ExtOpB and the inner add in Add2.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // NOTE: local named KnownBits (shadows the llvm::KnownBits type): the number
  // of known sign/zero bits usable to narrow the average.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // The add can consume one sign bit (carry), hence the -1.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  // Truncate the operands to the narrow type, emit the average, and extend
  // the result back with the matching signedness.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1093  
1094  /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1095  /// result of Op are ever used downstream. If we can use this information to
1096  /// simplify Op, create a new simplified DAG node and return true, returning the
1097  /// original and new nodes in Old and New. Otherwise, analyze the expression and
1098  /// return a mask of Known bits for the expression (used to simplify the
1099  /// caller).  The Known bits may only be accurate for those bits in the
1100  /// OriginalDemandedBits and OriginalDemandedElts.
SimplifyDemandedBits(SDValue Op,const APInt & OriginalDemandedBits,const APInt & OriginalDemandedElts,KnownBits & Known,TargetLoweringOpt & TLO,unsigned Depth,bool AssumeSingleUse) const1101  bool TargetLowering::SimplifyDemandedBits(
1102      SDValue Op, const APInt &OriginalDemandedBits,
1103      const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1104      unsigned Depth, bool AssumeSingleUse) const {
1105    unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1106    assert(Op.getScalarValueSizeInBits() == BitWidth &&
1107           "Mask size mismatches value type size!");
1108  
1109    // Don't know anything.
1110    Known = KnownBits(BitWidth);
1111  
1112    EVT VT = Op.getValueType();
1113    bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1114    unsigned NumElts = OriginalDemandedElts.getBitWidth();
1115    assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1116           "Unexpected vector size");
1117  
1118    APInt DemandedBits = OriginalDemandedBits;
1119    APInt DemandedElts = OriginalDemandedElts;
1120    SDLoc dl(Op);
1121  
1122    // Undef operand.
1123    if (Op.isUndef())
1124      return false;
1125  
1126    // We can't simplify target constants.
1127    if (Op.getOpcode() == ISD::TargetConstant)
1128      return false;
1129  
1130    if (Op.getOpcode() == ISD::Constant) {
1131      // We know all of the bits for a constant!
1132      Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1133      return false;
1134    }
1135  
1136    if (Op.getOpcode() == ISD::ConstantFP) {
1137      // We know all of the bits for a floating point constant!
1138      Known = KnownBits::makeConstant(
1139          cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1140      return false;
1141    }
1142  
1143    // Other users may use these bits.
1144    bool HasMultiUse = false;
1145    if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146      if (Depth >= SelectionDAG::MaxRecursionDepth) {
1147        // Limit search depth.
1148        return false;
1149      }
1150      // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151      DemandedBits = APInt::getAllOnes(BitWidth);
1152      DemandedElts = APInt::getAllOnes(NumElts);
1153      HasMultiUse = true;
1154    } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1155      // Not demanding any bits/elts from Op.
1156      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1157    } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1158      // Limit search depth.
1159      return false;
1160    }
1161  
1162    KnownBits Known2;
1163    switch (Op.getOpcode()) {
1164    case ISD::SCALAR_TO_VECTOR: {
1165      if (VT.isScalableVector())
1166        return false;
1167      if (!DemandedElts[0])
1168        return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1169  
1170      KnownBits SrcKnown;
1171      SDValue Src = Op.getOperand(0);
1172      unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1173      APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1174      if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1175        return true;
1176  
1177      // Upper elements are undef, so only get the knownbits if we just demand
1178      // the bottom element.
1179      if (DemandedElts == 1)
1180        Known = SrcKnown.anyextOrTrunc(BitWidth);
1181      break;
1182    }
1183    case ISD::BUILD_VECTOR:
1184      // Collect the known bits that are shared by every demanded element.
1185      // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1186      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1187      return false; // Don't fall through, will infinitely loop.
1188    case ISD::SPLAT_VECTOR: {
1189      SDValue Scl = Op.getOperand(0);
1190      APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1191      KnownBits KnownScl;
1192      if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1193        return true;
1194  
1195      // Implicitly truncate the bits to match the official semantics of
1196      // SPLAT_VECTOR.
1197      Known = KnownScl.trunc(BitWidth);
1198      break;
1199    }
1200    case ISD::LOAD: {
1201      auto *LD = cast<LoadSDNode>(Op);
1202      if (getTargetConstantFromLoad(LD)) {
1203        Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1204        return false; // Don't fall through, will infinitely loop.
1205      }
1206      if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1207        // If this is a ZEXTLoad and we are looking at the loaded value.
1208        EVT MemVT = LD->getMemoryVT();
1209        unsigned MemBits = MemVT.getScalarSizeInBits();
1210        Known.Zero.setBitsFrom(MemBits);
1211        return false; // Don't fall through, will infinitely loop.
1212      }
1213      break;
1214    }
1215    case ISD::INSERT_VECTOR_ELT: {
1216      if (VT.isScalableVector())
1217        return false;
1218      SDValue Vec = Op.getOperand(0);
1219      SDValue Scl = Op.getOperand(1);
1220      auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1221      EVT VecVT = Vec.getValueType();
1222  
1223      // If index isn't constant, assume we need all vector elements AND the
1224      // inserted element.
1225      APInt DemandedVecElts(DemandedElts);
1226      if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1227        unsigned Idx = CIdx->getZExtValue();
1228        DemandedVecElts.clearBit(Idx);
1229  
1230        // Inserted element is not required.
1231        if (!DemandedElts[Idx])
1232          return TLO.CombineTo(Op, Vec);
1233      }
1234  
1235      KnownBits KnownScl;
1236      unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1237      APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1238      if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1239        return true;
1240  
1241      Known = KnownScl.anyextOrTrunc(BitWidth);
1242  
1243      KnownBits KnownVec;
1244      if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1245                               Depth + 1))
1246        return true;
1247  
1248      if (!!DemandedVecElts)
1249        Known = Known.intersectWith(KnownVec);
1250  
1251      return false;
1252    }
1253    case ISD::INSERT_SUBVECTOR: {
1254      if (VT.isScalableVector())
1255        return false;
1256      // Demand any elements from the subvector and the remainder from the src its
1257      // inserted into.
1258      SDValue Src = Op.getOperand(0);
1259      SDValue Sub = Op.getOperand(1);
1260      uint64_t Idx = Op.getConstantOperandVal(2);
1261      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1262      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1263      APInt DemandedSrcElts = DemandedElts;
1264      DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1265  
1266      KnownBits KnownSub, KnownSrc;
1267      if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1268                               Depth + 1))
1269        return true;
1270      if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1271                               Depth + 1))
1272        return true;
1273  
1274      Known.Zero.setAllBits();
1275      Known.One.setAllBits();
1276      if (!!DemandedSubElts)
1277        Known = Known.intersectWith(KnownSub);
1278      if (!!DemandedSrcElts)
1279        Known = Known.intersectWith(KnownSrc);
1280  
1281      // Attempt to avoid multi-use src if we don't need anything from it.
1282      if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1283          !DemandedSrcElts.isAllOnes()) {
1284        SDValue NewSub = SimplifyMultipleUseDemandedBits(
1285            Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1286        SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1287            Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1288        if (NewSub || NewSrc) {
1289          NewSub = NewSub ? NewSub : Sub;
1290          NewSrc = NewSrc ? NewSrc : Src;
1291          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1292                                          Op.getOperand(2));
1293          return TLO.CombineTo(Op, NewOp);
1294        }
1295      }
1296      break;
1297    }
1298    case ISD::EXTRACT_SUBVECTOR: {
1299      if (VT.isScalableVector())
1300        return false;
1301      // Offset the demanded elts by the subvector index.
1302      SDValue Src = Op.getOperand(0);
1303      if (Src.getValueType().isScalableVector())
1304        break;
1305      uint64_t Idx = Op.getConstantOperandVal(1);
1306      unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1307      APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1308  
1309      if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1310                               Depth + 1))
1311        return true;
1312  
1313      // Attempt to avoid multi-use src if we don't need anything from it.
1314      if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1315        SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1316            Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1317        if (DemandedSrc) {
1318          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1319                                          Op.getOperand(1));
1320          return TLO.CombineTo(Op, NewOp);
1321        }
1322      }
1323      break;
1324    }
1325    case ISD::CONCAT_VECTORS: {
1326      if (VT.isScalableVector())
1327        return false;
1328      Known.Zero.setAllBits();
1329      Known.One.setAllBits();
1330      EVT SubVT = Op.getOperand(0).getValueType();
1331      unsigned NumSubVecs = Op.getNumOperands();
1332      unsigned NumSubElts = SubVT.getVectorNumElements();
1333      for (unsigned i = 0; i != NumSubVecs; ++i) {
1334        APInt DemandedSubElts =
1335            DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1336        if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1337                                 Known2, TLO, Depth + 1))
1338          return true;
1339        // Known bits are shared by every demanded subvector element.
1340        if (!!DemandedSubElts)
1341          Known = Known.intersectWith(Known2);
1342      }
1343      break;
1344    }
1345    case ISD::VECTOR_SHUFFLE: {
1346      assert(!VT.isScalableVector());
1347      ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1348  
1349      // Collect demanded elements from shuffle operands..
1350      APInt DemandedLHS, DemandedRHS;
1351      if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1352                                  DemandedRHS))
1353        break;
1354  
1355      if (!!DemandedLHS || !!DemandedRHS) {
1356        SDValue Op0 = Op.getOperand(0);
1357        SDValue Op1 = Op.getOperand(1);
1358  
1359        Known.Zero.setAllBits();
1360        Known.One.setAllBits();
1361        if (!!DemandedLHS) {
1362          if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1363                                   Depth + 1))
1364            return true;
1365          Known = Known.intersectWith(Known2);
1366        }
1367        if (!!DemandedRHS) {
1368          if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1369                                   Depth + 1))
1370            return true;
1371          Known = Known.intersectWith(Known2);
1372        }
1373  
1374        // Attempt to avoid multi-use ops if we don't need anything from them.
1375        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376            Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1377        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378            Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1379        if (DemandedOp0 || DemandedOp1) {
1380          Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381          Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382          SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1383          return TLO.CombineTo(Op, NewOp);
1384        }
1385      }
1386      break;
1387    }
1388    case ISD::AND: {
1389      SDValue Op0 = Op.getOperand(0);
1390      SDValue Op1 = Op.getOperand(1);
1391  
1392      // If the RHS is a constant, check to see if the LHS would be zero without
1393      // using the bits from the RHS.  Below, we use knowledge about the RHS to
1394      // simplify the LHS, here we're using information from the LHS to simplify
1395      // the RHS.
1396      if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1397        // Do not increment Depth here; that can cause an infinite loop.
1398        KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1399        // If the LHS already has zeros where RHSC does, this 'and' is dead.
1400        if ((LHSKnown.Zero & DemandedBits) ==
1401            (~RHSC->getAPIntValue() & DemandedBits))
1402          return TLO.CombineTo(Op, Op0);
1403  
1404        // If any of the set bits in the RHS are known zero on the LHS, shrink
1405        // the constant.
1406        if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1407                                   DemandedElts, TLO))
1408          return true;
1409  
1410        // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1411        // constant, but if this 'and' is only clearing bits that were just set by
1412        // the xor, then this 'and' can be eliminated by shrinking the mask of
1413        // the xor. For example, for a 32-bit X:
1414        // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1415        if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1416            LHSKnown.One == ~RHSC->getAPIntValue()) {
1417          SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1418          return TLO.CombineTo(Op, Xor);
1419        }
1420      }
1421  
1422      // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1423      // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1424      if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1425          (Op0.getOperand(0).isUndef() ||
1426           ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1427          Op0->hasOneUse()) {
1428        unsigned NumSubElts =
1429            Op0.getOperand(1).getValueType().getVectorNumElements();
1430        unsigned SubIdx = Op0.getConstantOperandVal(2);
1431        APInt DemandedSub =
1432            APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1433        KnownBits KnownSubMask =
1434            TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1435        if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1436          SDValue NewAnd =
1437              TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1438          SDValue NewInsert =
1439              TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1440                              Op0.getOperand(1), Op0.getOperand(2));
1441          return TLO.CombineTo(Op, NewInsert);
1442        }
1443      }
1444  
1445      if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1446                               Depth + 1))
1447        return true;
1448      if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1449                               Known2, TLO, Depth + 1))
1450        return true;
1451  
1452      // If all of the demanded bits are known one on one side, return the other.
1453      // These bits cannot contribute to the result of the 'and'.
1454      if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1455        return TLO.CombineTo(Op, Op0);
1456      if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1457        return TLO.CombineTo(Op, Op1);
1458      // If all of the demanded bits in the inputs are known zeros, return zero.
1459      if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1460        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1461      // If the RHS is a constant, see if we can simplify it.
1462      if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1463                                 TLO))
1464        return true;
1465      // If the operation can be done in a smaller type, do so.
1466      if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1467        return true;
1468  
1469      // Attempt to avoid multi-use ops if we don't need anything from them.
1470      if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1471        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1472            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1473        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1474            Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1475        if (DemandedOp0 || DemandedOp1) {
1476          Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1477          Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1478          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1479          return TLO.CombineTo(Op, NewOp);
1480        }
1481      }
1482  
1483      Known &= Known2;
1484      break;
1485    }
1486    case ISD::OR: {
1487      SDValue Op0 = Op.getOperand(0);
1488      SDValue Op1 = Op.getOperand(1);
1489      SDNodeFlags Flags = Op.getNode()->getFlags();
1490      if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1491                               Depth + 1)) {
1492        if (Flags.hasDisjoint()) {
1493          Flags.setDisjoint(false);
1494          Op->setFlags(Flags);
1495        }
1496        return true;
1497      }
1498  
1499      if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1500                               Known2, TLO, Depth + 1)) {
1501        if (Flags.hasDisjoint()) {
1502          Flags.setDisjoint(false);
1503          Op->setFlags(Flags);
1504        }
1505        return true;
1506      }
1507  
1508      // If all of the demanded bits are known zero on one side, return the other.
1509      // These bits cannot contribute to the result of the 'or'.
1510      if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1511        return TLO.CombineTo(Op, Op0);
1512      if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1513        return TLO.CombineTo(Op, Op1);
1514      // If the RHS is a constant, see if we can simplify it.
1515      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1516        return true;
1517      // If the operation can be done in a smaller type, do so.
1518      if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1519        return true;
1520  
1521      // Attempt to avoid multi-use ops if we don't need anything from them.
1522      if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1523        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1524            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1525        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1526            Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1527        if (DemandedOp0 || DemandedOp1) {
1528          Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1529          Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1530          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1531          return TLO.CombineTo(Op, NewOp);
1532        }
1533      }
1534  
1535      // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1536      // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1537      if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1538          Op0->hasOneUse() && Op1->hasOneUse()) {
1539        // Attempt to match all commutations - m_c_Or would've been useful!
1540        for (int I = 0; I != 2; ++I) {
1541          SDValue X = Op.getOperand(I).getOperand(0);
1542          SDValue C1 = Op.getOperand(I).getOperand(1);
1543          SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1544          SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1545          if (Alt.getOpcode() == ISD::OR) {
1546            for (int J = 0; J != 2; ++J) {
1547              if (X == Alt.getOperand(J)) {
1548                SDValue Y = Alt.getOperand(1 - J);
1549                if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1550                                                                 {C1, C2})) {
1551                  SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1552                  SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1553                  return TLO.CombineTo(
1554                      Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1555                }
1556              }
1557            }
1558          }
1559        }
1560      }
1561  
1562      Known |= Known2;
1563      break;
1564    }
1565    case ISD::XOR: {
1566      SDValue Op0 = Op.getOperand(0);
1567      SDValue Op1 = Op.getOperand(1);
1568  
1569      if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1570                               Depth + 1))
1571        return true;
1572      if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1573                               Depth + 1))
1574        return true;
1575  
1576      // If all of the demanded bits are known zero on one side, return the other.
1577      // These bits cannot contribute to the result of the 'xor'.
1578      if (DemandedBits.isSubsetOf(Known.Zero))
1579        return TLO.CombineTo(Op, Op0);
1580      if (DemandedBits.isSubsetOf(Known2.Zero))
1581        return TLO.CombineTo(Op, Op1);
1582      // If the operation can be done in a smaller type, do so.
1583      if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1584        return true;
1585  
1586      // If all of the unknown bits are known to be zero on one side or the other
1587      // turn this into an *inclusive* or.
1588      //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1589      if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1590        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1591  
1592      ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1593      if (C) {
1594        // If one side is a constant, and all of the set bits in the constant are
1595        // also known set on the other side, turn this into an AND, as we know
1596        // the bits will be cleared.
1597        //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1598        // NB: it is okay if more bits are known than are requested
1599        if (C->getAPIntValue() == Known2.One) {
1600          SDValue ANDC =
1601              TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1602          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1603        }
1604  
1605        // If the RHS is a constant, see if we can change it. Don't alter a -1
1606        // constant because that's a 'not' op, and that is better for combining
1607        // and codegen.
1608        if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1609          // We're flipping all demanded bits. Flip the undemanded bits too.
1610          SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1611          return TLO.CombineTo(Op, New);
1612        }
1613  
1614        unsigned Op0Opcode = Op0.getOpcode();
1615        if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1616          if (ConstantSDNode *ShiftC =
1617                  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1618            // Don't crash on an oversized shift. We can not guarantee that a
1619            // bogus shift has been simplified to undef.
1620            if (ShiftC->getAPIntValue().ult(BitWidth)) {
1621              uint64_t ShiftAmt = ShiftC->getZExtValue();
1622              APInt Ones = APInt::getAllOnes(BitWidth);
1623              Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1624                                           : Ones.lshr(ShiftAmt);
1625              const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1626              if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1627                  TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1628                // If the xor constant is a demanded mask, do a 'not' before the
1629                // shift:
1630                // xor (X << ShiftC), XorC --> (not X) << ShiftC
1631                // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1632                SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1633                return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1634                                                         Op0.getOperand(1)));
1635              }
1636            }
1637          }
1638        }
1639      }
1640  
1641      // If we can't turn this into a 'not', try to shrink the constant.
1642      if (!C || !C->isAllOnes())
1643        if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1644          return true;
1645  
1646      // Attempt to avoid multi-use ops if we don't need anything from them.
1647      if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1648        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1649            Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1650        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1651            Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1652        if (DemandedOp0 || DemandedOp1) {
1653          Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1654          Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1655          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1656          return TLO.CombineTo(Op, NewOp);
1657        }
1658      }
1659  
1660      Known ^= Known2;
1661      break;
1662    }
1663    case ISD::SELECT:
1664      if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1665                               Known, TLO, Depth + 1))
1666        return true;
1667      if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1668                               Known2, TLO, Depth + 1))
1669        return true;
1670  
1671      // If the operands are constants, see if we can simplify them.
1672      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1673        return true;
1674  
1675      // Only known if known in both the LHS and RHS.
1676      Known = Known.intersectWith(Known2);
1677      break;
1678    case ISD::VSELECT:
1679      if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1680                               Known, TLO, Depth + 1))
1681        return true;
1682      if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1683                               Known2, TLO, Depth + 1))
1684        return true;
1685  
1686      // Only known if known in both the LHS and RHS.
1687      Known = Known.intersectWith(Known2);
1688      break;
1689    case ISD::SELECT_CC:
1690      if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1691                               Known, TLO, Depth + 1))
1692        return true;
1693      if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1694                               Known2, TLO, Depth + 1))
1695        return true;
1696  
1697      // If the operands are constants, see if we can simplify them.
1698      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1699        return true;
1700  
1701      // Only known if known in both the LHS and RHS.
1702      Known = Known.intersectWith(Known2);
1703      break;
1704    case ISD::SETCC: {
1705      SDValue Op0 = Op.getOperand(0);
1706      SDValue Op1 = Op.getOperand(1);
1707      ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1708      // If (1) we only need the sign-bit, (2) the setcc operands are the same
1709      // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1710      // -1, we may be able to bypass the setcc.
1711      if (DemandedBits.isSignMask() &&
1712          Op0.getScalarValueSizeInBits() == BitWidth &&
1713          getBooleanContents(Op0.getValueType()) ==
1714              BooleanContent::ZeroOrNegativeOneBooleanContent) {
1715        // If we're testing X < 0, then this compare isn't needed - just use X!
1716        // FIXME: We're limiting to integer types here, but this should also work
1717        // if we don't care about FP signed-zero. The use of SETLT with FP means
1718        // that we don't care about NaNs.
1719        if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1720            (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1721          return TLO.CombineTo(Op, Op0);
1722  
1723        // TODO: Should we check for other forms of sign-bit comparisons?
1724        // Examples: X <= -1, X >= 0
1725      }
1726      if (getBooleanContents(Op0.getValueType()) ==
1727              TargetLowering::ZeroOrOneBooleanContent &&
1728          BitWidth > 1)
1729        Known.Zero.setBitsFrom(1);
1730      break;
1731    }
1732    case ISD::SHL: {
1733      SDValue Op0 = Op.getOperand(0);
1734      SDValue Op1 = Op.getOperand(1);
1735      EVT ShiftVT = Op1.getValueType();
1736  
1737      if (std::optional<uint64_t> KnownSA =
1738              TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1739        unsigned ShAmt = *KnownSA;
1740        if (ShAmt == 0)
1741          return TLO.CombineTo(Op, Op0);
1742  
1743        // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1744        // single shift.  We can do this if the bottom bits (which are shifted
1745        // out) are never demanded.
1746        // TODO - support non-uniform vector amounts.
1747        if (Op0.getOpcode() == ISD::SRL) {
1748          if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1749            if (std::optional<uint64_t> InnerSA =
1750                    TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1751              unsigned C1 = *InnerSA;
1752              unsigned Opc = ISD::SHL;
1753              int Diff = ShAmt - C1;
1754              if (Diff < 0) {
1755                Diff = -Diff;
1756                Opc = ISD::SRL;
1757              }
1758              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1759              return TLO.CombineTo(
1760                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1761            }
1762          }
1763        }
1764  
1765        // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1766        // are not demanded. This will likely allow the anyext to be folded away.
1767        // TODO - support non-uniform vector amounts.
1768        if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1769          SDValue InnerOp = Op0.getOperand(0);
1770          EVT InnerVT = InnerOp.getValueType();
1771          unsigned InnerBits = InnerVT.getScalarSizeInBits();
1772          if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1773              isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1774            SDValue NarrowShl = TLO.DAG.getNode(
1775                ISD::SHL, dl, InnerVT, InnerOp,
1776                TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1777            return TLO.CombineTo(
1778                Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1779          }
1780  
1781          // Repeat the SHL optimization above in cases where an extension
1782          // intervenes: (shl (anyext (shr x, c1)), c2) to
1783          // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1784          // aren't demanded (as above) and that the shifted upper c1 bits of
1785          // x aren't demanded.
1786          // TODO - support non-uniform vector amounts.
1787          if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1788              InnerOp.hasOneUse()) {
1789            if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1790                    InnerOp, DemandedElts, Depth + 2)) {
1791              unsigned InnerShAmt = *SA2;
1792              if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1793                  DemandedBits.getActiveBits() <=
1794                      (InnerBits - InnerShAmt + ShAmt) &&
1795                  DemandedBits.countr_zero() >= ShAmt) {
1796                SDValue NewSA =
1797                    TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1798                SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1799                                                 InnerOp.getOperand(0));
1800                return TLO.CombineTo(
1801                    Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1802              }
1803            }
1804          }
1805        }
1806  
1807        APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1808        if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1809                                 Depth + 1)) {
1810          SDNodeFlags Flags = Op.getNode()->getFlags();
1811          if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1812            // Disable the nsw and nuw flags. We can no longer guarantee that we
1813            // won't wrap after simplification.
1814            Flags.setNoSignedWrap(false);
1815            Flags.setNoUnsignedWrap(false);
1816            Op->setFlags(Flags);
1817          }
1818          return true;
1819        }
1820        Known.Zero <<= ShAmt;
1821        Known.One <<= ShAmt;
1822        // low bits known zero.
1823        Known.Zero.setLowBits(ShAmt);
1824  
1825        // Attempt to avoid multi-use ops if we don't need anything from them.
1826        if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1827          SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1828              Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1829          if (DemandedOp0) {
1830            SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1831            return TLO.CombineTo(Op, NewOp);
1832          }
1833        }
1834  
1835        // TODO: Can we merge this fold with the one below?
1836        // Try shrinking the operation as long as the shift amount will still be
1837        // in range.
1838        if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1839            Op.getNode()->hasOneUse()) {
1840          // Search for the smallest integer type with free casts to and from
1841          // Op's type. For expedience, just check power-of-2 integer types.
1842          unsigned DemandedSize = DemandedBits.getActiveBits();
1843          for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1844               SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1845            EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1846            if (isNarrowingProfitable(VT, SmallVT) &&
1847                isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1848                isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1849                (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1850              assert(DemandedSize <= SmallVTBits &&
1851                     "Narrowed below demanded bits?");
1852              // We found a type with free casts.
1853              SDValue NarrowShl = TLO.DAG.getNode(
1854                  ISD::SHL, dl, SmallVT,
1855                  TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1856                  TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1857              return TLO.CombineTo(
1858                  Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1859            }
1860          }
1861        }
1862  
1863        // Narrow shift to lower half - similar to ShrinkDemandedOp.
1864        // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1865        // Only do this if we demand the upper half so the knownbits are correct.
1866        unsigned HalfWidth = BitWidth / 2;
1867        if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1868            DemandedBits.countLeadingOnes() >= HalfWidth) {
1869          EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1870          if (isNarrowingProfitable(VT, HalfVT) &&
1871              isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1872              isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1873              (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1874            // If we're demanding the upper bits at all, we must ensure
1875            // that the upper bits of the shift result are known to be zero,
1876            // which is equivalent to the narrow shift being NUW.
1877            if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1878              bool IsNSW = Known.countMinSignBits() > HalfWidth;
1879              SDNodeFlags Flags;
1880              Flags.setNoSignedWrap(IsNSW);
1881              Flags.setNoUnsignedWrap(IsNUW);
1882              SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1883              SDValue NewShiftAmt =
1884                  TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1885              SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1886                                                 NewShiftAmt, Flags);
1887              SDValue NewExt =
1888                  TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1889              return TLO.CombineTo(Op, NewExt);
1890            }
1891          }
1892        }
1893      } else {
1894        // This is a variable shift, so we can't shift the demand mask by a known
1895        // amount. But if we are not demanding high bits, then we are not
1896        // demanding those bits from the pre-shifted operand either.
1897        if (unsigned CTLZ = DemandedBits.countl_zero()) {
1898          APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1899          if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1900                                   Depth + 1)) {
1901            SDNodeFlags Flags = Op.getNode()->getFlags();
1902            if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1903              // Disable the nsw and nuw flags. We can no longer guarantee that we
1904              // won't wrap after simplification.
1905              Flags.setNoSignedWrap(false);
1906              Flags.setNoUnsignedWrap(false);
1907              Op->setFlags(Flags);
1908            }
1909            return true;
1910          }
1911          Known.resetAll();
1912        }
1913      }
1914  
1915      // If we are only demanding sign bits then we can use the shift source
1916      // directly.
1917      if (std::optional<uint64_t> MaxSA =
1918              TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1919        unsigned ShAmt = *MaxSA;
1920        unsigned NumSignBits =
1921            TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1922        unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1923        if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1924          return TLO.CombineTo(Op, Op0);
1925      }
1926      break;
1927    }
1928    case ISD::SRL: {
1929      SDValue Op0 = Op.getOperand(0);
1930      SDValue Op1 = Op.getOperand(1);
1931      EVT ShiftVT = Op1.getValueType();
1932  
1933      if (std::optional<uint64_t> KnownSA =
1934              TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1935        unsigned ShAmt = *KnownSA;
1936        if (ShAmt == 0)
1937          return TLO.CombineTo(Op, Op0);
1938  
1939        // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1940        // single shift.  We can do this if the top bits (which are shifted out)
1941        // are never demanded.
1942        // TODO - support non-uniform vector amounts.
1943        if (Op0.getOpcode() == ISD::SHL) {
1944          if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1945            if (std::optional<uint64_t> InnerSA =
1946                    TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1947              unsigned C1 = *InnerSA;
1948              unsigned Opc = ISD::SRL;
1949              int Diff = ShAmt - C1;
1950              if (Diff < 0) {
1951                Diff = -Diff;
1952                Opc = ISD::SHL;
1953              }
1954              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1955              return TLO.CombineTo(
1956                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1957            }
1958          }
1959        }
1960  
1961        APInt InDemandedMask = (DemandedBits << ShAmt);
1962  
1963        // If the shift is exact, then it does demand the low bits (and knows that
1964        // they are zero).
1965        if (Op->getFlags().hasExact())
1966          InDemandedMask.setLowBits(ShAmt);
1967  
1968        // Narrow shift to lower half - similar to ShrinkDemandedOp.
1969        // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1970        if ((BitWidth % 2) == 0 && !VT.isVector()) {
1971          APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1972          EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1973          if (isNarrowingProfitable(VT, HalfVT) &&
1974              isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1975              isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1976              (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1977              ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1978               TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1979            SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1980            SDValue NewShiftAmt =
1981                TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1982            SDValue NewShift =
1983                TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1984            return TLO.CombineTo(
1985                Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1986          }
1987        }
1988  
1989        // Compute the new bits that are at the top now.
1990        if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1991                                 Depth + 1))
1992          return true;
1993        Known.Zero.lshrInPlace(ShAmt);
1994        Known.One.lshrInPlace(ShAmt);
1995        // High bits known zero.
1996        Known.Zero.setHighBits(ShAmt);
1997  
1998        // Attempt to avoid multi-use ops if we don't need anything from them.
1999        if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2000          SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2001              Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2002          if (DemandedOp0) {
2003            SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2004            return TLO.CombineTo(Op, NewOp);
2005          }
2006        }
2007      } else {
2008        // Use generic knownbits computation as it has support for non-uniform
2009        // shift amounts.
2010        Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2011      }
2012  
2013      // Try to match AVG patterns (after shift simplification).
2014      if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2015                                          DemandedElts, Depth + 1))
2016        return TLO.CombineTo(Op, AVG);
2017  
2018      break;
2019    }
2020    case ISD::SRA: {
2021      SDValue Op0 = Op.getOperand(0);
2022      SDValue Op1 = Op.getOperand(1);
2023      EVT ShiftVT = Op1.getValueType();
2024  
2025      // If we only want bits that already match the signbit then we don't need
2026      // to shift.
2027      unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2028      if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2029          NumHiDemandedBits)
2030        return TLO.CombineTo(Op, Op0);
2031  
2032      // If this is an arithmetic shift right and only the low-bit is set, we can
2033      // always convert this into a logical shr, even if the shift amount is
2034      // variable.  The low bit of the shift cannot be an input sign bit unless
2035      // the shift amount is >= the size of the datatype, which is undefined.
2036      if (DemandedBits.isOne())
2037        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2038  
2039      if (std::optional<uint64_t> KnownSA =
2040              TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2041        unsigned ShAmt = *KnownSA;
2042        if (ShAmt == 0)
2043          return TLO.CombineTo(Op, Op0);
2044  
2045        // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2046        // supports sext_inreg.
2047        if (Op0.getOpcode() == ISD::SHL) {
2048          if (std::optional<uint64_t> InnerSA =
2049                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2050            unsigned LowBits = BitWidth - ShAmt;
2051            EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2052            if (VT.isVector())
2053              ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2054                                       VT.getVectorElementCount());
2055  
2056            if (*InnerSA == ShAmt) {
2057              if (!TLO.LegalOperations() ||
2058                  getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2059                return TLO.CombineTo(
2060                    Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2061                                        Op0.getOperand(0),
2062                                        TLO.DAG.getValueType(ExtVT)));
2063  
2064              // Even if we can't convert to sext_inreg, we might be able to
2065              // remove this shift pair if the input is already sign extended.
2066              unsigned NumSignBits =
2067                  TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2068              if (NumSignBits > ShAmt)
2069                return TLO.CombineTo(Op, Op0.getOperand(0));
2070            }
2071          }
2072        }
2073  
2074        APInt InDemandedMask = (DemandedBits << ShAmt);
2075  
2076        // If the shift is exact, then it does demand the low bits (and knows that
2077        // they are zero).
2078        if (Op->getFlags().hasExact())
2079          InDemandedMask.setLowBits(ShAmt);
2080  
2081        // If any of the demanded bits are produced by the sign extension, we also
2082        // demand the input sign bit.
2083        if (DemandedBits.countl_zero() < ShAmt)
2084          InDemandedMask.setSignBit();
2085  
2086        if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2087                                 Depth + 1))
2088          return true;
2089        Known.Zero.lshrInPlace(ShAmt);
2090        Known.One.lshrInPlace(ShAmt);
2091  
2092        // If the input sign bit is known to be zero, or if none of the top bits
2093        // are demanded, turn this into an unsigned shift right.
2094        if (Known.Zero[BitWidth - ShAmt - 1] ||
2095            DemandedBits.countl_zero() >= ShAmt) {
2096          SDNodeFlags Flags;
2097          Flags.setExact(Op->getFlags().hasExact());
2098          return TLO.CombineTo(
2099              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2100        }
2101  
2102        int Log2 = DemandedBits.exactLogBase2();
2103        if (Log2 >= 0) {
2104          // The bit must come from the sign.
2105          SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2106          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2107        }
2108  
2109        if (Known.One[BitWidth - ShAmt - 1])
2110          // New bits are known one.
2111          Known.One.setHighBits(ShAmt);
2112  
2113        // Attempt to avoid multi-use ops if we don't need anything from them.
2114        if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2115          SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2116              Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2117          if (DemandedOp0) {
2118            SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2119            return TLO.CombineTo(Op, NewOp);
2120          }
2121        }
2122      }
2123  
2124      // Try to match AVG patterns (after shift simplification).
2125      if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2126                                          DemandedElts, Depth + 1))
2127        return TLO.CombineTo(Op, AVG);
2128  
2129      break;
2130    }
2131    case ISD::FSHL:
2132    case ISD::FSHR: {
2133      SDValue Op0 = Op.getOperand(0);
2134      SDValue Op1 = Op.getOperand(1);
2135      SDValue Op2 = Op.getOperand(2);
2136      bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2137  
2138      if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2139        unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2140  
2141        // For fshl, 0-shift returns the 1st arg.
2142        // For fshr, 0-shift returns the 2nd arg.
2143        if (Amt == 0) {
2144          if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2145                                   Known, TLO, Depth + 1))
2146            return true;
2147          break;
2148        }
2149  
2150        // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2151        // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2152        APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2153        APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2154        if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2155                                 Depth + 1))
2156          return true;
2157        if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2158                                 Depth + 1))
2159          return true;
2160  
2161        Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2162        Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2163        Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2164        Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2165        Known = Known.unionWith(Known2);
2166  
2167        // Attempt to avoid multi-use ops if we don't need anything from them.
2168        if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2169            !DemandedElts.isAllOnes()) {
2170          SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2171              Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2172          SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2173              Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2174          if (DemandedOp0 || DemandedOp1) {
2175            DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2176            DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2177            SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2178                                            DemandedOp1, Op2);
2179            return TLO.CombineTo(Op, NewOp);
2180          }
2181        }
2182      }
2183  
2184      // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2185      if (isPowerOf2_32(BitWidth)) {
2186        APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2187        if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2188                                 Known2, TLO, Depth + 1))
2189          return true;
2190      }
2191      break;
2192    }
2193    case ISD::ROTL:
2194    case ISD::ROTR: {
2195      SDValue Op0 = Op.getOperand(0);
2196      SDValue Op1 = Op.getOperand(1);
2197      bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2198  
2199      // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2200      if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2201        return TLO.CombineTo(Op, Op0);
2202  
2203      if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2204        unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2205        unsigned RevAmt = BitWidth - Amt;
2206  
2207        // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2208        // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2209        APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2210        if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2211                                 Depth + 1))
2212          return true;
2213  
2214        // rot*(x, 0) --> x
2215        if (Amt == 0)
2216          return TLO.CombineTo(Op, Op0);
2217  
2218        // See if we don't demand either half of the rotated bits.
2219        if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2220            DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2221          Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2222          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2223        }
2224        if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2225            DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2226          Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2227          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2228        }
2229      }
2230  
2231      // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2232      if (isPowerOf2_32(BitWidth)) {
2233        APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2234        if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2235                                 Depth + 1))
2236          return true;
2237      }
2238      break;
2239    }
2240    case ISD::SMIN:
2241    case ISD::SMAX:
2242    case ISD::UMIN:
2243    case ISD::UMAX: {
2244      unsigned Opc = Op.getOpcode();
2245      SDValue Op0 = Op.getOperand(0);
2246      SDValue Op1 = Op.getOperand(1);
2247  
2248      // If we're only demanding signbits, then we can simplify to OR/AND node.
2249      unsigned BitOp =
2250          (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2251      unsigned NumSignBits =
2252          std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2253                   TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2254      unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2255      if (NumSignBits >= NumDemandedUpperBits)
2256        return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2257  
2258      // Check if one arg is always less/greater than (or equal) to the other arg.
2259      KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2260      KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2261      switch (Opc) {
2262      case ISD::SMIN:
2263        if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2264          return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2265        if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2266          return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2267        Known = KnownBits::smin(Known0, Known1);
2268        break;
2269      case ISD::SMAX:
2270        if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2271          return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2272        if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2273          return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2274        Known = KnownBits::smax(Known0, Known1);
2275        break;
2276      case ISD::UMIN:
2277        if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2278          return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2279        if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2280          return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2281        Known = KnownBits::umin(Known0, Known1);
2282        break;
2283      case ISD::UMAX:
2284        if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2285          return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2286        if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2287          return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2288        Known = KnownBits::umax(Known0, Known1);
2289        break;
2290      }
2291      break;
2292    }
2293    case ISD::BITREVERSE: {
2294      SDValue Src = Op.getOperand(0);
2295      APInt DemandedSrcBits = DemandedBits.reverseBits();
2296      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2297                               Depth + 1))
2298        return true;
2299      Known.One = Known2.One.reverseBits();
2300      Known.Zero = Known2.Zero.reverseBits();
2301      break;
2302    }
2303    case ISD::BSWAP: {
2304      SDValue Src = Op.getOperand(0);
2305  
2306      // If the only bits demanded come from one byte of the bswap result,
2307      // just shift the input byte into position to eliminate the bswap.
2308      unsigned NLZ = DemandedBits.countl_zero();
2309      unsigned NTZ = DemandedBits.countr_zero();
2310  
2311      // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2312      // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2313      // have 14 leading zeros, round to 8.
2314      NLZ = alignDown(NLZ, 8);
2315      NTZ = alignDown(NTZ, 8);
2316      // If we need exactly one byte, we can do this transformation.
2317      if (BitWidth - NLZ - NTZ == 8) {
2318        // Replace this with either a left or right shift to get the byte into
2319        // the right place.
2320        unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2321        if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2322          unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2323          SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2324          SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2325          return TLO.CombineTo(Op, NewOp);
2326        }
2327      }
2328  
2329      APInt DemandedSrcBits = DemandedBits.byteSwap();
2330      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2331                               Depth + 1))
2332        return true;
2333      Known.One = Known2.One.byteSwap();
2334      Known.Zero = Known2.Zero.byteSwap();
2335      break;
2336    }
2337    case ISD::CTPOP: {
2338      // If only 1 bit is demanded, replace with PARITY as long as we're before
2339      // op legalization.
2340      // FIXME: Limit to scalars for now.
2341      if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2342        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2343                                                 Op.getOperand(0)));
2344  
2345      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2346      break;
2347    }
2348    case ISD::SIGN_EXTEND_INREG: {
2349      SDValue Op0 = Op.getOperand(0);
2350      EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2351      unsigned ExVTBits = ExVT.getScalarSizeInBits();
2352  
2353      // If we only care about the highest bit, don't bother shifting right.
2354      if (DemandedBits.isSignMask()) {
2355        unsigned MinSignedBits =
2356            TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2357        bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2358        // However if the input is already sign extended we expect the sign
2359        // extension to be dropped altogether later and do not simplify.
2360        if (!AlreadySignExtended) {
2361          // Compute the correct shift amount type, which must be getShiftAmountTy
2362          // for scalar types after legalization.
2363          SDValue ShiftAmt =
2364              TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2365          return TLO.CombineTo(Op,
2366                               TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2367        }
2368      }
2369  
2370      // If none of the extended bits are demanded, eliminate the sextinreg.
2371      if (DemandedBits.getActiveBits() <= ExVTBits)
2372        return TLO.CombineTo(Op, Op0);
2373  
2374      APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2375  
2376      // Since the sign extended bits are demanded, we know that the sign
2377      // bit is demanded.
2378      InputDemandedBits.setBit(ExVTBits - 1);
2379  
2380      if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2381                               Depth + 1))
2382        return true;
2383  
2384      // If the sign bit of the input is known set or clear, then we know the
2385      // top bits of the result.
2386  
2387      // If the input sign bit is known zero, convert this into a zero extension.
2388      if (Known.Zero[ExVTBits - 1])
2389        return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2390  
2391      APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2392      if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2393        Known.One.setBitsFrom(ExVTBits);
2394        Known.Zero &= Mask;
2395      } else { // Input sign bit unknown
2396        Known.Zero &= Mask;
2397        Known.One &= Mask;
2398      }
2399      break;
2400    }
2401    case ISD::BUILD_PAIR: {
2402      EVT HalfVT = Op.getOperand(0).getValueType();
2403      unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2404  
2405      APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2406      APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2407  
2408      KnownBits KnownLo, KnownHi;
2409  
2410      if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2411        return true;
2412  
2413      if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2414        return true;
2415  
2416      Known = KnownHi.concat(KnownLo);
2417      break;
2418    }
2419    case ISD::ZERO_EXTEND_VECTOR_INREG:
2420      if (VT.isScalableVector())
2421        return false;
2422      [[fallthrough]];
2423    case ISD::ZERO_EXTEND: {
2424      SDValue Src = Op.getOperand(0);
2425      EVT SrcVT = Src.getValueType();
2426      unsigned InBits = SrcVT.getScalarSizeInBits();
2427      unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2428      bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2429  
2430      // If none of the top bits are demanded, convert this into an any_extend.
2431      if (DemandedBits.getActiveBits() <= InBits) {
2432        // If we only need the non-extended bits of the bottom element
2433        // then we can just bitcast to the result.
2434        if (IsLE && IsVecInReg && DemandedElts == 1 &&
2435            VT.getSizeInBits() == SrcVT.getSizeInBits())
2436          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2437  
2438        unsigned Opc =
2439            IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2440        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2441          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2442      }
2443  
2444      SDNodeFlags Flags = Op->getFlags();
2445      APInt InDemandedBits = DemandedBits.trunc(InBits);
2446      APInt InDemandedElts = DemandedElts.zext(InElts);
2447      if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2448                               Depth + 1)) {
2449        if (Flags.hasNonNeg()) {
2450          Flags.setNonNeg(false);
2451          Op->setFlags(Flags);
2452        }
2453        return true;
2454      }
2455      assert(Known.getBitWidth() == InBits && "Src width has changed?");
2456      Known = Known.zext(BitWidth);
2457  
2458      // Attempt to avoid multi-use ops if we don't need anything from them.
2459      if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2460              Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2461        return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2462      break;
2463    }
2464    case ISD::SIGN_EXTEND_VECTOR_INREG:
2465      if (VT.isScalableVector())
2466        return false;
2467      [[fallthrough]];
2468    case ISD::SIGN_EXTEND: {
2469      SDValue Src = Op.getOperand(0);
2470      EVT SrcVT = Src.getValueType();
2471      unsigned InBits = SrcVT.getScalarSizeInBits();
2472      unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2473      bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2474  
2475      APInt InDemandedElts = DemandedElts.zext(InElts);
2476      APInt InDemandedBits = DemandedBits.trunc(InBits);
2477  
2478      // Since some of the sign extended bits are demanded, we know that the sign
2479      // bit is demanded.
2480      InDemandedBits.setBit(InBits - 1);
2481  
2482      // If none of the top bits are demanded, convert this into an any_extend.
2483      if (DemandedBits.getActiveBits() <= InBits) {
2484        // If we only need the non-extended bits of the bottom element
2485        // then we can just bitcast to the result.
2486        if (IsLE && IsVecInReg && DemandedElts == 1 &&
2487            VT.getSizeInBits() == SrcVT.getSizeInBits())
2488          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2489  
2490        // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2491        if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2492            TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2493                InBits) {
2494          unsigned Opc =
2495              IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2496          if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2497            return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2498        }
2499      }
2500  
2501      if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2502                               Depth + 1))
2503        return true;
2504      assert(Known.getBitWidth() == InBits && "Src width has changed?");
2505  
2506      // If the sign bit is known one, the top bits match.
2507      Known = Known.sext(BitWidth);
2508  
2509      // If the sign bit is known zero, convert this to a zero extend.
2510      if (Known.isNonNegative()) {
2511        unsigned Opc =
2512            IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2513        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2514          SDNodeFlags Flags;
2515          if (!IsVecInReg)
2516            Flags.setNonNeg(true);
2517          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2518        }
2519      }
2520  
2521      // Attempt to avoid multi-use ops if we don't need anything from them.
2522      if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2523              Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2524        return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2525      break;
2526    }
2527    case ISD::ANY_EXTEND_VECTOR_INREG:
2528      if (VT.isScalableVector())
2529        return false;
2530      [[fallthrough]];
2531    case ISD::ANY_EXTEND: {
2532      SDValue Src = Op.getOperand(0);
2533      EVT SrcVT = Src.getValueType();
2534      unsigned InBits = SrcVT.getScalarSizeInBits();
2535      unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2536      bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2537  
2538      // If we only need the bottom element then we can just bitcast.
2539      // TODO: Handle ANY_EXTEND?
2540      if (IsLE && IsVecInReg && DemandedElts == 1 &&
2541          VT.getSizeInBits() == SrcVT.getSizeInBits())
2542        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2543  
2544      APInt InDemandedBits = DemandedBits.trunc(InBits);
2545      APInt InDemandedElts = DemandedElts.zext(InElts);
2546      if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2547                               Depth + 1))
2548        return true;
2549      assert(Known.getBitWidth() == InBits && "Src width has changed?");
2550      Known = Known.anyext(BitWidth);
2551  
2552      // Attempt to avoid multi-use ops if we don't need anything from them.
2553      if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2554              Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2555        return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2556      break;
2557    }
2558    case ISD::TRUNCATE: {
2559      SDValue Src = Op.getOperand(0);
2560  
2561      // Simplify the input, using demanded bit information, and compute the known
2562      // zero/one bits live out.
2563      unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2564      APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2565      if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2566                               Depth + 1))
2567        return true;
2568      Known = Known.trunc(BitWidth);
2569  
2570      // Attempt to avoid multi-use ops if we don't need anything from them.
2571      if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2572              Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2573        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2574  
2575      // If the input is only used by this truncate, see if we can shrink it based
2576      // on the known demanded bits.
2577      switch (Src.getOpcode()) {
2578      default:
2579        break;
2580      case ISD::SRL:
2581        // Shrink SRL by a constant if none of the high bits shifted in are
2582        // demanded.
2583        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2584          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2585          // undesirable.
2586          break;
2587  
2588        if (Src.getNode()->hasOneUse()) {
2589          if (isTruncateFree(Src, VT) &&
2590              !isTruncateFree(Src.getValueType(), VT)) {
2591            // If truncate is only free at trunc(srl), do not turn it into
2592            // srl(trunc). The check is done by first check the truncate is free
2593            // at Src's opcode(srl), then check the truncate is not done by
2594            // referencing sub-register. In test, if both trunc(srl) and
2595            // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2596            // trunc(srl)'s trunc is free, trunc(srl) is better.
2597            break;
2598          }
2599  
2600          std::optional<uint64_t> ShAmtC =
2601              TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2602          if (!ShAmtC || *ShAmtC >= BitWidth)
2603            break;
2604          uint64_t ShVal = *ShAmtC;
2605  
2606          APInt HighBits =
2607              APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2608          HighBits.lshrInPlace(ShVal);
2609          HighBits = HighBits.trunc(BitWidth);
2610          if (!(HighBits & DemandedBits)) {
2611            // None of the shifted in bits are needed.  Add a truncate of the
2612            // shift input, then shift it.
2613            SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2614            SDValue NewTrunc =
2615                TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2616            return TLO.CombineTo(
2617                Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2618          }
2619        }
2620        break;
2621      }
2622  
2623      break;
2624    }
2625    case ISD::AssertZext: {
2626      // AssertZext demands all of the high bits, plus any of the low bits
2627      // demanded by its users.
2628      EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2629      APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2630      if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2631                               TLO, Depth + 1))
2632        return true;
2633  
2634      Known.Zero |= ~InMask;
2635      Known.One &= (~Known.Zero);
2636      break;
2637    }
2638    case ISD::EXTRACT_VECTOR_ELT: {
2639      SDValue Src = Op.getOperand(0);
2640      SDValue Idx = Op.getOperand(1);
2641      ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2642      unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2643  
2644      if (SrcEltCnt.isScalable())
2645        return false;
2646  
2647      // Demand the bits from every vector element without a constant index.
2648      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2649      APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2650      if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2651        if (CIdx->getAPIntValue().ult(NumSrcElts))
2652          DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2653  
2654      // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2655      // anything about the extended bits.
2656      APInt DemandedSrcBits = DemandedBits;
2657      if (BitWidth > EltBitWidth)
2658        DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2659  
2660      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2661                               Depth + 1))
2662        return true;
2663  
2664      // Attempt to avoid multi-use ops if we don't need anything from them.
2665      if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2666        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2667                Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2668          SDValue NewOp =
2669              TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2670          return TLO.CombineTo(Op, NewOp);
2671        }
2672      }
2673  
2674      Known = Known2;
2675      if (BitWidth > EltBitWidth)
2676        Known = Known.anyext(BitWidth);
2677      break;
2678    }
2679    case ISD::BITCAST: {
2680      if (VT.isScalableVector())
2681        return false;
2682      SDValue Src = Op.getOperand(0);
2683      EVT SrcVT = Src.getValueType();
2684      unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2685  
2686      // If this is an FP->Int bitcast and if the sign bit is the only
2687      // thing demanded, turn this into a FGETSIGN.
2688      if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2689          DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2690          SrcVT.isFloatingPoint()) {
2691        bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2692        bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2693        if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2694            SrcVT != MVT::f128) {
2695          // Cannot eliminate/lower SHL for f128 yet.
2696          EVT Ty = OpVTLegal ? VT : MVT::i32;
2697          // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2698          // place.  We expect the SHL to be eliminated by other optimizations.
2699          SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2700          unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2701          if (!OpVTLegal && OpVTSizeInBits > 32)
2702            Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2703          unsigned ShVal = Op.getValueSizeInBits() - 1;
2704          SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2705          return TLO.CombineTo(Op,
2706                               TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2707        }
2708      }
2709  
2710      // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2711      // Demand the elt/bit if any of the original elts/bits are demanded.
2712      if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2713        unsigned Scale = BitWidth / NumSrcEltBits;
2714        unsigned NumSrcElts = SrcVT.getVectorNumElements();
2715        APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2716        APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2717        for (unsigned i = 0; i != Scale; ++i) {
2718          unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2719          unsigned BitOffset = EltOffset * NumSrcEltBits;
2720          APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2721          if (!Sub.isZero()) {
2722            DemandedSrcBits |= Sub;
2723            for (unsigned j = 0; j != NumElts; ++j)
2724              if (DemandedElts[j])
2725                DemandedSrcElts.setBit((j * Scale) + i);
2726          }
2727        }
2728  
2729        APInt KnownSrcUndef, KnownSrcZero;
2730        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2731                                       KnownSrcZero, TLO, Depth + 1))
2732          return true;
2733  
2734        KnownBits KnownSrcBits;
2735        if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2736                                 KnownSrcBits, TLO, Depth + 1))
2737          return true;
2738      } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2739        // TODO - bigendian once we have test coverage.
2740        unsigned Scale = NumSrcEltBits / BitWidth;
2741        unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2742        APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2743        APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2744        for (unsigned i = 0; i != NumElts; ++i)
2745          if (DemandedElts[i]) {
2746            unsigned Offset = (i % Scale) * BitWidth;
2747            DemandedSrcBits.insertBits(DemandedBits, Offset);
2748            DemandedSrcElts.setBit(i / Scale);
2749          }
2750  
2751        if (SrcVT.isVector()) {
2752          APInt KnownSrcUndef, KnownSrcZero;
2753          if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2754                                         KnownSrcZero, TLO, Depth + 1))
2755            return true;
2756        }
2757  
2758        KnownBits KnownSrcBits;
2759        if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2760                                 KnownSrcBits, TLO, Depth + 1))
2761          return true;
2762  
2763        // Attempt to avoid multi-use ops if we don't need anything from them.
2764        if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2765          if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2766                  Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2767            SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2768            return TLO.CombineTo(Op, NewOp);
2769          }
2770        }
2771      }
2772  
2773      // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2774      // recursive call where Known may be useful to the caller.
2775      if (Depth > 0) {
2776        Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2777        return false;
2778      }
2779      break;
2780    }
2781    case ISD::MUL:
2782      if (DemandedBits.isPowerOf2()) {
2783        // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2784        // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2785        // odd (has LSB set), then the left-shifted low bit of X is the answer.
2786        unsigned CTZ = DemandedBits.countr_zero();
2787        ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2788        if (C && C->getAPIntValue().countr_zero() == CTZ) {
2789          SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2790          SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2791          return TLO.CombineTo(Op, Shl);
2792        }
2793      }
2794      // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2795      // X * X is odd iff X is odd.
2796      // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2797      if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2798        SDValue One = TLO.DAG.getConstant(1, dl, VT);
2799        SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2800        return TLO.CombineTo(Op, And1);
2801      }
2802      [[fallthrough]];
2803    case ISD::ADD:
2804    case ISD::SUB: {
2805      // Add, Sub, and Mul don't demand any bits in positions beyond that
2806      // of the highest bit demanded of them.
2807      SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2808      SDNodeFlags Flags = Op.getNode()->getFlags();
2809      unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2810      APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2811      KnownBits KnownOp0, KnownOp1;
2812      auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2813                                        const KnownBits &KnownRHS) {
2814        if (Op.getOpcode() == ISD::MUL)
2815          Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2816        return Demanded;
2817      };
2818      if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2819                               Depth + 1) ||
2820          SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2821                               DemandedElts, KnownOp0, TLO, Depth + 1) ||
2822          // See if the operation should be performed at a smaller bit width.
2823          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2824        if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2825          // Disable the nsw and nuw flags. We can no longer guarantee that we
2826          // won't wrap after simplification.
2827          Flags.setNoSignedWrap(false);
2828          Flags.setNoUnsignedWrap(false);
2829          Op->setFlags(Flags);
2830        }
2831        return true;
2832      }
2833  
2834      // neg x with only low bit demanded is simply x.
2835      if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2836          isNullConstant(Op0))
2837        return TLO.CombineTo(Op, Op1);
2838  
2839      // Attempt to avoid multi-use ops if we don't need anything from them.
2840      if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2841        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2842            Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2843        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2844            Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2845        if (DemandedOp0 || DemandedOp1) {
2846          Flags.setNoSignedWrap(false);
2847          Flags.setNoUnsignedWrap(false);
2848          Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2849          Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2850          SDValue NewOp =
2851              TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2852          return TLO.CombineTo(Op, NewOp);
2853        }
2854      }
2855  
2856      // If we have a constant operand, we may be able to turn it into -1 if we
2857      // do not demand the high bits. This can make the constant smaller to
2858      // encode, allow more general folding, or match specialized instruction
2859      // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2860      // is probably not useful (and could be detrimental).
2861      ConstantSDNode *C = isConstOrConstSplat(Op1);
2862      APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2863      if (C && !C->isAllOnes() && !C->isOne() &&
2864          (C->getAPIntValue() | HighMask).isAllOnes()) {
2865        SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2866        // Disable the nsw and nuw flags. We can no longer guarantee that we
2867        // won't wrap after simplification.
2868        Flags.setNoSignedWrap(false);
2869        Flags.setNoUnsignedWrap(false);
2870        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2871        return TLO.CombineTo(Op, NewOp);
2872      }
2873  
2874      // Match a multiply with a disguised negated-power-of-2 and convert to a
2875      // an equivalent shift-left amount.
2876      // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2877      auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2878        if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2879          return 0;
2880  
2881        // Don't touch opaque constants. Also, ignore zero and power-of-2
2882        // multiplies. Those will get folded later.
2883        ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2884        if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2885            !MulC->getAPIntValue().isPowerOf2()) {
2886          APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2887          if (UnmaskedC.isNegatedPowerOf2())
2888            return (-UnmaskedC).logBase2();
2889        }
2890        return 0;
2891      };
2892  
2893      auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2894                         unsigned ShlAmt) {
2895        SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2896        SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2897        SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2898        return TLO.CombineTo(Op, Res);
2899      };
2900  
2901      if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2902        if (Op.getOpcode() == ISD::ADD) {
2903          // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2904          if (unsigned ShAmt = getShiftLeftAmt(Op0))
2905            return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2906          // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2907          if (unsigned ShAmt = getShiftLeftAmt(Op1))
2908            return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2909        }
2910        if (Op.getOpcode() == ISD::SUB) {
2911          // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2912          if (unsigned ShAmt = getShiftLeftAmt(Op1))
2913            return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2914        }
2915      }
2916  
2917      if (Op.getOpcode() == ISD::MUL) {
2918        Known = KnownBits::mul(KnownOp0, KnownOp1);
2919      } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2920        Known = KnownBits::computeForAddSub(
2921            Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2922            Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2923      }
2924      break;
2925    }
2926    default:
2927      // We also ask the target about intrinsics (which could be specific to it).
2928      if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2929          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2930        // TODO: Probably okay to remove after audit; here to reduce change size
2931        // in initial enablement patch for scalable vectors
2932        if (Op.getValueType().isScalableVector())
2933          break;
2934        if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2935                                              Known, TLO, Depth))
2936          return true;
2937        break;
2938      }
2939  
2940      // Just use computeKnownBits to compute output bits.
2941      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2942      break;
2943    }
2944  
2945    // If we know the value of all of the demanded bits, return this as a
2946    // constant.
2947    if (!isTargetCanonicalConstantNode(Op) &&
2948        DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2949      // Avoid folding to a constant if any OpaqueConstant is involved.
2950      const SDNode *N = Op.getNode();
2951      for (SDNode *Op :
2952           llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2953        if (auto *C = dyn_cast<ConstantSDNode>(Op))
2954          if (C->isOpaque())
2955            return false;
2956      }
2957      if (VT.isInteger())
2958        return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2959      if (VT.isFloatingPoint())
2960        return TLO.CombineTo(
2961            Op,
2962            TLO.DAG.getConstantFP(
2963                APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2964    }
2965  
2966    // A multi use 'all demanded elts' simplify failed to find any knownbits.
2967    // Try again just for the original demanded elts.
2968    // Ensure we do this AFTER constant folding above.
2969    if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2970      Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2971  
2972    return false;
2973  }
2974  
SimplifyDemandedVectorElts(SDValue Op,const APInt & DemandedElts,DAGCombinerInfo & DCI) const2975  bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2976                                                  const APInt &DemandedElts,
2977                                                  DAGCombinerInfo &DCI) const {
2978    SelectionDAG &DAG = DCI.DAG;
2979    TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2980                          !DCI.isBeforeLegalizeOps());
2981  
2982    APInt KnownUndef, KnownZero;
2983    bool Simplified =
2984        SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2985    if (Simplified) {
2986      DCI.AddToWorklist(Op.getNode());
2987      DCI.CommitTargetLoweringOpt(TLO);
2988    }
2989  
2990    return Simplified;
2991  }
2992  
2993  /// Given a vector binary operation and known undefined elements for each input
2994  /// operand, compute whether each element of the output is undefined.
getKnownUndefForVectorBinop(SDValue BO,SelectionDAG & DAG,const APInt & UndefOp0,const APInt & UndefOp1)2995  static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2996                                           const APInt &UndefOp0,
2997                                           const APInt &UndefOp1) {
2998    EVT VT = BO.getValueType();
2999    assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3000           "Vector binop only");
3001  
3002    EVT EltVT = VT.getVectorElementType();
3003    unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3004    assert(UndefOp0.getBitWidth() == NumElts &&
3005           UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3006  
3007    auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3008                                     const APInt &UndefVals) {
3009      if (UndefVals[Index])
3010        return DAG.getUNDEF(EltVT);
3011  
3012      if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3013        // Try hard to make sure that the getNode() call is not creating temporary
3014        // nodes. Ignore opaque integers because they do not constant fold.
3015        SDValue Elt = BV->getOperand(Index);
3016        auto *C = dyn_cast<ConstantSDNode>(Elt);
3017        if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3018          return Elt;
3019      }
3020  
3021      return SDValue();
3022    };
3023  
3024    APInt KnownUndef = APInt::getZero(NumElts);
3025    for (unsigned i = 0; i != NumElts; ++i) {
3026      // If both inputs for this element are either constant or undef and match
3027      // the element type, compute the constant/undef result for this element of
3028      // the vector.
3029      // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3030      // not handle FP constants. The code within getNode() should be refactored
3031      // to avoid the danger of creating a bogus temporary node here.
3032      SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3033      SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3034      if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3035        if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3036          KnownUndef.setBit(i);
3037    }
3038    return KnownUndef;
3039  }
3040  
SimplifyDemandedVectorElts(SDValue Op,const APInt & OriginalDemandedElts,APInt & KnownUndef,APInt & KnownZero,TargetLoweringOpt & TLO,unsigned Depth,bool AssumeSingleUse) const3041  bool TargetLowering::SimplifyDemandedVectorElts(
3042      SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3043      APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3044      bool AssumeSingleUse) const {
3045    EVT VT = Op.getValueType();
3046    unsigned Opcode = Op.getOpcode();
3047    APInt DemandedElts = OriginalDemandedElts;
3048    unsigned NumElts = DemandedElts.getBitWidth();
3049    assert(VT.isVector() && "Expected vector op");
3050  
3051    KnownUndef = KnownZero = APInt::getZero(NumElts);
3052  
3053    const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3054    if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3055      return false;
3056  
3057    // TODO: For now we assume we know nothing about scalable vectors.
3058    if (VT.isScalableVector())
3059      return false;
3060  
3061    assert(VT.getVectorNumElements() == NumElts &&
3062           "Mask size mismatches value type element count!");
3063  
3064    // Undef operand.
3065    if (Op.isUndef()) {
3066      KnownUndef.setAllBits();
3067      return false;
3068    }
3069  
3070    // If Op has other users, assume that all elements are needed.
3071    if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3072      DemandedElts.setAllBits();
3073  
3074    // Not demanding any elements from Op.
3075    if (DemandedElts == 0) {
3076      KnownUndef.setAllBits();
3077      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3078    }
3079  
3080    // Limit search depth.
3081    if (Depth >= SelectionDAG::MaxRecursionDepth)
3082      return false;
3083  
3084    SDLoc DL(Op);
3085    unsigned EltSizeInBits = VT.getScalarSizeInBits();
3086    bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3087  
3088    // Helper for demanding the specified elements and all the bits of both binary
3089    // operands.
3090    auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3091      SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3092                                                             TLO.DAG, Depth + 1);
3093      SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3094                                                             TLO.DAG, Depth + 1);
3095      if (NewOp0 || NewOp1) {
3096        SDValue NewOp =
3097            TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3098                            NewOp1 ? NewOp1 : Op1, Op->getFlags());
3099        return TLO.CombineTo(Op, NewOp);
3100      }
3101      return false;
3102    };
3103  
3104    switch (Opcode) {
3105    case ISD::SCALAR_TO_VECTOR: {
3106      if (!DemandedElts[0]) {
3107        KnownUndef.setAllBits();
3108        return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3109      }
3110      SDValue ScalarSrc = Op.getOperand(0);
3111      if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3112        SDValue Src = ScalarSrc.getOperand(0);
3113        SDValue Idx = ScalarSrc.getOperand(1);
3114        EVT SrcVT = Src.getValueType();
3115  
3116        ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3117  
3118        if (SrcEltCnt.isScalable())
3119          return false;
3120  
3121        unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3122        if (isNullConstant(Idx)) {
3123          APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3124          APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3125          APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3126          if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3127                                         TLO, Depth + 1))
3128            return true;
3129        }
3130      }
3131      KnownUndef.setHighBits(NumElts - 1);
3132      break;
3133    }
3134    case ISD::BITCAST: {
3135      SDValue Src = Op.getOperand(0);
3136      EVT SrcVT = Src.getValueType();
3137  
3138      // We only handle vectors here.
3139      // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3140      if (!SrcVT.isVector())
3141        break;
3142  
3143      // Fast handling of 'identity' bitcasts.
3144      unsigned NumSrcElts = SrcVT.getVectorNumElements();
3145      if (NumSrcElts == NumElts)
3146        return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3147                                          KnownZero, TLO, Depth + 1);
3148  
3149      APInt SrcDemandedElts, SrcZero, SrcUndef;
3150  
3151      // Bitcast from 'large element' src vector to 'small element' vector, we
3152      // must demand a source element if any DemandedElt maps to it.
3153      if ((NumElts % NumSrcElts) == 0) {
3154        unsigned Scale = NumElts / NumSrcElts;
3155        SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3156        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3157                                       TLO, Depth + 1))
3158          return true;
3159  
3160        // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3161        // of the large element.
3162        // TODO - bigendian once we have test coverage.
3163        if (IsLE) {
3164          unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3165          APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3166          for (unsigned i = 0; i != NumElts; ++i)
3167            if (DemandedElts[i]) {
3168              unsigned Ofs = (i % Scale) * EltSizeInBits;
3169              SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3170            }
3171  
3172          KnownBits Known;
3173          if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3174                                   TLO, Depth + 1))
3175            return true;
3176  
3177          // The bitcast has split each wide element into a number of
3178          // narrow subelements. We have just computed the Known bits
3179          // for wide elements. See if element splitting results in
3180          // some subelements being zero. Only for demanded elements!
3181          for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3182            if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3183                     .isAllOnes())
3184              continue;
3185            for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3186              unsigned Elt = Scale * SrcElt + SubElt;
3187              if (DemandedElts[Elt])
3188                KnownZero.setBit(Elt);
3189            }
3190          }
3191        }
3192  
3193        // If the src element is zero/undef then all the output elements will be -
3194        // only demanded elements are guaranteed to be correct.
3195        for (unsigned i = 0; i != NumSrcElts; ++i) {
3196          if (SrcDemandedElts[i]) {
3197            if (SrcZero[i])
3198              KnownZero.setBits(i * Scale, (i + 1) * Scale);
3199            if (SrcUndef[i])
3200              KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3201          }
3202        }
3203      }
3204  
3205      // Bitcast from 'small element' src vector to 'large element' vector, we
3206      // demand all smaller source elements covered by the larger demanded element
3207      // of this vector.
3208      if ((NumSrcElts % NumElts) == 0) {
3209        unsigned Scale = NumSrcElts / NumElts;
3210        SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3211        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3212                                       TLO, Depth + 1))
3213          return true;
3214  
3215        // If all the src elements covering an output element are zero/undef, then
3216        // the output element will be as well, assuming it was demanded.
3217        for (unsigned i = 0; i != NumElts; ++i) {
3218          if (DemandedElts[i]) {
3219            if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3220              KnownZero.setBit(i);
3221            if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3222              KnownUndef.setBit(i);
3223          }
3224        }
3225      }
3226      break;
3227    }
3228    case ISD::FREEZE: {
3229      SDValue N0 = Op.getOperand(0);
3230      if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3231                                                   /*PoisonOnly=*/false))
3232        return TLO.CombineTo(Op, N0);
3233  
3234      // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3235      // freeze(op(x, ...)) -> op(freeze(x), ...).
3236      if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3237        return TLO.CombineTo(
3238            Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3239                                TLO.DAG.getFreeze(N0.getOperand(0))));
3240      break;
3241    }
3242    case ISD::BUILD_VECTOR: {
3243      // Check all elements and simplify any unused elements with UNDEF.
3244      if (!DemandedElts.isAllOnes()) {
3245        // Don't simplify BROADCASTS.
3246        if (llvm::any_of(Op->op_values(),
3247                         [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3248          SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3249          bool Updated = false;
3250          for (unsigned i = 0; i != NumElts; ++i) {
3251            if (!DemandedElts[i] && !Ops[i].isUndef()) {
3252              Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3253              KnownUndef.setBit(i);
3254              Updated = true;
3255            }
3256          }
3257          if (Updated)
3258            return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3259        }
3260      }
3261      for (unsigned i = 0; i != NumElts; ++i) {
3262        SDValue SrcOp = Op.getOperand(i);
3263        if (SrcOp.isUndef()) {
3264          KnownUndef.setBit(i);
3265        } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3266                   (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3267          KnownZero.setBit(i);
3268        }
3269      }
3270      break;
3271    }
3272    case ISD::CONCAT_VECTORS: {
3273      EVT SubVT = Op.getOperand(0).getValueType();
3274      unsigned NumSubVecs = Op.getNumOperands();
3275      unsigned NumSubElts = SubVT.getVectorNumElements();
3276      for (unsigned i = 0; i != NumSubVecs; ++i) {
3277        SDValue SubOp = Op.getOperand(i);
3278        APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3279        APInt SubUndef, SubZero;
3280        if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3281                                       Depth + 1))
3282          return true;
3283        KnownUndef.insertBits(SubUndef, i * NumSubElts);
3284        KnownZero.insertBits(SubZero, i * NumSubElts);
3285      }
3286  
3287      // Attempt to avoid multi-use ops if we don't need anything from them.
3288      if (!DemandedElts.isAllOnes()) {
3289        bool FoundNewSub = false;
3290        SmallVector<SDValue, 2> DemandedSubOps;
3291        for (unsigned i = 0; i != NumSubVecs; ++i) {
3292          SDValue SubOp = Op.getOperand(i);
3293          APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3294          SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3295              SubOp, SubElts, TLO.DAG, Depth + 1);
3296          DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3297          FoundNewSub = NewSubOp ? true : FoundNewSub;
3298        }
3299        if (FoundNewSub) {
3300          SDValue NewOp =
3301              TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3302          return TLO.CombineTo(Op, NewOp);
3303        }
3304      }
3305      break;
3306    }
3307    case ISD::INSERT_SUBVECTOR: {
3308      // Demand any elements from the subvector and the remainder from the src its
3309      // inserted into.
3310      SDValue Src = Op.getOperand(0);
3311      SDValue Sub = Op.getOperand(1);
3312      uint64_t Idx = Op.getConstantOperandVal(2);
3313      unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3314      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3315      APInt DemandedSrcElts = DemandedElts;
3316      DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3317  
3318      APInt SubUndef, SubZero;
3319      if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3320                                     Depth + 1))
3321        return true;
3322  
3323      // If none of the src operand elements are demanded, replace it with undef.
3324      if (!DemandedSrcElts && !Src.isUndef())
3325        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3326                                                 TLO.DAG.getUNDEF(VT), Sub,
3327                                                 Op.getOperand(2)));
3328  
3329      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3330                                     TLO, Depth + 1))
3331        return true;
3332      KnownUndef.insertBits(SubUndef, Idx);
3333      KnownZero.insertBits(SubZero, Idx);
3334  
3335      // Attempt to avoid multi-use ops if we don't need anything from them.
3336      if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3337        SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3338            Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3339        SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3340            Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3341        if (NewSrc || NewSub) {
3342          NewSrc = NewSrc ? NewSrc : Src;
3343          NewSub = NewSub ? NewSub : Sub;
3344          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3345                                          NewSub, Op.getOperand(2));
3346          return TLO.CombineTo(Op, NewOp);
3347        }
3348      }
3349      break;
3350    }
3351    case ISD::EXTRACT_SUBVECTOR: {
3352      // Offset the demanded elts by the subvector index.
3353      SDValue Src = Op.getOperand(0);
3354      if (Src.getValueType().isScalableVector())
3355        break;
3356      uint64_t Idx = Op.getConstantOperandVal(1);
3357      unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3358      APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3359  
3360      APInt SrcUndef, SrcZero;
3361      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3362                                     Depth + 1))
3363        return true;
3364      KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3365      KnownZero = SrcZero.extractBits(NumElts, Idx);
3366  
3367      // Attempt to avoid multi-use ops if we don't need anything from them.
3368      if (!DemandedElts.isAllOnes()) {
3369        SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3370            Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3371        if (NewSrc) {
3372          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3373                                          Op.getOperand(1));
3374          return TLO.CombineTo(Op, NewOp);
3375        }
3376      }
3377      break;
3378    }
3379    case ISD::INSERT_VECTOR_ELT: {
3380      SDValue Vec = Op.getOperand(0);
3381      SDValue Scl = Op.getOperand(1);
3382      auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3383  
3384      // For a legal, constant insertion index, if we don't need this insertion
3385      // then strip it, else remove it from the demanded elts.
3386      if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3387        unsigned Idx = CIdx->getZExtValue();
3388        if (!DemandedElts[Idx])
3389          return TLO.CombineTo(Op, Vec);
3390  
3391        APInt DemandedVecElts(DemandedElts);
3392        DemandedVecElts.clearBit(Idx);
3393        if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3394                                       KnownZero, TLO, Depth + 1))
3395          return true;
3396  
3397        KnownUndef.setBitVal(Idx, Scl.isUndef());
3398  
3399        KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3400        break;
3401      }
3402  
3403      APInt VecUndef, VecZero;
3404      if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3405                                     Depth + 1))
3406        return true;
3407      // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3408      break;
3409    }
3410    case ISD::VSELECT: {
3411      SDValue Sel = Op.getOperand(0);
3412      SDValue LHS = Op.getOperand(1);
3413      SDValue RHS = Op.getOperand(2);
3414  
3415      // Try to transform the select condition based on the current demanded
3416      // elements.
3417      APInt UndefSel, ZeroSel;
3418      if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3419                                     Depth + 1))
3420        return true;
3421  
3422      // See if we can simplify either vselect operand.
3423      APInt DemandedLHS(DemandedElts);
3424      APInt DemandedRHS(DemandedElts);
3425      APInt UndefLHS, ZeroLHS;
3426      APInt UndefRHS, ZeroRHS;
3427      if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3428                                     Depth + 1))
3429        return true;
3430      if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3431                                     Depth + 1))
3432        return true;
3433  
3434      KnownUndef = UndefLHS & UndefRHS;
3435      KnownZero = ZeroLHS & ZeroRHS;
3436  
3437      // If we know that the selected element is always zero, we don't need the
3438      // select value element.
3439      APInt DemandedSel = DemandedElts & ~KnownZero;
3440      if (DemandedSel != DemandedElts)
3441        if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3442                                       Depth + 1))
3443          return true;
3444  
3445      break;
3446    }
3447    case ISD::VECTOR_SHUFFLE: {
3448      SDValue LHS = Op.getOperand(0);
3449      SDValue RHS = Op.getOperand(1);
3450      ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3451  
3452      // Collect demanded elements from shuffle operands..
3453      APInt DemandedLHS(NumElts, 0);
3454      APInt DemandedRHS(NumElts, 0);
3455      for (unsigned i = 0; i != NumElts; ++i) {
3456        int M = ShuffleMask[i];
3457        if (M < 0 || !DemandedElts[i])
3458          continue;
3459        assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3460        if (M < (int)NumElts)
3461          DemandedLHS.setBit(M);
3462        else
3463          DemandedRHS.setBit(M - NumElts);
3464      }
3465  
3466      // See if we can simplify either shuffle operand.
3467      APInt UndefLHS, ZeroLHS;
3468      APInt UndefRHS, ZeroRHS;
3469      if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3470                                     Depth + 1))
3471        return true;
3472      if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3473                                     Depth + 1))
3474        return true;
3475  
3476      // Simplify mask using undef elements from LHS/RHS.
3477      bool Updated = false;
3478      bool IdentityLHS = true, IdentityRHS = true;
3479      SmallVector<int, 32> NewMask(ShuffleMask);
3480      for (unsigned i = 0; i != NumElts; ++i) {
3481        int &M = NewMask[i];
3482        if (M < 0)
3483          continue;
3484        if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3485            (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3486          Updated = true;
3487          M = -1;
3488        }
3489        IdentityLHS &= (M < 0) || (M == (int)i);
3490        IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3491      }
3492  
3493      // Update legal shuffle masks based on demanded elements if it won't reduce
3494      // to Identity which can cause premature removal of the shuffle mask.
3495      if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3496        SDValue LegalShuffle =
3497            buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3498        if (LegalShuffle)
3499          return TLO.CombineTo(Op, LegalShuffle);
3500      }
3501  
3502      // Propagate undef/zero elements from LHS/RHS.
3503      for (unsigned i = 0; i != NumElts; ++i) {
3504        int M = ShuffleMask[i];
3505        if (M < 0) {
3506          KnownUndef.setBit(i);
3507        } else if (M < (int)NumElts) {
3508          if (UndefLHS[M])
3509            KnownUndef.setBit(i);
3510          if (ZeroLHS[M])
3511            KnownZero.setBit(i);
3512        } else {
3513          if (UndefRHS[M - NumElts])
3514            KnownUndef.setBit(i);
3515          if (ZeroRHS[M - NumElts])
3516            KnownZero.setBit(i);
3517        }
3518      }
3519      break;
3520    }
3521    case ISD::ANY_EXTEND_VECTOR_INREG:
3522    case ISD::SIGN_EXTEND_VECTOR_INREG:
3523    case ISD::ZERO_EXTEND_VECTOR_INREG: {
3524      APInt SrcUndef, SrcZero;
3525      SDValue Src = Op.getOperand(0);
3526      unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3527      APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3528      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3529                                     Depth + 1))
3530        return true;
3531      KnownZero = SrcZero.zextOrTrunc(NumElts);
3532      KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3533  
3534      if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3535          Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3536          DemandedSrcElts == 1) {
3537        // aext - if we just need the bottom element then we can bitcast.
3538        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3539      }
3540  
3541      if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3542        // zext(undef) upper bits are guaranteed to be zero.
3543        if (DemandedElts.isSubsetOf(KnownUndef))
3544          return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3545        KnownUndef.clearAllBits();
3546  
3547        // zext - if we just need the bottom element then we can mask:
3548        // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3549        if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3550            Op->isOnlyUserOf(Src.getNode()) &&
3551            Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3552          SDLoc DL(Op);
3553          EVT SrcVT = Src.getValueType();
3554          EVT SrcSVT = SrcVT.getScalarType();
3555          SmallVector<SDValue> MaskElts;
3556          MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3557          MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3558          SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3559          if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3560                  ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3561            Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3562            return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3563          }
3564        }
3565      }
3566      break;
3567    }
3568  
3569    // TODO: There are more binop opcodes that could be handled here - MIN,
3570    // MAX, saturated math, etc.
3571    case ISD::ADD: {
3572      SDValue Op0 = Op.getOperand(0);
3573      SDValue Op1 = Op.getOperand(1);
3574      if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3575        APInt UndefLHS, ZeroLHS;
3576        if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3577                                       Depth + 1, /*AssumeSingleUse*/ true))
3578          return true;
3579      }
3580      [[fallthrough]];
3581    }
3582    case ISD::AVGCEILS:
3583    case ISD::AVGCEILU:
3584    case ISD::AVGFLOORS:
3585    case ISD::AVGFLOORU:
3586    case ISD::OR:
3587    case ISD::XOR:
3588    case ISD::SUB:
3589    case ISD::FADD:
3590    case ISD::FSUB:
3591    case ISD::FMUL:
3592    case ISD::FDIV:
3593    case ISD::FREM: {
3594      SDValue Op0 = Op.getOperand(0);
3595      SDValue Op1 = Op.getOperand(1);
3596  
3597      APInt UndefRHS, ZeroRHS;
3598      if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3599                                     Depth + 1))
3600        return true;
3601      APInt UndefLHS, ZeroLHS;
3602      if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3603                                     Depth + 1))
3604        return true;
3605  
3606      KnownZero = ZeroLHS & ZeroRHS;
3607      KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3608  
3609      // Attempt to avoid multi-use ops if we don't need anything from them.
3610      // TODO - use KnownUndef to relax the demandedelts?
3611      if (!DemandedElts.isAllOnes())
3612        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3613          return true;
3614      break;
3615    }
3616    case ISD::SHL:
3617    case ISD::SRL:
3618    case ISD::SRA:
3619    case ISD::ROTL:
3620    case ISD::ROTR: {
3621      SDValue Op0 = Op.getOperand(0);
3622      SDValue Op1 = Op.getOperand(1);
3623  
3624      APInt UndefRHS, ZeroRHS;
3625      if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3626                                     Depth + 1))
3627        return true;
3628      APInt UndefLHS, ZeroLHS;
3629      if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3630                                     Depth + 1))
3631        return true;
3632  
3633      KnownZero = ZeroLHS;
3634      KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3635  
3636      // Attempt to avoid multi-use ops if we don't need anything from them.
3637      // TODO - use KnownUndef to relax the demandedelts?
3638      if (!DemandedElts.isAllOnes())
3639        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3640          return true;
3641      break;
3642    }
3643    case ISD::MUL:
3644    case ISD::MULHU:
3645    case ISD::MULHS:
3646    case ISD::AND: {
3647      SDValue Op0 = Op.getOperand(0);
3648      SDValue Op1 = Op.getOperand(1);
3649  
3650      APInt SrcUndef, SrcZero;
3651      if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3652                                     Depth + 1))
3653        return true;
3654      // If we know that a demanded element was zero in Op1 we don't need to
3655      // demand it in Op0 - its guaranteed to be zero.
3656      APInt DemandedElts0 = DemandedElts & ~SrcZero;
3657      if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3658                                     TLO, Depth + 1))
3659        return true;
3660  
3661      KnownUndef &= DemandedElts0;
3662      KnownZero &= DemandedElts0;
3663  
3664      // If every element pair has a zero/undef then just fold to zero.
3665      // fold (and x, undef) -> 0  /  (and x, 0) -> 0
3666      // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
3667      if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3668        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3669  
3670      // If either side has a zero element, then the result element is zero, even
3671      // if the other is an UNDEF.
3672      // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3673      // and then handle 'and' nodes with the rest of the binop opcodes.
3674      KnownZero |= SrcZero;
3675      KnownUndef &= SrcUndef;
3676      KnownUndef &= ~KnownZero;
3677  
3678      // Attempt to avoid multi-use ops if we don't need anything from them.
3679      if (!DemandedElts.isAllOnes())
3680        if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3681          return true;
3682      break;
3683    }
3684    case ISD::TRUNCATE:
3685    case ISD::SIGN_EXTEND:
3686    case ISD::ZERO_EXTEND:
3687      if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3688                                     KnownZero, TLO, Depth + 1))
3689        return true;
3690  
3691      if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3692        // zext(undef) upper bits are guaranteed to be zero.
3693        if (DemandedElts.isSubsetOf(KnownUndef))
3694          return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3695        KnownUndef.clearAllBits();
3696      }
3697      break;
3698    default: {
3699      if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3700        if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3701                                                    KnownZero, TLO, Depth))
3702          return true;
3703      } else {
3704        KnownBits Known;
3705        APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3706        if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3707                                 TLO, Depth, AssumeSingleUse))
3708          return true;
3709      }
3710      break;
3711    }
3712    }
3713    assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3714  
3715    // Constant fold all undef cases.
3716    // TODO: Handle zero cases as well.
3717    if (DemandedElts.isSubsetOf(KnownUndef))
3718      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3719  
3720    return false;
3721  }
3722  
/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the Known.
///
/// Default (conservative) implementation of the target hook: targets override
/// this to expose known-bits facts about their custom/intrinsic nodes.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  // By default nothing is known about any bit of the result.
  Known.resetAll();
}
3738  
/// GlobalISel counterpart of computeKnownBitsForTargetNode.
/// Default (conservative) implementation: no bits of register \p R are known.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3745  
computeKnownBitsForFrameIndex(const int FrameIdx,KnownBits & Known,const MachineFunction & MF) const3746  void TargetLowering::computeKnownBitsForFrameIndex(
3747    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3748    // The low bits are known zero if the pointer is aligned.
3749    Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3750  }
3751  
/// GlobalISel hook for alignment knowledge of target instructions.
/// Default (conservative) implementation: claim only 1-byte alignment.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3757  
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
///
/// Default (conservative) implementation for target nodes/intrinsics.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  // Only the sign bit itself is guaranteed to replicate the sign.
  return 1;
}
3772  
/// GlobalISel counterpart of ComputeNumSignBitsForTargetNode.
/// Default (conservative) implementation: only the sign bit is known.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3778  
/// Target hook invoked by SimplifyDemandedVectorElts for target-specific
/// nodes. Default implementation performs no simplification and reports no
/// known undef/zero elements.
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  // No target-specific simplification was performed.
  return false;
}
3790  
/// Target hook invoked by SimplifyDemandedBits for target-specific nodes.
/// Default implementation performs no simplification, but still queries the
/// known-bits hook so callers receive whatever bit knowledge is available.
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  // Populate Known even though no combine happens here.
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  return false;
}
3803  
/// Target hook invoked by SimplifyMultipleUseDemandedBits for target-specific
/// nodes. Default implementation offers no alternative (simpler) value.
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  // Empty SDValue == no replacement found.
  return SDValue();
}
3816  
3817  SDValue
buildLegalVectorShuffle(EVT VT,const SDLoc & DL,SDValue N0,SDValue N1,MutableArrayRef<int> Mask,SelectionDAG & DAG) const3818  TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3819                                          SDValue N1, MutableArrayRef<int> Mask,
3820                                          SelectionDAG &DAG) const {
3821    bool LegalMask = isShuffleMaskLegal(Mask, VT);
3822    if (!LegalMask) {
3823      std::swap(N0, N1);
3824      ShuffleVectorSDNode::commuteMask(Mask);
3825      LegalMask = isShuffleMaskLegal(Mask, VT);
3826    }
3827  
3828    if (!LegalMask)
3829      return SDValue();
3830  
3831    return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3832  }
3833  
/// Return the IR constant a target-specific load materializes, if any.
/// Default implementation: no constant is recognized.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3837  
isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,const APInt & DemandedElts,const SelectionDAG & DAG,bool PoisonOnly,unsigned Depth) const3838  bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3839      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3840      bool PoisonOnly, unsigned Depth) const {
3841    assert(
3842        (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3843         Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3844         Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3845         Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3846        "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3847        " is a target node!");
3848  
3849    // If Op can't create undef/poison and none of its operands are undef/poison
3850    // then Op is never undef/poison.
3851    return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3852                                                /*ConsiderFlags*/ true, Depth) &&
3853           all_of(Op->ops(), [&](SDValue V) {
3854             return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3855                                                         Depth + 1);
3856           });
3857  }
3858  
/// Target hook: can this target node introduce undef/poison that its operands
/// did not already carry? Default implementation is maximally conservative.
bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use canCreateUndefOrPoison if you don't know whether Op"
         " is a target node!");
  // Be conservative and return true.
  return true;
}
3871  
/// Target hook: is this target node's result known to never be NaN
/// (or never be a signaling NaN when \p SNaN is set)?
/// Default (conservative) implementation: cannot rule out NaN.
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  return false;
}
3884  
/// Target hook: is this target node a splat of a single scalar value across
/// the demanded elements? Default (conservative) implementation: unknown.
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  return false;
}
3898  
3899  // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3900  // work with truncating build vectors and vectors with elements of less than
3901  // 8 bits.
isConstTrueVal(SDValue N) const3902  bool TargetLowering::isConstTrueVal(SDValue N) const {
3903    if (!N)
3904      return false;
3905  
3906    unsigned EltWidth;
3907    APInt CVal;
3908    if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3909                                                 /*AllowTruncation=*/true)) {
3910      CVal = CN->getAPIntValue();
3911      EltWidth = N.getValueType().getScalarSizeInBits();
3912    } else
3913      return false;
3914  
3915    // If this is a truncating splat, truncate the splat value.
3916    // Otherwise, we may fail to match the expected values below.
3917    if (EltWidth < CVal.getBitWidth())
3918      CVal = CVal.trunc(EltWidth);
3919  
3920    switch (getBooleanContents(N.getValueType())) {
3921    case UndefinedBooleanContent:
3922      return CVal[0];
3923    case ZeroOrOneBooleanContent:
3924      return CVal.isOne();
3925    case ZeroOrNegativeOneBooleanContent:
3926      return CVal.isAllOnes();
3927    }
3928  
3929    llvm_unreachable("Invalid boolean contents");
3930  }
3931  
isConstFalseVal(SDValue N) const3932  bool TargetLowering::isConstFalseVal(SDValue N) const {
3933    if (!N)
3934      return false;
3935  
3936    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3937    if (!CN) {
3938      const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3939      if (!BV)
3940        return false;
3941  
3942      // Only interested in constant splats, we don't care about undef
3943      // elements in identifying boolean constants and getConstantSplatNode
3944      // returns NULL if all ops are undef;
3945      CN = BV->getConstantSplatNode();
3946      if (!CN)
3947        return false;
3948    }
3949  
3950    if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3951      return !CN->getAPIntValue()[0];
3952  
3953    return CN->isZero();
3954  }
3955  
isExtendedTrueVal(const ConstantSDNode * N,EVT VT,bool SExt) const3956  bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3957                                         bool SExt) const {
3958    if (VT == MVT::i1)
3959      return N->isOne();
3960  
3961    TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3962    switch (Cnt) {
3963    case TargetLowering::ZeroOrOneBooleanContent:
3964      // An extended value of 1 is always true, unless its original type is i1,
3965      // in which case it will be sign extended to -1.
3966      return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3967    case TargetLowering::UndefinedBooleanContent:
3968    case TargetLowering::ZeroOrNegativeOneBooleanContent:
3969      return N->isAllOnes() && SExt;
3970    }
3971    llvm_unreachable("Unexpected enumeration.");
3972  }
3973  
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Handles only integer eq/ne compares. Returns an empty SDValue when no
/// fold applies.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize so that any AND operand ends up on the LHS (N0).
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer and-compares with eq/ne are handled by this helper.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // The narrow width is chosen so the tested mask bit becomes the sign bit.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // its liable to create and infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4068  
4069  /// There are multiple IR patterns that could be checking whether certain
4070  /// truncation of a signed number would be lossy or not. The pattern which is
4071  /// best at IR level, may not lower optimally. Thus, we want to unfold it.
4072  /// We are looking for the following pattern: (KeptBits is a constant)
4073  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4074  /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4075  /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
4076  /// We will unfold it into the natural trunc+sext pattern:
4077  ///   ((%x << C) a>> C) dstcond %x
4078  /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
optimizeSetCCOfSignedTruncationCheck(EVT SCCVT,SDValue N0,SDValue N1,ISD::CondCode Cond,DAGCombinerInfo & DCI,const SDLoc & DL) const4079  SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4080      EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4081      const SDLoc &DL) const {
4082    // We must be comparing with a constant.
4083    ConstantSDNode *C1;
4084    if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4085      return SDValue();
4086  
4087    // N0 should be:  add %x, (1 << (KeptBits-1))
4088    if (N0->getOpcode() != ISD::ADD)
4089      return SDValue();
4090  
4091    // And we must be 'add'ing a constant.
4092    ConstantSDNode *C01;
4093    if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4094      return SDValue();
4095  
4096    SDValue X = N0->getOperand(0);
4097    EVT XVT = X.getValueType();
4098  
4099    // Validate constants ...
4100  
4101    APInt I1 = C1->getAPIntValue();
4102  
  // Map the unsigned range predicate onto eq/ne. The inclusive/strict
  // variants (ule/ugt) are normalized by bumping the setcc constant by one,
  // so that checkConstants() below can always expect the same shape.
4103    ISD::CondCode NewCond;
4104    if (Cond == ISD::CondCode::SETULT) {
4105      NewCond = ISD::CondCode::SETEQ;
4106    } else if (Cond == ISD::CondCode::SETULE) {
4107      NewCond = ISD::CondCode::SETEQ;
4108      // But need to 'canonicalize' the constant.
4109      I1 += 1;
4110    } else if (Cond == ISD::CondCode::SETUGT) {
4111      NewCond = ISD::CondCode::SETNE;
4112      // But need to 'canonicalize' the constant.
4113      I1 += 1;
4114    } else if (Cond == ISD::CondCode::SETUGE) {
4115      NewCond = ISD::CondCode::SETNE;
4116    } else
4117      return SDValue();
4118  
4119    APInt I01 = C01->getAPIntValue();
4120  
4121    auto checkConstants = [&I1, &I01]() -> bool {
4122      // Both of them must be power-of-two, and the constant from setcc is bigger.
4123      return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4124    };
4125  
4126    if (checkConstants()) {
4127      // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
4128    } else {
4129      // What if we invert constants? (and the target predicate)
4130      I1.negate();
4131      I01.negate();
4132      assert(XVT.isInteger());
4133      NewCond = getSetCCInverse(NewCond, XVT);
4134      if (!checkConstants())
4135        return SDValue();
4136      // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
4137    }
4138  
4139    // They are power-of-two, so which bit is set?
4140    const unsigned KeptBits = I1.logBase2();
4141    const unsigned KeptBitsMinusOne = I01.logBase2();
4142  
  // The two powers of two must be adjacent, i.e. the 'add' constant is
  // exactly half the setcc constant; otherwise this is not the pattern.
4144    if (KeptBits != (KeptBitsMinusOne + 1))
4145      return SDValue();
4146    assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4147  
4148    // We don't want to do this in every single case.
4149    SelectionDAG &DAG = DCI.DAG;
4150    if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4151            XVT, KeptBits))
4152      return SDValue();
4153  
4154    // Unfold into:  sext_inreg(%x) cond %x
4155    // Where 'cond' will be either 'eq' or 'ne'.
4156    SDValue SExtInReg = DAG.getNode(
4157        ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4158        DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4159    return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4160  }
4161  
4162  // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
optimizeSetCCByHoistingAndByConstFromLogicalShift(EVT SCCVT,SDValue N0,SDValue N1C,ISD::CondCode Cond,DAGCombinerInfo & DCI,const SDLoc & DL) const4163  SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4164      EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4165      DAGCombinerInfo &DCI, const SDLoc &DL) const {
4166    assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4167           "Should be a comparison with 0.");
4168    assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4169           "Valid only for [in]equality comparisons.");
4170  
4171    unsigned NewShiftOpcode;
4172    SDValue X, C, Y;
4173  
4174    SelectionDAG &DAG = DCI.DAG;
4175    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4176  
4177    // Look for '(C l>>/<< Y)'.
  // On success, fills in NewShiftOpcode/C/Y and asks the target whether the
  // hoisted form is profitable. Note: 'X' (the other operand of the 'and')
  // must be set by the caller before each invocation.
4178    auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4179      // The shift should be one-use.
4180      if (!V.hasOneUse())
4181        return false;
4182      unsigned OldShiftOpcode = V.getOpcode();
4183      switch (OldShiftOpcode) {
4184      case ISD::SHL:
4185        NewShiftOpcode = ISD::SRL;
4186        break;
4187      case ISD::SRL:
4188        NewShiftOpcode = ISD::SHL;
4189        break;
4190      default:
4191        return false; // must be a logical shift.
4192      }
4193      // We should be shifting a constant.
4194      // FIXME: best to use isConstantOrConstantVector().
4195      C = V.getOperand(0);
4196      ConstantSDNode *CC =
4197          isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4198      if (!CC)
4199        return false;
4200      Y = V.getOperand(1);
4201  
4202      ConstantSDNode *XC =
4203          isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4204      return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4205          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4206    };
4207  
4208    // LHS of comparison should be a one-use 'and'.
4209    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4210      return SDValue();
4211  
4212    X = N0.getOperand(0);
4213    SDValue Mask = N0.getOperand(1);
4214  
4215    // 'and' is commutative!
4216    if (!Match(Mask)) {
4217      std::swap(X, Mask);
4218      if (!Match(Mask))
4219        return SDValue();
4220    }
4221  
4222    EVT VT = X.getValueType();
4223  
4224    // Produce:
4225    // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4226    SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4227    SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4228    SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4229    return T2;
4230  }
4231  
4232  /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4233  /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4234  /// handle the commuted versions of these patterns.
foldSetCCWithBinOp(EVT VT,SDValue N0,SDValue N1,ISD::CondCode Cond,const SDLoc & DL,DAGCombinerInfo & DCI) const4235  SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4236                                             ISD::CondCode Cond, const SDLoc &DL,
4237                                             DAGCombinerInfo &DCI) const {
4238    unsigned BOpcode = N0.getOpcode();
4239    assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4240           "Unexpected binop");
4241    assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4242  
  // All folds below hold unconditionally in two's-complement modular
  // arithmetic, so no wrap flags are required on the binop.
4243    // (X + Y) == X --> Y == 0
4244    // (X - Y) == X --> Y == 0
4245    // (X ^ Y) == X --> Y == 0
4246    SelectionDAG &DAG = DCI.DAG;
4247    EVT OpVT = N0.getValueType();
4248    SDValue X = N0.getOperand(0);
4249    SDValue Y = N0.getOperand(1);
4250    if (X == N1)
4251      return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4252  
4253    if (Y != N1)
4254      return SDValue();
4255  
4256    // (X + Y) == Y --> X == 0
4257    // (X ^ Y) == Y --> X == 0
4258    if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4259      return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4260  
  // Only the SUB case remains. Don't bother creating a new shift node if the
  // binop has other users (the sub would stay live anyway).
4261    // The shift would not be valid if the operands are boolean (i1).
4262    if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4263      return SDValue();
4264  
4265    // (X - Y) == Y --> X == Y << 1
4266    SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4267    SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4268    if (!DCI.isCalledByLegalizer())
4269      DCI.AddToWorklist(YShl1.getNode());
4270    return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4271  }
4272  
simplifySetCCWithCTPOP(const TargetLowering & TLI,EVT VT,SDValue N0,const APInt & C1,ISD::CondCode Cond,const SDLoc & dl,SelectionDAG & DAG)4273  static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4274                                        SDValue N0, const APInt &C1,
4275                                        ISD::CondCode Cond, const SDLoc &dl,
4276                                        SelectionDAG &DAG) {
4277    // Look through truncs that don't change the value of a ctpop.
  // (A ctpop of an N-bit value needs at most Log2_32(N) + 1 result bits, so
  // a truncation to anything wider than Log2_32(N) bits is value-preserving.)
4278    // FIXME: Add vector support? Need to be careful with setcc result type below.
4279    SDValue CTPOP = N0;
4280    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4281        N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4282      CTPOP = N0.getOperand(0);
4283  
4284    if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4285      return SDValue();
4286  
4287    EVT CTVT = CTPOP.getValueType();
4288    SDValue CTOp = CTPOP.getOperand(0);
4289  
4290    // Expand a power-of-2-or-zero comparison based on ctpop:
4291    // (ctpop x) u< 2 -> (x & x-1) == 0
4292    // (ctpop x) u> 1 -> (x & x-1) != 0
4293    if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4294      // Keep the CTPOP if it is a cheap vector op.
4295      if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4296        return SDValue();
4297  
4298      unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4299      if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4300        return SDValue();
4301      if (C1 == 0 && (Cond == ISD::SETULT))
4302        return SDValue(); // This is handled elsewhere.
4303  
4304      unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4305  
    // x & (x - 1) clears the lowest set bit of x. Doing that 'Passes' times
    // leaves zero iff x had at most 'Passes' bits set, which is exactly the
    // ctpop comparison being expanded.
4306      SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4307      SDValue Result = CTOp;
4308      for (unsigned i = 0; i < Passes; i++) {
4309        SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4310        Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4311      }
4312      ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4313      return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4314    }
4315  
4316    // Expand a power-of-2 comparison based on ctpop
4317    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4318      // Keep the CTPOP if it is cheap.
4319      if (TLI.isCtpopFast(CTVT))
4320        return SDValue();
4321  
4322      SDValue Zero = DAG.getConstant(0, dl, CTVT);
4323      SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4324      assert(CTVT.isInteger());
4325      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4326  
4327      // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4328      // check before emitting a potentially unnecessary op.
4329      if (DAG.isKnownNeverZero(CTOp)) {
4330        // (ctpop x) == 1 --> (x & x-1) == 0
4331        // (ctpop x) != 1 --> (x & x-1) != 0
4332        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4333        SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4334        return RHS;
4335      }
4336  
      // The xor/compare form is correct even when x == 0 (where x & x-1 would
      // wrongly report a single bit).
4337      // (ctpop x) == 1 --> (x ^ x-1) >  x-1
4338      // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4339      SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4340      ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4341      return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4342    }
4343  
4344    return SDValue();
4345  }
4346  
foldSetCCWithRotate(EVT VT,SDValue N0,SDValue N1,ISD::CondCode Cond,const SDLoc & dl,SelectionDAG & DAG)4347  static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4348                                     ISD::CondCode Cond, const SDLoc &dl,
4349                                     SelectionDAG &DAG) {
4350    if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4351      return SDValue();
4352  
4353    auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4354    if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4355      return SDValue();
4356  
4357    auto getRotateSource = [](SDValue X) {
4358      if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4359        return X.getOperand(0);
4360      return SDValue();
4361    };
4362  
4363    // Peek through a rotated value compared against 0 or -1:
4364    // (rot X, Y) == 0/-1 --> X == 0/-1
4365    // (rot X, Y) != 0/-1 --> X != 0/-1
4366    if (SDValue R = getRotateSource(N0))
4367      return DAG.getSetCC(dl, VT, R, N1, Cond);
4368  
4369    // Peek through an 'or' of a rotated value compared against 0:
4370    // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4371    // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4372    //
4373    // TODO: Add the 'and' with -1 sibling.
4374    // TODO: Recurse through a series of 'or' ops to find the rotate.
4375    EVT OpVT = N0.getValueType();
4376    if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4377      if (SDValue R = getRotateSource(N0.getOperand(0))) {
4378        SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4379        return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4380      }
4381      if (SDValue R = getRotateSource(N0.getOperand(1))) {
4382        SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4383        return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4384      }
4385    }
4386  
4387    return SDValue();
4388  }
4389  
foldSetCCWithFunnelShift(EVT VT,SDValue N0,SDValue N1,ISD::CondCode Cond,const SDLoc & dl,SelectionDAG & DAG)4390  static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4391                                          ISD::CondCode Cond, const SDLoc &dl,
4392                                          SelectionDAG &DAG) {
4393    // If we are testing for all-bits-clear, we might be able to do that with
4394    // less shifting since bit-order does not matter.
4395    if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4396      return SDValue();
4397  
4398    auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4399    if (!C1 || !C1->isZero())
4400      return SDValue();
4401  
4402    if (!N0.hasOneUse() ||
4403        (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4404      return SDValue();
4405  
4406    unsigned BitWidth = N0.getScalarValueSizeInBits();
4407    auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4408    if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4409      return SDValue();
4410  
4411    // Canonicalize fshr as fshl to reduce pattern-matching.
4412    unsigned ShAmt = ShAmtC->getZExtValue();
4413    if (N0.getOpcode() == ISD::FSHR)
4414      ShAmt = BitWidth - ShAmt;
  // NOTE(review): for FSHR with a zero shift amount this yields
  // ShAmt == BitWidth; presumably such identity funnel shifts are folded
  // away before reaching here — confirm, since the SHL below would then use
  // an out-of-range shift amount.
4415  
4416    // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the 'or' operand equal to 'Other' (the shared value)
  // and Y is the remaining 'or' operand.
4417    SDValue X, Y;
4418    auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4419      if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4420        return false;
4421      if (Or.getOperand(0) == Other) {
4422        X = Or.getOperand(0);
4423        Y = Or.getOperand(1);
4424        return true;
4425      }
4426      if (Or.getOperand(1) == Other) {
4427        X = Or.getOperand(1);
4428        Y = Or.getOperand(0);
4429        return true;
4430      }
4431      return false;
4432    };
4433  
4434    EVT OpVT = N0.getValueType();
4435    EVT ShAmtVT = N0.getOperand(2).getValueType();
4436    SDValue F0 = N0.getOperand(0);
4437    SDValue F1 = N0.getOperand(1);
4438    if (matchOr(F0, F1)) {
4439      // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4440      SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4441      SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4442      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4443      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4444    }
4445    if (matchOr(F1, F0)) {
4446      // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4447      SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4448      SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4449      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4450      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4451    }
4452  
4453    return SDValue();
4454  }
4455  
4456  /// Try to simplify a setcc built with the specified operands and cc. If it is
4457  /// unable to simplify it, return a null SDValue.
SimplifySetCC(EVT VT,SDValue N0,SDValue N1,ISD::CondCode Cond,bool foldBooleans,DAGCombinerInfo & DCI,const SDLoc & dl) const4458  SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4459                                        ISD::CondCode Cond, bool foldBooleans,
4460                                        DAGCombinerInfo &DCI,
4461                                        const SDLoc &dl) const {
4462    SelectionDAG &DAG = DCI.DAG;
4463    const DataLayout &Layout = DAG.getDataLayout();
4464    EVT OpVT = N0.getValueType();
4465    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4466  
4467    // Constant fold or commute setcc.
4468    if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4469      return Fold;
4470  
4471    bool N0ConstOrSplat =
4472        isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4473    bool N1ConstOrSplat =
4474        isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4475  
4476    // Canonicalize toward having the constant on the RHS.
4477    // TODO: Handle non-splat vector constants. All undef causes trouble.
4478    // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4479    // infinite loop here when we encounter one.
4480    ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4481    if (N0ConstOrSplat && !N1ConstOrSplat &&
4482        (DCI.isBeforeLegalizeOps() ||
4483         isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4484      return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4485  
4486    // If we have a subtract with the same 2 non-constant operands as this setcc
4487    // -- but in reverse order -- then try to commute the operands of this setcc
4488    // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4489    // instruction on some targets.
4490    if (!N0ConstOrSplat && !N1ConstOrSplat &&
4491        (DCI.isBeforeLegalizeOps() ||
4492         isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4493        DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4494        !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4495      return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4496  
4497    if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4498      return V;
4499  
4500    if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4501      return V;
4502  
4503    if (auto *N1C = isConstOrConstSplat(N1)) {
4504      const APInt &C1 = N1C->getAPIntValue();
4505  
4506      // Optimize some CTPOP cases.
4507      if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4508        return V;
4509  
4510      // For equality to 0 of a no-wrap multiply, decompose and test each op:
4511      // X * Y == 0 --> (X == 0) || (Y == 0)
4512      // X * Y != 0 --> (X != 0) && (Y != 0)
4513      // TODO: This bails out if minsize is set, but if the target doesn't have a
4514      //       single instruction multiply for this type, it would likely be
4515      //       smaller to decompose.
4516      if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4517          N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4518          (N0->getFlags().hasNoUnsignedWrap() ||
4519           N0->getFlags().hasNoSignedWrap()) &&
4520          !Attr.hasFnAttr(Attribute::MinSize)) {
4521        SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4522        SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4523        unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4524        return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4525      }
4526  
4527      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4528      // equality comparison, then we're just comparing whether X itself is
4529      // zero.
4530      if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4531          N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4532          llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4533        if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4534          if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4535              ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4536            if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4537              // (srl (ctlz x), 5) == 0  -> X != 0
4538              // (srl (ctlz x), 5) != 1  -> X != 0
4539              Cond = ISD::SETNE;
4540            } else {
4541              // (srl (ctlz x), 5) != 0  -> X == 0
4542              // (srl (ctlz x), 5) == 1  -> X == 0
4543              Cond = ISD::SETEQ;
4544            }
4545            SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4546            return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4547                                Cond);
4548          }
4549        }
4550      }
4551    }
4552  
4553    // FIXME: Support vectors.
4554    if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4555      const APInt &C1 = N1C->getAPIntValue();
4556  
4557      // (zext x) == C --> x == (trunc C)
4558      // (sext x) == C --> x == (trunc C)
4559      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4560          DCI.isBeforeLegalize() && N0->hasOneUse()) {
4561        unsigned MinBits = N0.getValueSizeInBits();
4562        SDValue PreExt;
4563        bool Signed = false;
4564        if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4565          // ZExt
4566          MinBits = N0->getOperand(0).getValueSizeInBits();
4567          PreExt = N0->getOperand(0);
4568        } else if (N0->getOpcode() == ISD::AND) {
4569          // DAGCombine turns costly ZExts into ANDs
4570          if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4571            if ((C->getAPIntValue()+1).isPowerOf2()) {
4572              MinBits = C->getAPIntValue().countr_one();
4573              PreExt = N0->getOperand(0);
4574            }
4575        } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4576          // SExt
4577          MinBits = N0->getOperand(0).getValueSizeInBits();
4578          PreExt = N0->getOperand(0);
4579          Signed = true;
4580        } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4581          // ZEXTLOAD / SEXTLOAD
4582          if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4583            MinBits = LN0->getMemoryVT().getSizeInBits();
4584            PreExt = N0;
4585          } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4586            Signed = true;
4587            MinBits = LN0->getMemoryVT().getSizeInBits();
4588            PreExt = N0;
4589          }
4590        }
4591  
4592        // Figure out how many bits we need to preserve this constant.
4593        unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4594  
4595        // Make sure we're not losing bits from the constant.
4596        if (MinBits > 0 &&
4597            MinBits < C1.getBitWidth() &&
4598            MinBits >= ReqdBits) {
4599          EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4600          if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4601            // Will get folded away.
4602            SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4603            if (MinBits == 1 && C1 == 1)
4604              // Invert the condition.
4605              return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4606                                  Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4607            SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4608            return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4609          }
4610  
4611          // If truncating the setcc operands is not desirable, we can still
4612          // simplify the expression in some cases:
4613          // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4614          // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4615          // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4616          // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4617          // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4618          // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4619          SDValue TopSetCC = N0->getOperand(0);
4620          unsigned N0Opc = N0->getOpcode();
4621          bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4622          if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4623              TopSetCC.getOpcode() == ISD::SETCC &&
4624              (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4625              (isConstFalseVal(N1) ||
4626               isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4627  
4628            bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4629                           (!N1C->isZero() && Cond == ISD::SETNE);
4630  
4631            if (!Inverse)
4632              return TopSetCC;
4633  
4634            ISD::CondCode InvCond = ISD::getSetCCInverse(
4635                cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4636                TopSetCC.getOperand(0).getValueType());
4637            return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4638                                        TopSetCC.getOperand(1),
4639                                        InvCond);
4640          }
4641        }
4642      }
4643  
4644      // If the LHS is '(and load, const)', the RHS is 0, the test is for
4645      // equality or unsigned, and all 1 bits of the const are in the same
4646      // partial word, see if we can shorten the load.
4647      if (DCI.isBeforeLegalize() &&
4648          !ISD::isSignedIntSetCC(Cond) &&
4649          N0.getOpcode() == ISD::AND && C1 == 0 &&
4650          N0.getNode()->hasOneUse() &&
4651          isa<LoadSDNode>(N0.getOperand(0)) &&
4652          N0.getOperand(0).getNode()->hasOneUse() &&
4653          isa<ConstantSDNode>(N0.getOperand(1))) {
4654        auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4655        APInt bestMask;
4656        unsigned bestWidth = 0, bestOffset = 0;
4657        if (Lod->isSimple() && Lod->isUnindexed() &&
4658            (Lod->getMemoryVT().isByteSized() ||
4659             isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4660          unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4661          unsigned origWidth = N0.getValueSizeInBits();
4662          unsigned maskWidth = origWidth;
4663          // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4664          // 8 bits, but have to be careful...
4665          if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4666            origWidth = Lod->getMemoryVT().getSizeInBits();
4667          const APInt &Mask = N0.getConstantOperandAPInt(1);
4668    // Only consider power-of-2 widths (and at least one byte) as candidates
4669          // for the narrowed load.
4670          for (unsigned width = 8; width < origWidth; width *= 2) {
4671            EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4672            if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4673              continue;
4674            APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4675            // Avoid accessing any padding here for now (we could use memWidth
4676            // instead of origWidth here otherwise).
4677            unsigned maxOffset = origWidth - width;
4678            for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4679              if (Mask.isSubsetOf(newMask)) {
4680                unsigned ptrOffset =
4681                    Layout.isLittleEndian() ? offset : memWidth - width - offset;
4682                unsigned IsFast = 0;
4683                Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4684                if (allowsMemoryAccess(
4685                        *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4686                        NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4687                    IsFast) {
4688                  bestOffset = ptrOffset / 8;
4689                  bestMask = Mask.lshr(offset);
4690                  bestWidth = width;
4691                  break;
4692                }
4693              }
4694              newMask <<= 8;
4695            }
4696            if (bestWidth)
4697              break;
4698          }
4699        }
4700        if (bestWidth) {
4701          EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4702          SDValue Ptr = Lod->getBasePtr();
4703          if (bestOffset != 0)
4704            Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4705          SDValue NewLoad =
4706              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4707                          Lod->getPointerInfo().getWithOffset(bestOffset),
4708                          Lod->getOriginalAlign());
4709          SDValue And =
4710              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4711                          DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4712          return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4713        }
4714      }
4715  
4716      // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4717      if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4718        unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4719  
4720        // If the comparison constant has bits in the upper part, the
4721        // zero-extended value could never match.
4722        if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4723                                                C1.getBitWidth() - InSize))) {
4724          switch (Cond) {
4725          case ISD::SETUGT:
4726          case ISD::SETUGE:
4727          case ISD::SETEQ:
4728            return DAG.getConstant(0, dl, VT);
4729          case ISD::SETULT:
4730          case ISD::SETULE:
4731          case ISD::SETNE:
4732            return DAG.getConstant(1, dl, VT);
4733          case ISD::SETGT:
4734          case ISD::SETGE:
4735            // True if the sign bit of C1 is set.
4736            return DAG.getConstant(C1.isNegative(), dl, VT);
4737          case ISD::SETLT:
4738          case ISD::SETLE:
4739            // True if the sign bit of C1 isn't set.
4740            return DAG.getConstant(C1.isNonNegative(), dl, VT);
4741          default:
4742            break;
4743          }
4744        }
4745  
4746        // Otherwise, we can perform the comparison with the low bits.
4747        switch (Cond) {
4748        case ISD::SETEQ:
4749        case ISD::SETNE:
4750        case ISD::SETUGT:
4751        case ISD::SETUGE:
4752        case ISD::SETULT:
4753        case ISD::SETULE: {
4754          EVT newVT = N0.getOperand(0).getValueType();
4755          if (DCI.isBeforeLegalizeOps() ||
4756              (isOperationLegal(ISD::SETCC, newVT) &&
4757               isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4758            EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4759            SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4760  
4761            SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4762                                            NewConst, Cond);
4763            return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4764          }
4765          break;
4766        }
4767        default:
4768          break; // todo, be more careful with signed comparisons
4769        }
4770      } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4771                 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4772                 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4773                                        OpVT)) {
4774        EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4775        unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4776        EVT ExtDstTy = N0.getValueType();
4777        unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4778  
4779        // If the constant doesn't fit into the number of bits for the source of
4780        // the sign extension, it is impossible for both sides to be equal.
4781        if (C1.getSignificantBits() > ExtSrcTyBits)
4782          return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4783  
4784        assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4785               ExtDstTy != ExtSrcTy && "Unexpected types!");
4786        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4787        SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4788                                     DAG.getConstant(Imm, dl, ExtDstTy));
4789        if (!DCI.isCalledByLegalizer())
4790          DCI.AddToWorklist(ZextOp.getNode());
4791        // Otherwise, make this a use of a zext.
4792        return DAG.getSetCC(dl, VT, ZextOp,
4793                            DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4794      } else if ((N1C->isZero() || N1C->isOne()) &&
4795                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4796        // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1. i1 types are
4797        // excluded as they are handled below whilst checking for foldBooleans.
4798        if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4799            isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4800            (N0.getValueType() == MVT::i1 ||
4801             getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4802            DAG.MaskedValueIsZero(
4803                N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4804          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4805          if (TrueWhenTrue)
4806            return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4807          // Invert the condition.
4808          if (N0.getOpcode() == ISD::SETCC) {
4809            ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4810            CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4811            if (DCI.isBeforeLegalizeOps() ||
4812                isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4813              return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4814          }
4815        }
4816  
4817        if ((N0.getOpcode() == ISD::XOR ||
4818             (N0.getOpcode() == ISD::AND &&
4819              N0.getOperand(0).getOpcode() == ISD::XOR &&
4820              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4821            isOneConstant(N0.getOperand(1))) {
4822          // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4823          // can only do this if the top bits are known zero.
4824          unsigned BitWidth = N0.getValueSizeInBits();
4825          if (DAG.MaskedValueIsZero(N0,
4826                                    APInt::getHighBitsSet(BitWidth,
4827                                                          BitWidth-1))) {
4828            // Okay, get the un-inverted input value.
4829            SDValue Val;
4830            if (N0.getOpcode() == ISD::XOR) {
4831              Val = N0.getOperand(0);
4832            } else {
4833              assert(N0.getOpcode() == ISD::AND &&
4834                      N0.getOperand(0).getOpcode() == ISD::XOR);
4835              // ((X^1)&1)^1 -> X & 1
4836              Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4837                                N0.getOperand(0).getOperand(0),
4838                                N0.getOperand(1));
4839            }
4840  
4841            return DAG.getSetCC(dl, VT, Val, N1,
4842                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4843          }
4844        } else if (N1C->isOne()) {
4845          SDValue Op0 = N0;
4846          if (Op0.getOpcode() == ISD::TRUNCATE)
4847            Op0 = Op0.getOperand(0);
4848  
4849          if ((Op0.getOpcode() == ISD::XOR) &&
4850              Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4851              Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4852            SDValue XorLHS = Op0.getOperand(0);
4853            SDValue XorRHS = Op0.getOperand(1);
4854            // Ensure that the input setccs return an i1 type or 0/1 value.
4855            if (Op0.getValueType() == MVT::i1 ||
4856                (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4857                        ZeroOrOneBooleanContent &&
4858                 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4859                          ZeroOrOneBooleanContent)) {
4860              // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4861              Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4862              return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4863            }
4864          }
4865          if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4866            // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4867            if (Op0.getValueType().bitsGT(VT))
4868              Op0 = DAG.getNode(ISD::AND, dl, VT,
4869                            DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4870                            DAG.getConstant(1, dl, VT));
4871            else if (Op0.getValueType().bitsLT(VT))
4872              Op0 = DAG.getNode(ISD::AND, dl, VT,
4873                          DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4874                          DAG.getConstant(1, dl, VT));
4875  
4876            return DAG.getSetCC(dl, VT, Op0,
4877                                DAG.getConstant(0, dl, Op0.getValueType()),
4878                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4879          }
4880          if (Op0.getOpcode() == ISD::AssertZext &&
4881              cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4882            return DAG.getSetCC(dl, VT, Op0,
4883                                DAG.getConstant(0, dl, Op0.getValueType()),
4884                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4885        }
4886      }
4887  
4888      // Given:
4889      //   icmp eq/ne (urem %x, %y), 0
4890      // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4891      //   icmp eq/ne %x, 0
4892      if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4893          (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4894        KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4895        KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4896        if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4897          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4898      }
4899  
4900      // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4901      //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4902      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4903          N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4904          N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4905          N1C && N1C->isAllOnes()) {
4906        return DAG.getSetCC(dl, VT, N0.getOperand(0),
4907                            DAG.getConstant(0, dl, OpVT),
4908                            Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4909      }
4910  
4911      if (SDValue V =
4912              optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4913        return V;
4914    }
4915  
4916    // These simplifications apply to splat vectors as well.
4917    // TODO: Handle more splat vector cases.
4918    if (auto *N1C = isConstOrConstSplat(N1)) {
4919      const APInt &C1 = N1C->getAPIntValue();
4920  
4921      APInt MinVal, MaxVal;
4922      unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4923      if (ISD::isSignedIntSetCC(Cond)) {
4924        MinVal = APInt::getSignedMinValue(OperandBitSize);
4925        MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4926      } else {
4927        MinVal = APInt::getMinValue(OperandBitSize);
4928        MaxVal = APInt::getMaxValue(OperandBitSize);
4929      }
4930  
4931      // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4932      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4933        // X >= MIN --> true
4934        if (C1 == MinVal)
4935          return DAG.getBoolConstant(true, dl, VT, OpVT);
4936  
4937        if (!VT.isVector()) { // TODO: Support this for vectors.
4938          // X >= C0 --> X > (C0 - 1)
4939          APInt C = C1 - 1;
4940          ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4941          if ((DCI.isBeforeLegalizeOps() ||
4942               isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4943              (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4944                                    isLegalICmpImmediate(C.getSExtValue())))) {
4945            return DAG.getSetCC(dl, VT, N0,
4946                                DAG.getConstant(C, dl, N1.getValueType()),
4947                                NewCC);
4948          }
4949        }
4950      }
4951  
4952      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4953        // X <= MAX --> true
4954        if (C1 == MaxVal)
4955          return DAG.getBoolConstant(true, dl, VT, OpVT);
4956  
4957        // X <= C0 --> X < (C0 + 1)
4958        if (!VT.isVector()) { // TODO: Support this for vectors.
4959          APInt C = C1 + 1;
4960          ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4961          if ((DCI.isBeforeLegalizeOps() ||
4962               isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4963              (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4964                                    isLegalICmpImmediate(C.getSExtValue())))) {
4965            return DAG.getSetCC(dl, VT, N0,
4966                                DAG.getConstant(C, dl, N1.getValueType()),
4967                                NewCC);
4968          }
4969        }
4970      }
4971  
4972      if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4973        if (C1 == MinVal)
4974          return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4975  
4976        // TODO: Support this for vectors after legalize ops.
4977        if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4978          // Canonicalize setlt X, Max --> setne X, Max
4979          if (C1 == MaxVal)
4980            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4981  
4982          // If we have setult X, 1, turn it into seteq X, 0
4983          if (C1 == MinVal+1)
4984            return DAG.getSetCC(dl, VT, N0,
4985                                DAG.getConstant(MinVal, dl, N0.getValueType()),
4986                                ISD::SETEQ);
4987        }
4988      }
4989  
4990      if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4991        if (C1 == MaxVal)
4992          return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4993  
4994        // TODO: Support this for vectors after legalize ops.
4995        if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4996          // Canonicalize setgt X, Min --> setne X, Min
4997          if (C1 == MinVal)
4998            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4999  
5000          // If we have setugt X, Max-1, turn it into seteq X, Max
5001          if (C1 == MaxVal-1)
5002            return DAG.getSetCC(dl, VT, N0,
5003                                DAG.getConstant(MaxVal, dl, N0.getValueType()),
5004                                ISD::SETEQ);
5005        }
5006      }
5007  
5008      if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5009        // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
5010        if (C1.isZero())
5011          if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5012                  VT, N0, N1, Cond, DCI, dl))
5013            return CC;
5014  
5015        // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5016        // For example, when high 32-bits of i64 X are known clear:
5017        // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
5018        // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
5019        bool CmpZero = N1C->isZero();
5020        bool CmpNegOne = N1C->isAllOnes();
5021        if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5022          // Match or(lo,shl(hi,bw/2)) pattern.
5023          auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5024            unsigned EltBits = V.getScalarValueSizeInBits();
5025            if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5026              return false;
5027            SDValue LHS = V.getOperand(0);
5028            SDValue RHS = V.getOperand(1);
5029            APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5030            // Unshifted element must have zero upperbits.
5031            if (RHS.getOpcode() == ISD::SHL &&
5032                isa<ConstantSDNode>(RHS.getOperand(1)) &&
5033                RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5034                DAG.MaskedValueIsZero(LHS, HiBits)) {
5035              Lo = LHS;
5036              Hi = RHS.getOperand(0);
5037              return true;
5038            }
5039            if (LHS.getOpcode() == ISD::SHL &&
5040                isa<ConstantSDNode>(LHS.getOperand(1)) &&
5041                LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5042                DAG.MaskedValueIsZero(RHS, HiBits)) {
5043              Lo = RHS;
5044              Hi = LHS.getOperand(0);
5045              return true;
5046            }
5047            return false;
5048          };
5049  
5050          auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5051            unsigned EltBits = N0.getScalarValueSizeInBits();
5052            unsigned HalfBits = EltBits / 2;
5053            APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5054            SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5055            SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5056            SDValue NewN0 =
5057                DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5058            SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5059            return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5060          };
5061  
5062          SDValue Lo, Hi;
5063          if (IsConcat(N0, Lo, Hi))
5064            return MergeConcat(Lo, Hi);
5065  
5066          if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5067            SDValue Lo0, Lo1, Hi0, Hi1;
5068            if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5069                IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5070              return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5071                                 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5072            }
5073          }
5074        }
5075      }
5076  
5077      // If we have "setcc X, C0", check to see if we can shrink the immediate
5078      // by changing cc.
5079      // TODO: Support this for vectors after legalize ops.
5080      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5081        // SETUGT X, SINTMAX  -> SETLT X, 0
5082        // SETUGE X, SINTMIN -> SETLT X, 0
5083        if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5084            (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5085          return DAG.getSetCC(dl, VT, N0,
5086                              DAG.getConstant(0, dl, N1.getValueType()),
5087                              ISD::SETLT);
5088  
5089        // SETULT X, SINTMIN  -> SETGT X, -1
5090        // SETULE X, SINTMAX  -> SETGT X, -1
5091        if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5092            (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5093          return DAG.getSetCC(dl, VT, N0,
5094                              DAG.getAllOnesConstant(dl, N1.getValueType()),
5095                              ISD::SETGT);
5096      }
5097    }
5098  
5099    // Back to non-vector simplifications.
5100    // TODO: Can we do these for vector splats?
5101    if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5102      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5103      const APInt &C1 = N1C->getAPIntValue();
5104      EVT ShValTy = N0.getValueType();
5105  
5106      // Fold bit comparisons when we can. This will result in an
5107      // incorrect value when boolean false is negative one, unless
5108      // the bitsize is 1 in which case the false value is the same
5109      // in practice regardless of the representation.
5110      if ((VT.getSizeInBits() == 1 ||
5111           getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5112          (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5113          (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5114          N0.getOpcode() == ISD::AND) {
5115        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5116          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5117            // Perform the xform if the AND RHS is a single bit.
5118            unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5119            if (AndRHS->getAPIntValue().isPowerOf2() &&
5120                !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5121              return DAG.getNode(
5122                  ISD::TRUNCATE, dl, VT,
5123                  DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5124                              DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5125            }
5126          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5127            // (X & 8) == 8  -->  (X & 8) >> 3
5128            // Perform the xform if C1 is a single bit.
5129            unsigned ShCt = C1.logBase2();
5130            if (C1.isPowerOf2() &&
5131                !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5132              return DAG.getNode(
5133                  ISD::TRUNCATE, dl, VT,
5134                  DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5135                              DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5136            }
5137          }
5138        }
5139      }
5140  
5141      if (C1.getSignificantBits() <= 64 &&
5142          !isLegalICmpImmediate(C1.getSExtValue())) {
5143        // (X & -256) == 256 -> (X >> 8) == 1
5144        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5145            N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5146          if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5147            const APInt &AndRHSC = AndRHS->getAPIntValue();
5148            if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5149              unsigned ShiftBits = AndRHSC.countr_zero();
5150              if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5151                SDValue Shift = DAG.getNode(
5152                    ISD::SRL, dl, ShValTy, N0.getOperand(0),
5153                    DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5154                SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5155                return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5156              }
5157            }
5158          }
5159        } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5160                   Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5161          bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5162          // X <  0x100000000 -> (X >> 32) <  1
5163          // X >= 0x100000000 -> (X >> 32) >= 1
5164          // X <= 0x0ffffffff -> (X >> 32) <  1
5165          // X >  0x0ffffffff -> (X >> 32) >= 1
5166          unsigned ShiftBits;
5167          APInt NewC = C1;
5168          ISD::CondCode NewCond = Cond;
5169          if (AdjOne) {
5170            ShiftBits = C1.countr_one();
5171            NewC = NewC + 1;
5172            NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5173          } else {
5174            ShiftBits = C1.countr_zero();
5175          }
5176          NewC.lshrInPlace(ShiftBits);
5177          if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5178              isLegalICmpImmediate(NewC.getSExtValue()) &&
5179              !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5180            SDValue Shift =
5181                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5182                            DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5183            SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5184            return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5185          }
5186        }
5187      }
5188    }
5189  
5190    if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5191      auto *CFP = cast<ConstantFPSDNode>(N1);
5192      assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5193  
5194      // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5195      // constant if knowing that the operand is non-nan is enough.  We prefer to
5196      // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5197      // materialize 0.0.
5198      if (Cond == ISD::SETO || Cond == ISD::SETUO)
5199        return DAG.getSetCC(dl, VT, N0, N0, Cond);
5200  
5201      // setcc (fneg x), C -> setcc swap(pred) x, -C
5202      if (N0.getOpcode() == ISD::FNEG) {
5203        ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5204        if (DCI.isBeforeLegalizeOps() ||
5205            isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5206          SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5207          return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5208        }
5209      }
5210  
5211      // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5212      if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5213          !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5214        bool IsFabs = N0.getOpcode() == ISD::FABS;
5215        SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5216        if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5217          FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5218                                               : (IsFabs ? fcInf : fcPosInf);
5219          if (Cond == ISD::SETUEQ)
5220            Flag |= fcNan;
5221          return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5222                             DAG.getTargetConstant(Flag, dl, MVT::i32));
5223        }
5224      }
5225  
5226      // If the condition is not legal, see if we can find an equivalent one
5227      // which is legal.
5228      if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5229        // If the comparison was an awkward floating-point == or != and one of
5230        // the comparison operands is infinity or negative infinity, convert the
5231        // condition to a less-awkward <= or >=.
5232        if (CFP->getValueAPF().isInfinity()) {
5233          bool IsNegInf = CFP->getValueAPF().isNegative();
5234          ISD::CondCode NewCond = ISD::SETCC_INVALID;
5235          switch (Cond) {
5236          case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5237          case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5238          case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5239          case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5240          default: break;
5241          }
5242          if (NewCond != ISD::SETCC_INVALID &&
5243              isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5244            return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5245        }
5246      }
5247    }
5248  
5249    if (N0 == N1) {
5250      // The sext(setcc()) => setcc() optimization relies on the appropriate
5251      // constant being emitted.
5252      assert(!N0.getValueType().isInteger() &&
5253             "Integer types should be handled by FoldSetCC");
5254  
5255      bool EqTrue = ISD::isTrueWhenEqual(Cond);
5256      unsigned UOF = ISD::getUnorderedFlavor(Cond);
5257      if (UOF == 2) // FP operators that are undefined on NaNs.
5258        return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5259      if (UOF == unsigned(EqTrue))
5260        return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5261      // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5262      // if it is not already.
5263      ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5264      if (NewCond != Cond &&
5265          (DCI.isBeforeLegalizeOps() ||
5266                              isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5267        return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5268    }
5269  
5270    // ~X > ~Y --> Y > X
5271    // ~X < ~Y --> Y < X
5272    // ~X < C --> X > ~C
5273    // ~X > C --> X < ~C
5274    if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5275        N0.getValueType().isInteger()) {
5276      if (isBitwiseNot(N0)) {
5277        if (isBitwiseNot(N1))
5278          return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5279  
5280        if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5281            !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5282          SDValue Not = DAG.getNOT(dl, N1, OpVT);
5283          return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5284        }
5285      }
5286    }
5287  
5288    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5289        N0.getValueType().isInteger()) {
5290      if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5291          N0.getOpcode() == ISD::XOR) {
5292        // Simplify (X+Y) == (X+Z) -->  Y == Z
5293        if (N0.getOpcode() == N1.getOpcode()) {
5294          if (N0.getOperand(0) == N1.getOperand(0))
5295            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5296          if (N0.getOperand(1) == N1.getOperand(1))
5297            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5298          if (isCommutativeBinOp(N0.getOpcode())) {
5299            // If X op Y == Y op X, try other combinations.
5300            if (N0.getOperand(0) == N1.getOperand(1))
5301              return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5302                                  Cond);
5303            if (N0.getOperand(1) == N1.getOperand(0))
5304              return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5305                                  Cond);
5306          }
5307        }
5308  
5309        // If RHS is a legal immediate value for a compare instruction, we need
5310        // to be careful about increasing register pressure needlessly.
5311        bool LegalRHSImm = false;
5312  
5313        if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5314          if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5315            // Turn (X+C1) == C2 --> X == C2-C1
5316            if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5317              return DAG.getSetCC(
5318                  dl, VT, N0.getOperand(0),
5319                  DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5320                                  dl, N0.getValueType()),
5321                  Cond);
5322  
5323            // Turn (X^C1) == C2 --> X == C1^C2
5324            if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5325              return DAG.getSetCC(
5326                  dl, VT, N0.getOperand(0),
5327                  DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5328                                  dl, N0.getValueType()),
5329                  Cond);
5330          }
5331  
5332          // Turn (C1-X) == C2 --> X == C1-C2
5333          if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5334            if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5335              return DAG.getSetCC(
5336                  dl, VT, N0.getOperand(1),
5337                  DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5338                                  dl, N0.getValueType()),
5339                  Cond);
5340  
5341          // Could RHSC fold directly into a compare?
5342          if (RHSC->getValueType(0).getSizeInBits() <= 64)
5343            LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5344        }
5345  
5346        // (X+Y) == X --> Y == 0 and similar folds.
5347        // Don't do this if X is an immediate that can fold into a cmp
5348        // instruction and X+Y has other uses. It could be an induction variable
5349        // chain, and the transform would increase register pressure.
5350        if (!LegalRHSImm || N0.hasOneUse())
5351          if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5352            return V;
5353      }
5354  
5355      if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5356          N1.getOpcode() == ISD::XOR)
5357        if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5358          return V;
5359  
5360      if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5361        return V;
5362    }
5363  
5364    // Fold remainder of division by a constant.
5365    if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5366        N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5367      // When division is cheap or optimizing for minimum size,
5368      // fall through to DIVREM creation by skipping this fold.
5369      if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5370        if (N0.getOpcode() == ISD::UREM) {
5371          if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5372            return Folded;
5373        } else if (N0.getOpcode() == ISD::SREM) {
5374          if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5375            return Folded;
5376        }
5377      }
5378    }
5379  
5380    // Fold away ALL boolean setcc's.
5381    if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5382      SDValue Temp;
5383      switch (Cond) {
5384      default: llvm_unreachable("Unknown integer setcc!");
5385      case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5386        Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5387        N0 = DAG.getNOT(dl, Temp, OpVT);
5388        if (!DCI.isCalledByLegalizer())
5389          DCI.AddToWorklist(Temp.getNode());
5390        break;
5391      case ISD::SETNE:  // X != Y   -->  (X^Y)
5392        N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5393        break;
5394      case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5395      case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5396        Temp = DAG.getNOT(dl, N0, OpVT);
5397        N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5398        if (!DCI.isCalledByLegalizer())
5399          DCI.AddToWorklist(Temp.getNode());
5400        break;
5401      case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5402      case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5403        Temp = DAG.getNOT(dl, N1, OpVT);
5404        N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5405        if (!DCI.isCalledByLegalizer())
5406          DCI.AddToWorklist(Temp.getNode());
5407        break;
5408      case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5409      case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5410        Temp = DAG.getNOT(dl, N0, OpVT);
5411        N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5412        if (!DCI.isCalledByLegalizer())
5413          DCI.AddToWorklist(Temp.getNode());
5414        break;
5415      case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5416      case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5417        Temp = DAG.getNOT(dl, N1, OpVT);
5418        N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5419        break;
5420      }
5421      if (VT.getScalarType() != MVT::i1) {
5422        if (!DCI.isCalledByLegalizer())
5423          DCI.AddToWorklist(N0.getNode());
5424        // FIXME: If running after legalize, we probably can't do this.
5425        ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5426        N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5427      }
5428      return N0;
5429    }
5430  
5431    // Could not fold it.
5432    return SDValue();
5433  }
5434  
5435  /// Returns true (and the GlobalValue and the offset) if the node is a
5436  /// GlobalAddress + offset.
isGAPlusOffset(SDNode * WN,const GlobalValue * & GA,int64_t & Offset) const5437  bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5438                                      int64_t &Offset) const {
5439  
5440    SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5441  
5442    if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5443      GA = GASD->getGlobal();
5444      Offset += GASD->getOffset();
5445      return true;
5446    }
5447  
5448    if (N->getOpcode() == ISD::ADD) {
5449      SDValue N1 = N->getOperand(0);
5450      SDValue N2 = N->getOperand(1);
5451      if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5452        if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5453          Offset += V->getSExtValue();
5454          return true;
5455        }
5456      } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5457        if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5458          Offset += V->getSExtValue();
5459          return true;
5460        }
5461      }
5462    }
5463  
5464    return false;
5465  }
5466  
/// Target hook invoked by the DAG combiner on target-specific nodes. The
/// default implementation performs no combining and returns a null SDValue,
/// which signals "no change" to the combiner.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5472  
5473  //===----------------------------------------------------------------------===//
5474  //  Inline Assembler Implementation Methods
5475  //===----------------------------------------------------------------------===//
5476  
5477  TargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const5478  TargetLowering::getConstraintType(StringRef Constraint) const {
5479    unsigned S = Constraint.size();
5480  
5481    if (S == 1) {
5482      switch (Constraint[0]) {
5483      default: break;
5484      case 'r':
5485        return C_RegisterClass;
5486      case 'm': // memory
5487      case 'o': // offsetable
5488      case 'V': // not offsetable
5489        return C_Memory;
5490      case 'p': // Address.
5491        return C_Address;
5492      case 'n': // Simple Integer
5493      case 'E': // Floating Point Constant
5494      case 'F': // Floating Point Constant
5495        return C_Immediate;
5496      case 'i': // Simple Integer or Relocatable Constant
5497      case 's': // Relocatable Constant
5498      case 'X': // Allow ANY value.
5499      case 'I': // Target registers.
5500      case 'J':
5501      case 'K':
5502      case 'L':
5503      case 'M':
5504      case 'N':
5505      case 'O':
5506      case 'P':
5507      case '<':
5508      case '>':
5509        return C_Other;
5510      }
5511    }
5512  
5513    if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5514      if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5515        return C_Memory;
5516      return C_Register;
5517    }
5518    return C_Unknown;
5519  }
5520  
5521  /// Try to replace an X constraint, which matches anything, with another that
5522  /// has more specific requirements based on the type of the corresponding
5523  /// operand.
LowerXConstraint(EVT ConstraintVT) const5524  const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5525    if (ConstraintVT.isInteger())
5526      return "r";
5527    if (ConstraintVT.isFloatingPoint())
5528      return "f"; // works for many targets
5529    return nullptr;
5530  }
5531  
/// Target hook for lowering an inline-asm output with a target-specific
/// constraint. The default implementation returns a null SDValue, meaning the
/// generic output lowering should be used.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5537  
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled generically; multi-letter
  // codes are target-specific and handled by target overrides.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    // Accumulated constant displacement peeled off ADD/SUB nodes; unsigned so
    // wraparound is well-defined.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A plain constant satisfies everything except 's' (which requires a
      // relocatable symbol).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands are acceptable for 'X', 'i' and 's', but not for
      // 'n' (which requires a plain integer).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one (op +/- C) layer, fold C into Offset, and keep walking the
      // other operand toward the symbol or constant at the leaf.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Unrecognized node: the operand is invalid; add nothing to Ops.
      return;
    }
    break;
  }
  }
}
5616  
/// Target hook allowing a target to append extra SDValue operands when
/// lowering a target intrinsic call. The default implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5620  
5621  std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * RI,StringRef Constraint,MVT VT) const5622  TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5623                                               StringRef Constraint,
5624                                               MVT VT) const {
5625    if (!Constraint.starts_with("{"))
5626      return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5627    assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5628  
5629    // Remove the braces from around the name.
5630    StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5631  
5632    std::pair<unsigned, const TargetRegisterClass *> R =
5633        std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5634  
5635    // Figure out which register class contains this reg.
5636    for (const TargetRegisterClass *RC : RI->regclasses()) {
5637      // If none of the value types for this register class are valid, we
5638      // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5639      if (!isLegalRC(*RI, *RC))
5640        continue;
5641  
5642      for (const MCPhysReg &PR : *RC) {
5643        if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5644          std::pair<unsigned, const TargetRegisterClass *> S =
5645              std::make_pair(PR, RC);
5646  
5647          // If this register class has the requested value type, return it,
5648          // otherwise keep searching and return the first class found
5649          // if no other is found which explicitly has the requested type.
5650          if (RI->isTypeLegalForClass(*RC, VT))
5651            return S;
5652          if (!R.second)
5653            R = S;
5654        }
5655      }
5656    }
5657  
5658    return R;
5659  }
5660  
5661  //===----------------------------------------------------------------------===//
5662  // Constraint Selection.
5663  
5664  /// Return true of this is an input operand that is a matching constraint like
5665  /// "4".
isMatchingInputConstraint() const5666  bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5667    assert(!ConstraintCode.empty() && "No known constraint!");
5668    return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5669  }
5670  
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // The constraint code is the decimal index of the tied output operand;
  // callers are expected to check isMatchingInputConstraint() first.
  return atoi(ConstraintCode.c_str());
}
5677  
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (auto *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels come from a callbr's indirect destinations, not from the call
      // arguments; skip the operand-type computation below via 'continue'.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing VTs are tolerated only when both constraints resolve to
        // the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5864  
5865  /// Return a number indicating our preference for chosing a type of constraint
5866  /// over another, for the purpose of sorting them. Immediates are almost always
5867  /// preferrable (when they can be emitted). A higher return value means a
5868  /// stronger preference for one constraint type relative to another.
5869  /// FIXME: We should prefer registers over memory but doing so may lead to
5870  /// unrecoverable register exhaustion later.
5871  /// https://github.com/llvm/llvm-project/issues/20571
getConstraintPiority(TargetLowering::ConstraintType CT)5872  static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5873    switch (CT) {
5874    case TargetLowering::C_Immediate:
5875    case TargetLowering::C_Other:
5876      return 4;
5877    case TargetLowering::C_Memory:
5878    case TargetLowering::C_Address:
5879      return 3;
5880    case TargetLowering::C_RegisterClass:
5881      return 2;
5882    case TargetLowering::C_Register:
5883      return 1;
5884    case TargetLowering::C_Unknown:
5885      return 0;
5886    }
5887    llvm_unreachable("Invalid constraint type");
5888  }
5889  
5890  /// Examine constraint type and operand type and determine a weight value.
5891  /// This object must already have been set up with the operand type
5892  /// and the current alternative constraint selected.
5893  TargetLowering::ConstraintWeight
getMultipleConstraintMatchWeight(AsmOperandInfo & info,int maIndex) const5894    TargetLowering::getMultipleConstraintMatchWeight(
5895      AsmOperandInfo &info, int maIndex) const {
5896    InlineAsm::ConstraintCodeVector *rCodes;
5897    if (maIndex >= (int)info.multipleAlternatives.size())
5898      rCodes = &info.Codes;
5899    else
5900      rCodes = &info.multipleAlternatives[maIndex].Codes;
5901    ConstraintWeight BestWeight = CW_Invalid;
5902  
5903    // Loop over the options, keeping track of the most general one.
5904    for (const std::string &rCode : *rCodes) {
5905      ConstraintWeight weight =
5906          getSingleConstraintMatchWeight(info, rCode.c_str());
5907      if (weight > BestWeight)
5908        BestWeight = weight;
5909    }
5910  
5911    return BestWeight;
5912  }
5913  
5914  /// Examine constraint type and operand type and determine a weight value.
5915  /// This object must already have been set up with the operand type
5916  /// and the current alternative constraint selected.
5917  TargetLowering::ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo & info,const char * constraint) const5918    TargetLowering::getSingleConstraintMatchWeight(
5919      AsmOperandInfo &info, const char *constraint) const {
5920    ConstraintWeight weight = CW_Invalid;
5921    Value *CallOperandVal = info.CallOperandVal;
5922      // If we don't have a value, we can't do a match,
5923      // but allow it at the lowest weight.
5924    if (!CallOperandVal)
5925      return CW_Default;
5926    // Look at the constraint type.
5927    switch (*constraint) {
5928      case 'i': // immediate integer.
5929      case 'n': // immediate integer with a known value.
5930        if (isa<ConstantInt>(CallOperandVal))
5931          weight = CW_Constant;
5932        break;
5933      case 's': // non-explicit intregal immediate.
5934        if (isa<GlobalValue>(CallOperandVal))
5935          weight = CW_Constant;
5936        break;
5937      case 'E': // immediate float if host format.
5938      case 'F': // immediate float.
5939        if (isa<ConstantFP>(CallOperandVal))
5940          weight = CW_Constant;
5941        break;
5942      case '<': // memory operand with autodecrement.
5943      case '>': // memory operand with autoincrement.
5944      case 'm': // memory operand.
5945      case 'o': // offsettable memory operand
5946      case 'V': // non-offsettable memory operand
5947        weight = CW_Memory;
5948        break;
5949      case 'r': // general register.
5950      case 'g': // general register, memory operand or immediate integer.
5951                // note: Clang converts "g" to "imr".
5952        if (CallOperandVal->getType()->isIntegerTy())
5953          weight = CW_Register;
5954        break;
5955      case 'X': // any operand.
5956    default:
5957      weight = CW_Default;
5958      break;
5959    }
5960    return weight;
5961  }
5962  
5963  /// If there are multiple different constraints that we could pick for this
5964  /// operand (e.g. "imr") try to pick the 'best' one.
5965  /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5966  /// into seven classes:
5967  ///    Register      -> one specific register
5968  ///    RegisterClass -> a group of regs
5969  ///    Memory        -> memory
5970  ///    Address       -> a symbolic memory reference
5971  ///    Immediate     -> immediate values
5972  ///    Other         -> magic values (such as "Flag Output Operands")
5973  ///    Unknown       -> something we don't recognize yet and can't handle
5974  /// Ideally, we would pick the most specific constraint possible: if we have
5975  /// something that fits into a register, we would pick it.  The problem here
5976  /// is that if we have something that could either be in a register or in
5977  /// memory that use of the register could cause selection of *other*
5978  /// operands to fail: they might only succeed if we pick memory.  Because of
5979  /// this the heuristic we use is:
5980  ///
5981  ///  1) If there is an 'other' constraint, and if the operand is valid for
5982  ///     that constraint, use it.  This makes us take advantage of 'i'
5983  ///     constraints when available.
5984  ///  2) Otherwise, pick the most general constraint present.  This prefers
5985  ///     'm' over 'r', for example.
5986  ///
getConstraintPreferences(TargetLowering::AsmOperandInfo & OpInfo) const5987  TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5988      TargetLowering::AsmOperandInfo &OpInfo) const {
5989    ConstraintGroup Ret;
5990  
5991    Ret.reserve(OpInfo.Codes.size());
5992    for (StringRef Code : OpInfo.Codes) {
5993      TargetLowering::ConstraintType CType = getConstraintType(Code);
5994  
5995      // Indirect 'other' or 'immediate' constraints are not allowed.
5996      if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5997                                 CType == TargetLowering::C_Register ||
5998                                 CType == TargetLowering::C_RegisterClass))
5999        continue;
6000  
6001      // Things with matching constraints can only be registers, per gcc
6002      // documentation.  This mainly affects "g" constraints.
6003      if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6004        continue;
6005  
6006      Ret.emplace_back(Code, CType);
6007    }
6008  
6009    std::stable_sort(
6010        Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6011          return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6012        });
6013  
6014    return Ret;
6015  }
6016  
6017  /// If we have an immediate, see if we can lower it. Return true if we can,
6018  /// false otherwise.
lowerImmediateIfPossible(TargetLowering::ConstraintPair & P,SDValue Op,SelectionDAG * DAG,const TargetLowering & TLI)6019  static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6020                                       SDValue Op, SelectionDAG *DAG,
6021                                       const TargetLowering &TLI) {
6022  
6023    assert((P.second == TargetLowering::C_Other ||
6024            P.second == TargetLowering::C_Immediate) &&
6025           "need immediate or other");
6026  
6027    if (!Op.getNode())
6028      return false;
6029  
6030    std::vector<SDValue> ResultOps;
6031    TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6032    return !ResultOps.empty();
6033  }
6034  
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    // G is sorted with immediate/'other' constraints first. Walk that prefix
    // and stop at the first constraint for which the operand actually lowers
    // as an immediate; if none does, fall back to G[0] (the most preferred).
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6092  
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    // Strip the power-of-two part of the divisor: an exact division by
    // (D << Shift) is an exact arithmetic shift right by Shift followed by
    // an exact division by the remaining odd divisor D.
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // The remaining divisor is odd, hence invertible mod 2^BitWidth, so the
    // exact division becomes a wrap-around multiply by that inverse.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  if (UseSRA) {
    // The shift is exact (no bits are discarded) because the division is.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6154  
6155  /// Given an exact UDIV by a constant, create a multiplication
6156  /// with the multiplicative inverse of the constant.
6157  /// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
BuildExactUDIV(const TargetLowering & TLI,SDNode * N,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDNode * > & Created)6158  static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6159                                const SDLoc &dl, SelectionDAG &DAG,
6160                                SmallVectorImpl<SDNode *> &Created) {
6161    EVT VT = N->getValueType(0);
6162    EVT SVT = VT.getScalarType();
6163    EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6164    EVT ShSVT = ShVT.getScalarType();
6165  
6166    bool UseSRL = false;
6167    SmallVector<SDValue, 16> Shifts, Factors;
6168  
6169    auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6170      if (C->isZero())
6171        return false;
6172      APInt Divisor = C->getAPIntValue();
6173      unsigned Shift = Divisor.countr_zero();
6174      if (Shift) {
6175        Divisor.lshrInPlace(Shift);
6176        UseSRL = true;
6177      }
6178      // Calculate the multiplicative inverse modulo BW.
6179      APInt Factor = Divisor.multiplicativeInverse();
6180      Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6181      Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6182      return true;
6183    };
6184  
6185    SDValue Op1 = N->getOperand(1);
6186  
6187    // Collect all magic values from the build vector.
6188    if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6189      return SDValue();
6190  
6191    SDValue Shift, Factor;
6192    if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6193      Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6194      Factor = DAG.getBuildVector(VT, dl, Factors);
6195    } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6196      assert(Shifts.size() == 1 && Factors.size() == 1 &&
6197             "Expected matchUnaryPredicate to return one element for scalable "
6198             "vectors");
6199      Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6200      Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6201    } else {
6202      assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6203      Shift = Shifts[0];
6204      Factor = Factors[0];
6205    }
6206  
6207    SDValue Res = N->getOperand(0);
6208    if (UseSRL) {
6209      SDNodeFlags Flags;
6210      Flags.setExact(true);
6211      Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags);
6212      Created.push_back(Res.getNode());
6213    }
6214  
6215    return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6216  }
6217  
BuildSDIVPow2(SDNode * N,const APInt & Divisor,SelectionDAG & DAG,SmallVectorImpl<SDNode * > & Created) const6218  SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6219                                SelectionDAG &DAG,
6220                                SmallVectorImpl<SDNode *> &Created) const {
6221    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6222    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6223    if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6224      return SDValue(N, 0); // Lower SDIV as SDIV
6225    return SDValue();
6226  }
6227  
6228  SDValue
BuildSREMPow2(SDNode * N,const APInt & Divisor,SelectionDAG & DAG,SmallVectorImpl<SDNode * > & Created) const6229  TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6230                                SelectionDAG &DAG,
6231                                SmallVectorImpl<SDNode *> &Created) const {
6232    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6233    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6234    if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6235      return SDValue(N, 0); // Lower SREM as SREM
6236    return SDValue();
6237  }
6238  
6239  /// Build sdiv by power-of-2 with conditional move instructions
6240  /// Ref: "Hacker's Delight" by Henry Warren 10-1
6241  /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6242  ///   bgez x, label
6243  ///   add x, x, 2**k-1
6244  /// label:
6245  ///   sra res, x, k
6246  ///   neg res, res (when the divisor is negative)
buildSDIVPow2WithCMov(SDNode * N,const APInt & Divisor,SelectionDAG & DAG,SmallVectorImpl<SDNode * > & Created) const6247  SDValue TargetLowering::buildSDIVPow2WithCMov(
6248      SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6249      SmallVectorImpl<SDNode *> &Created) const {
6250    unsigned Lg2 = Divisor.countr_zero();
6251    EVT VT = N->getValueType(0);
6252  
6253    SDLoc DL(N);
6254    SDValue N0 = N->getOperand(0);
6255    SDValue Zero = DAG.getConstant(0, DL, VT);
6256    APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6257    SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6258  
6259    // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6260    EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6261    SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6262    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6263    SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6264  
6265    Created.push_back(Cmp.getNode());
6266    Created.push_back(Add.getNode());
6267    Created.push_back(CMov.getNode());
6268  
6269    // Divide by pow2.
6270    SDValue SRA =
6271        DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6272  
6273    // If we're dividing by a positive value, we're done.  Otherwise, we must
6274    // negate the result.
6275    if (Divisor.isNonNegative())
6276      return SRA;
6277  
6278    Created.push_back(SRA.getNode());
6279    return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6280  }
6281  
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \param N the SDIV node; operand 0 is the dividend, operand 1 the constant
///        divisor (scalar, BUILD_VECTOR or SPLAT_VECTOR of constants).
/// \param IsAfterLegalization passed through to legality queries for the
///        high-multiply opcodes.
/// \param Created receives every intermediate node built, so the caller can
///        add them to the combiner worklist.
/// \returns the replacement expression, or an empty SDValue on failure.
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT; // Wider type used when VT itself is not legal (see below).

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product fits, and must have a legal MUL.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element expansion parameters, collected by the lambda below.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // For one divisor element, compute the magic multiplier, the optional
  // numerator add/subtract factor, the post-shift amount, and the mask that
  // enables (-1) or disables (0) the final sign-bit rounding fixup.
  // Returns false (rejecting the fold) for a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Re-assemble the per-element parameters into operands matching the shape
  // of the divisor (build-vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // GetMULHS returns the high EltBits of the signed product X*Y, trying in
  // order: the promoted-type mul (illegal VT), MULHS, SMUL_LOHI, and finally
  // a double-width MUL plus shift. Empty SDValue if none is available.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of SMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  // Factor is -1/0/+1 per element, so this is N0 * {-1,0,1} folded into an
  // unconditional multiply-add.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  // ShiftMask is all-ones for the general case (round toward zero) and zero
  // for the d = +1/-1 lanes, where no fixup must be applied.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6443  
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \param N the UDIV node; operand 0 is the dividend, operand 1 the constant
///        divisor (scalar, BUILD_VECTOR or SPLAT_VECTOR of constants).
/// \param IsAfterLegalization passed through to legality queries for the
///        high-multiply opcodes.
/// \param Created receives every intermediate node built, so the caller can
///        add them to the combiner worklist.
/// \returns the replacement expression, or an empty SDValue on failure.
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT; // Wider type used when VT itself is not legal (see below).

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    // The promoted type must be at least twice as wide so the full 2*EltBits
    // product fits, and must have a legal MUL.
    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  // Which parts of the expansion are actually needed across all elements.
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // For one divisor element, compute the pre-shift, magic multiplier,
  // NPQ ("numerator minus q") add-fixup factor, and post-shift.
  // Returns false (rejecting the fold) for a zero divisor.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // Undef placeholders: the final select on (divisor == 1) returns N0 for
      // these lanes, so the expansion values are never observed.
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For the vector NPQ path, a MULHU by 2^(EltBits-1) acts as SRL-by-1
      // on lanes that need the add-fixup, and a multiply by zero elsewhere.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  // Re-assemble the per-element parameters into operands matching the shape
  // of the divisor (build-vector, splat, or scalar).
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    // NPQFactor is intentionally left unset here: the scalar NPQ path below
    // uses a plain SRL-by-1 instead of the MULHU trick.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // GetMULHU returns the high EltBits of the unsigned product X*Y, trying in
  // order: the promoted-type mul (illegal VT), MULHU, UMUL_LOHI, and finally
  // a double-width MUL plus shift. Empty SDValue if none is available.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      // Result 1 of UMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    // Add-fixup path: NPQ = (N0 - Q) >> 1, then Q = NPQ + Q.
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Lanes dividing by 1 bypass the magic expansion entirely (see the undef
  // placeholders above): select the original numerator for them.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6637  
6638  /// If all values in Values that *don't* match the predicate are same 'splat'
6639  /// value, then replace all values with that splat value.
6640  /// Else, if AlternativeReplacement was provided, then replace all values that
6641  /// do match predicate with AlternativeReplacement value.
6642  static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,std::function<bool (SDValue)> Predicate,SDValue AlternativeReplacement=SDValue ())6643  turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6644                            std::function<bool(SDValue)> Predicate,
6645                            SDValue AlternativeReplacement = SDValue()) {
6646    SDValue Replacement;
6647    // Is there a value for which the Predicate does *NOT* match? What is it?
6648    auto SplatValue = llvm::find_if_not(Values, Predicate);
6649    if (SplatValue != Values.end()) {
6650      // Does Values consist only of SplatValue's and values matching Predicate?
6651      if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6652            return Value == *SplatValue || Predicate(Value);
6653          })) // Then we shall replace values matching predicate with SplatValue.
6654        Replacement = *SplatValue;
6655    }
6656    if (!Replacement) {
6657      // Oops, we did not find the "baseline" splat value.
6658      if (!AlternativeReplacement)
6659        return; // Nothing to do.
6660      // Let's replace with provided value then.
6661      Replacement = AlternativeReplacement;
6662    }
6663    std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6664  }
6665  
6666  /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6667  /// where the divisor is constant and the comparison target is zero,
6668  /// return a DAG expression that will generate the same comparison result
6669  /// using only multiplications, additions and shifts/rotations.
6670  /// Ref: "Hacker's Delight" 10-17.
buildUREMEqFold(EVT SETCCVT,SDValue REMNode,SDValue CompTargetNode,ISD::CondCode Cond,DAGCombinerInfo & DCI,const SDLoc & DL) const6671  SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6672                                          SDValue CompTargetNode,
6673                                          ISD::CondCode Cond,
6674                                          DAGCombinerInfo &DCI,
6675                                          const SDLoc &DL) const {
6676    SmallVector<SDNode *, 5> Built;
6677    if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6678                                           DCI, DL, Built)) {
6679      for (SDNode *N : Built)
6680        DCI.AddToWorklist(N);
6681      return Folded;
6682    }
6683  
6684    return SDValue();
6685  }
6686  
6687  SDValue
prepareUREMEqFold(EVT SETCCVT,SDValue REMNode,SDValue CompTargetNode,ISD::CondCode Cond,DAGCombinerInfo & DCI,const SDLoc & DL,SmallVectorImpl<SDNode * > & Created) const6688  TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6689                                    SDValue CompTargetNode, ISD::CondCode Cond,
6690                                    DAGCombinerInfo &DCI, const SDLoc &DL,
6691                                    SmallVectorImpl<SDNode *> &Created) const {
6692    // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6693    // - D must be constant, with D = D0 * 2^K where D0 is odd
6694    // - P is the multiplicative inverse of D0 modulo 2^W
6695    // - Q = floor(((2^W) - 1) / D)
6696    // where W is the width of the common type of N and D.
6697    assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6698           "Only applicable for (in)equality comparisons.");
6699  
6700    SelectionDAG &DAG = DCI.DAG;
6701  
6702    EVT VT = REMNode.getValueType();
6703    EVT SVT = VT.getScalarType();
6704    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6705    EVT ShSVT = ShVT.getScalarType();
6706  
6707    // If MUL is unavailable, we cannot proceed in any case.
6708    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6709      return SDValue();
6710  
6711    bool ComparingWithAllZeros = true;
6712    bool AllComparisonsWithNonZerosAreTautological = true;
6713    bool HadTautologicalLanes = false;
6714    bool AllLanesAreTautological = true;
6715    bool HadEvenDivisor = false;
6716    bool AllDivisorsArePowerOfTwo = true;
6717    bool HadTautologicalInvertedLanes = false;
6718    SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6719  
6720    auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6721      // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6722      if (CDiv->isZero())
6723        return false;
6724  
6725      const APInt &D = CDiv->getAPIntValue();
6726      const APInt &Cmp = CCmp->getAPIntValue();
6727  
6728      ComparingWithAllZeros &= Cmp.isZero();
6729  
6730      // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6731      // if C2 is not less than C1, the comparison is always false.
6732      // But we will only be able to produce the comparison that will give the
6733      // opposive tautological answer. So this lane would need to be fixed up.
6734      bool TautologicalInvertedLane = D.ule(Cmp);
6735      HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6736  
6737      // If all lanes are tautological (either all divisors are ones, or divisor
6738      // is not greater than the constant we are comparing with),
6739      // we will prefer to avoid the fold.
6740      bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6741      HadTautologicalLanes |= TautologicalLane;
6742      AllLanesAreTautological &= TautologicalLane;
6743  
6744      // If we are comparing with non-zero, we need'll need  to subtract said
6745      // comparison value from the LHS. But there is no point in doing that if
6746      // every lane where we are comparing with non-zero is tautological..
6747      if (!Cmp.isZero())
6748        AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6749  
6750      // Decompose D into D0 * 2^K
6751      unsigned K = D.countr_zero();
6752      assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6753      APInt D0 = D.lshr(K);
6754  
6755      // D is even if it has trailing zeros.
6756      HadEvenDivisor |= (K != 0);
6757      // D is a power-of-two if D0 is one.
6758      // If all divisors are power-of-two, we will prefer to avoid the fold.
6759      AllDivisorsArePowerOfTwo &= D0.isOne();
6760  
6761      // P = inv(D0, 2^W)
6762      // 2^W requires W + 1 bits, so we have to extend and then truncate.
6763      unsigned W = D.getBitWidth();
6764      APInt P = D0.multiplicativeInverse();
6765      assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6766  
6767      // Q = floor((2^W - 1) u/ D)
6768      // R = ((2^W - 1) u% D)
6769      APInt Q, R;
6770      APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6771  
6772      // If we are comparing with zero, then that comparison constant is okay,
6773      // else it may need to be one less than that.
6774      if (Cmp.ugt(R))
6775        Q -= 1;
6776  
6777      assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6778             "We are expecting that K is always less than all-ones for ShSVT");
6779  
6780      // If the lane is tautological the result can be constant-folded.
6781      if (TautologicalLane) {
6782        // Set P and K amount to a bogus values so we can try to splat them.
6783        P = 0;
6784        K = -1;
6785        // And ensure that comparison constant is tautological,
6786        // it will always compare true/false.
6787        Q = -1;
6788      }
6789  
6790      PAmts.push_back(DAG.getConstant(P, DL, SVT));
6791      KAmts.push_back(
6792          DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6793      QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6794      return true;
6795    };
6796  
6797    SDValue N = REMNode.getOperand(0);
6798    SDValue D = REMNode.getOperand(1);
6799  
6800    // Collect the values from each element.
6801    if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6802      return SDValue();
6803  
6804    // If all lanes are tautological, the result can be constant-folded.
6805    if (AllLanesAreTautological)
6806      return SDValue();
6807  
6808    // If this is a urem by a powers-of-two, avoid the fold since it can be
6809    // best implemented as a bit test.
6810    if (AllDivisorsArePowerOfTwo)
6811      return SDValue();
6812  
6813    SDValue PVal, KVal, QVal;
6814    if (D.getOpcode() == ISD::BUILD_VECTOR) {
6815      if (HadTautologicalLanes) {
6816        // Try to turn PAmts into a splat, since we don't care about the values
6817        // that are currently '0'. If we can't, just keep '0'`s.
6818        turnVectorIntoSplatVector(PAmts, isNullConstant);
6819        // Try to turn KAmts into a splat, since we don't care about the values
6820        // that are currently '-1'. If we can't, change them to '0'`s.
6821        turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6822                                  DAG.getConstant(0, DL, ShSVT));
6823      }
6824  
6825      PVal = DAG.getBuildVector(VT, DL, PAmts);
6826      KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6827      QVal = DAG.getBuildVector(VT, DL, QAmts);
6828    } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6829      assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6830             "Expected matchBinaryPredicate to return one element for "
6831             "SPLAT_VECTORs");
6832      PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6833      KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6834      QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6835    } else {
6836      PVal = PAmts[0];
6837      KVal = KAmts[0];
6838      QVal = QAmts[0];
6839    }
6840  
6841    if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6842      if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6843        return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6844      assert(CompTargetNode.getValueType() == N.getValueType() &&
6845             "Expecting that the types on LHS and RHS of comparisons match.");
6846      N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6847    }
6848  
6849    // (mul N, P)
6850    SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6851    Created.push_back(Op0.getNode());
6852  
6853    // Rotate right only if any divisor was even. We avoid rotates for all-odd
6854    // divisors as a performance improvement, since rotating by 0 is a no-op.
6855    if (HadEvenDivisor) {
6856      // We need ROTR to do this.
6857      if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6858        return SDValue();
6859      // UREM: (rotr (mul N, P), K)
6860      Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6861      Created.push_back(Op0.getNode());
6862    }
6863  
6864    // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6865    SDValue NewCC =
6866        DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6867                     ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6868    if (!HadTautologicalInvertedLanes)
6869      return NewCC;
6870  
6871    // If any lanes previously compared always-false, the NewCC will give
6872    // always-true result for them, so we need to fixup those lanes.
6873    // Or the other way around for inequality predicate.
6874    assert(VT.isVector() && "Can/should only get here for vectors.");
6875    Created.push_back(NewCC.getNode());
6876  
  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
6881    SDValue TautologicalInvertedChannels =
6882        DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6883    Created.push_back(TautologicalInvertedChannels.getNode());
6884  
6885    // NOTE: we avoid letting illegal types through even if we're before legalize
6886    // ops – legalization has a hard time producing good code for this.
6887    if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6888      // If we have a vector select, let's replace the comparison results in the
6889      // affected lanes with the correct tautological result.
6890      SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6891                                                DL, SETCCVT, SETCCVT);
6892      return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6893                         Replacement, NewCC);
6894    }
6895  
6896    // Else, we can just invert the comparison result in the appropriate lanes.
6897    //
6898    // NOTE: see the note above VSELECT above.
6899    if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6900      return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6901                         TautologicalInvertedChannels);
6902  
6903    return SDValue(); // Don't know how to lower.
6904  }
6905  
6906  /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6907  /// where the divisor is constant and the comparison target is zero,
6908  /// return a DAG expression that will generate the same comparison result
6909  /// using only multiplications, additions and shifts/rotations.
6910  /// Ref: "Hacker's Delight" 10-17.
buildSREMEqFold(EVT SETCCVT,SDValue REMNode,SDValue CompTargetNode,ISD::CondCode Cond,DAGCombinerInfo & DCI,const SDLoc & DL) const6911  SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6912                                          SDValue CompTargetNode,
6913                                          ISD::CondCode Cond,
6914                                          DAGCombinerInfo &DCI,
6915                                          const SDLoc &DL) const {
6916    SmallVector<SDNode *, 7> Built;
6917    if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6918                                           DCI, DL, Built)) {
6919      assert(Built.size() <= 7 && "Max size prediction failed.");
6920      for (SDNode *N : Built)
6921        DCI.AddToWorklist(N);
6922      return Folded;
6923    }
6924  
6925    return SDValue();
6926  }
6927  
/// Prepare the DAG for folding `(seteq/ne (srem N, D), 0)` into a cheaper
/// sequence built from multiply, add, rotate and an unsigned comparison
/// (derivation notes below). Returns the replacement value, or an empty
/// SDValue if the fold is not applicable or not profitable. New nodes are
/// appended to \p Created; note that some nodes may already have been
/// recorded there when a later legality check bails out.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  //
  // When D is a power of two (and thus D0 is 1), the normal
  // formula for A and Q don't apply, because the derivation
  // depends on D not dividing 2^(W-1), and thus theorem ZRS
  // does not apply. This specifically fails when N = INT_MIN.
  //
  // Instead, for power-of-two D, we use:
  // - A = 2^(W-1)
  // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
  // - Q = 2^(W-K) - 1
  // |-> Test that the top K bits are zero after rotation
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  // Per-lane divisor properties, accumulated by BuildSREMPattern below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes the P/A/K/Q constants for one divisor lane; returns false if
  // the whole fold must be abandoned (e.g. a zero divisor).
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If D was a power of two, apply the alternate constant derivation
    // described in the header comment (the general formula is invalid here).
    if (D0.isOne()) {
      // A = 2^(W-1)
      A = APInt::getSignedMinValue(W);
      // - Q = 2^(W-K) - 1
      Q = APInt::getAllOnes(W - K).zext(W);
    }

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants as vectors (or pass the scalars
  // straight through for a scalar srem).
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
7197  
7198  bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op,SelectionDAG & DAG) const7199  verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7200    if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7201      DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7202                                  "be a constant integer");
7203      return true;
7204    }
7205  
7206    return false;
7207  }
7208  
getSqrtInputTest(SDValue Op,SelectionDAG & DAG,const DenormalMode & Mode) const7209  SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7210                                           const DenormalMode &Mode) const {
7211    SDLoc DL(Op);
7212    EVT VT = Op.getValueType();
7213    EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7214    SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7215  
7216    // This is specifically a check for the handling of denormal inputs, not the
7217    // result.
7218    if (Mode.Input == DenormalMode::PreserveSign ||
7219        Mode.Input == DenormalMode::PositiveZero) {
7220      // Test = X == 0.0
7221      return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7222    }
7223  
7224    // Testing it with denormal inputs to avoid wrong estimate.
7225    //
7226    // Test = fabs(X) < SmallestNormal
7227    const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7228    APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7229    SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7230    SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7231    return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7232  }
7233  
/// Try to produce the negated form of \p Op, recursing through negatable
/// operations up to SelectionDAG::MaxRecursionDepth. On success, \p Cost
/// reports how the negated expression compares to materializing an explicit
/// fneg (Cheaper / Neutral / Expensive). Returns an empty SDValue when no
/// acceptable negation was found.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Drop a speculatively-negated value that ended up unused, so we don't
  // leave dead nodes behind in the DAG.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate each (constant) element; undef elements pass through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with these unary ops; negate the operand instead.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same, but FP_ROUND carries its truncation flag as a second operand.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7537  
7538  //===----------------------------------------------------------------------===//
7539  // Legalization Utilities
7540  //===----------------------------------------------------------------------===//
7541  
// Expand a full-width multiply (ISD::MUL, ISD::UMUL_LOHI or ISD::SMUL_LOHI)
// of type VT into operations on the half-width type HiLoVT. On success the
// half-width pieces of the product are appended to Result: {Lo, Hi} for MUL,
// or {Lo0, Hi0, Lo1, Hi1} (both halves of the double-width product) for the
// LOHI opcodes. LL/LH/RL/RH optionally supply pre-split halves of LHS/RHS;
// they must be either all set or all null (asserted below). Returns false if
// no suitable half-width multiply is available.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width multiply forms we are allowed to emit.
  // MulExpansionKind::Always bypasses the legality checks.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  // Helper: emit L * R as a {Lo, Hi} pair in HiLoVT, using whichever of the
  // available multiply forms matches the requested signedness. Returns false
  // if neither a *MUL_LOHI nor a MUL+MULH* pair is available.
  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // If the caller didn't provide split operands, split off the low halves
  // with TRUNCATE when that is available.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if both inputs are known zero in their high halves, a single
  // unsigned half-width multiply produces the whole product.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // LOHI result: the upper double-width half is all zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path: if both inputs fit in the inner width when sign-extended, a
  // single signed half-width multiply suffices (MUL only).
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  // Split off the high halves with SRL+TRUNCATE if the caller didn't supply
  // them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General case: schoolbook multiplication of the four half-width partial
  // products, starting with LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Truncating multiply: only the low VT bits are needed, so the cross
    // terms LL*RH and LH*RL are folded into the high half; LH*RH is dropped.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Prefer the glued ADDC/ADDE carry chain if the target supports it;
  // otherwise propagate the carry through UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // The top partial product; for SMUL_LOHI it is signed.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed correction: if a high half is negative, the unsigned partial
    // products over-count by the other operand's low half; subtract it.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the remaining two half-width pieces of the double-width result.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7716  
expandMUL(SDNode * N,SDValue & Lo,SDValue & Hi,EVT HiLoVT,SelectionDAG & DAG,MulExpansionKind Kind,SDValue LL,SDValue LH,SDValue RL,SDValue RH) const7717  bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7718                                 SelectionDAG &DAG, MulExpansionKind Kind,
7719                                 SDValue LL, SDValue LH, SDValue RL,
7720                                 SDValue RH) const {
7721    SmallVector<SDValue, 2> Result;
7722    bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7723                             N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7724                             DAG, Kind, LL, LH, RL, RH);
7725    if (Ok) {
7726      assert(Result.size() == 2);
7727      Lo = Result[0];
7728      Hi = Result[1];
7729    }
7730    return Ok;
7731  }
7732  
7733  // Optimize unsigned division or remainder by constants for types twice as large
7734  // as a legal VT.
7735  //
7736  // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7737  // can be computed
7738  // as:
7739  //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7740  //   Remainder = Sum % Constant
7741  // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7742  //
7743  // For division, we can compute the remainder using the algorithm described
7744  // above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7746  // (1 << (BitWidth / 2)) to get the quotient.
7747  
7748  // If Constant is even, we can shift right the dividend and the divisor by the
7749  // number of trailing zeros in Constant before applying the remainder algorithm.
7750  // If we're after the quotient, we can subtract this value from the shifted
7751  // dividend and multiply by the multiplicative inverse of the shifted divisor.
7752  // If we want the remainder, we shift the value left by the number of trailing
7753  // zeros and add the bits that were shifted out of the dividend.
// Expand an unsigned UDIV/UREM/UDIVREM of type VT by a constant divisor into
// operations on the half-width type HiLoVT, using the "remainder by summing
// digits" technique described in the comment above. On success, appends the
// half-width pieces of the result(s) to Result ({QuotL, QuotH} for division,
// {RemL, RemH} for remainder, both for UDIVREM). LL/LH optionally supply the
// pre-split halves of the dividend. Returns false if the expansion does not
// apply (signed opcode, non-constant or too-large divisor, missing target
// operations, or optimizing for size).
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // The divisor must be a constant.
  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      // Funnel-shift the two halves right by TrailingZeros: the new low half
      // takes its top bits from the old high half.
      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      // Fold the carry of LL+LH back into the sum (carry out is ignored: the
      // second add of a 1 cannot overflow again here).
      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      // Unsigned overflow occurred iff the sum wrapped below either addend.
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  // (Sum % Divisor == full dividend % Divisor by the digit-summing identity.)
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);

    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    Result.push_back(RemL);
    // The remainder is always smaller than the divisor, which fits in the low
    // half, so the high half of the remainder is zero.
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
7907  
7908  // Check that (every element of) Z is undef or not an exact multiple of BW.
isNonZeroModBitWidthOrUndef(SDValue Z,unsigned BW)7909  static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7910    return ISD::matchUnaryPredicate(
7911        Z,
7912        [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7913        true);
7914  }
7915  
// Expand a VP_FSHL/VP_FSHR (vector-predicated funnel shift) node into
// predicated shifts and an OR, mirroring TargetLowering::expandFunnelShift.
// The mask and vector-length operands are threaded through every emitted node.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);       // shift amount
  SDValue Mask = Node->getOperand(3);    // VP predicate mask
  SDValue VL = Node->getOperand(4);      // VP vector length

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // General case: C may be 0 mod BW, so a single (BW - C) shift could be an
    // out-of-range shift by BW. Split it into a shift by 1 followed by a
    // shift by (BW - 1 - C), both of which are always in range.
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
7972  
expandFunnelShift(SDNode * Node,SelectionDAG & DAG) const7973  SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7974                                            SelectionDAG &DAG) const {
7975    if (Node->isVPOpcode())
7976      return expandVPFunnelShift(Node, DAG);
7977  
7978    EVT VT = Node->getValueType(0);
7979  
7980    if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7981                          !isOperationLegalOrCustom(ISD::SRL, VT) ||
7982                          !isOperationLegalOrCustom(ISD::SUB, VT) ||
7983                          !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7984      return SDValue();
7985  
7986    SDValue X = Node->getOperand(0);
7987    SDValue Y = Node->getOperand(1);
7988    SDValue Z = Node->getOperand(2);
7989  
7990    unsigned BW = VT.getScalarSizeInBits();
7991    bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7992    SDLoc DL(SDValue(Node, 0));
7993  
7994    EVT ShVT = Z.getValueType();
7995  
7996    // If a funnel shift in the other direction is more supported, use it.
7997    unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7998    if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7999        isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8000      if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8001        // fshl X, Y, Z -> fshr X, Y, -Z
8002        // fshr X, Y, Z -> fshl X, Y, -Z
8003        SDValue Zero = DAG.getConstant(0, DL, ShVT);
8004        Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8005      } else {
8006        // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8007        // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8008        SDValue One = DAG.getConstant(1, DL, ShVT);
8009        if (IsFSHL) {
8010          Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8011          X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8012        } else {
8013          X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8014          Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8015        }
8016        Z = DAG.getNOT(DL, Z, ShVT);
8017      }
8018      return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8019    }
8020  
8021    SDValue ShX, ShY;
8022    SDValue ShAmt, InvShAmt;
8023    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8024      // fshl: X << C | Y >> (BW - C)
8025      // fshr: X << (BW - C) | Y >> C
8026      // where C = Z % BW is not zero
8027      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8028      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8029      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8030      ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8031      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8032    } else {
8033      // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8034      // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8035      SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8036      if (isPowerOf2_32(BW)) {
8037        // Z % BW -> Z & (BW - 1)
8038        ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8039        // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8040        InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8041      } else {
8042        SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8043        ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8044        InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8045      }
8046  
8047      SDValue One = DAG.getConstant(1, DL, ShVT);
8048      if (IsFSHL) {
8049        ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8050        SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8051        ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8052      } else {
8053        SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8054        ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8055        ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8056      }
8057    }
8058    return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8059  }
8060  
// TODO: Merge with expandFunnelShift.
// Expand an ROTL/ROTR rotate into shifts and an OR (or into the reversed
// rotate when that one is better supported). AllowVectorOps skips the
// vector-operation legality check (used by callers that will legalize the
// emitted nodes themselves). Returns SDValue() when expansion is not viable.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1); // rotate amount
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  // rot(l|r) x, c -> rot(r|l) x, -c  (valid because rotates are mod-width).
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // Only expand vectors if we have the appropriate vector bit operations.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc shifts the "wrapped" bits in
  // from the other side.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // Non-power-of-two width: (w - c) could equal w (an out-of-range shift),
    // so shift by 1 first and then by (w - 1 - (c % w)), always in range.
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
8116  
// Expand an SHL_PARTS/SRL_PARTS/SRA_PARTS double-word shift (operands:
// low part, high part, shift amount) into FSHL/FSHR plus plain shifts,
// selecting between the two results for shift amounts >= the part width.
// The resulting low/high parts are returned through Lo and Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value shifted into the "emptied" part for large shift
  // amounts: sign bits for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the part that receives bits funneled across the word boundary.
  // Tmp3: the part shifted entirely within one word.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8168  
// Expand FP_TO_SINT by decomposing the float's bit pattern into sign,
// exponent and mantissa and assembling the integer with shifts. Currently
// only handles non-strict f32 -> i64; stores the expansion in Result and
// returns true on success, false when the expansion does not apply.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as raw bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign, arithmetically shifted down: all-ones for negative, zero otherwise.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Mantissa with the implicit leading 1 restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the mantissa by 2^(Exponent - 23): shift left when the exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |value| < 1, which truncates to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8239  
/// Expand FP_TO_UINT (or STRICT_FP_TO_UINT) into sequences built on
/// FP_TO_SINT.  On success, the converted value is returned in \p Result
/// (and, for strict nodes, the output chain in \p Chain) and true is
/// returned; returning false means the target lacks the needed operations
/// and the caller must lower the node some other way.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry the chain in operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller than the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds the signmask value converted to floating point (exactly
  // representable here, since the overflow case was handled above).
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  // Sel = (Src < signmask), i.e. the value fits in the signed range as-is.
  if (Node->isStrictFPOpcode()) {
    // Signaling compare so an unordered (NaN) input raises the exception
    // the strict conversion would have raised.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    // The select condition was computed in the source type's setcc result
    // type; widen/truncate it for selecting in the destination type.
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through FSUB and FP_TO_SINT so exception ordering
      // is preserved.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    // XOR with the sign mask re-adds the offset for the out-of-range case.
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8341  
expandUINT_TO_FP(SDNode * Node,SDValue & Result,SDValue & Chain,SelectionDAG & DAG) const8342  bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8343                                        SDValue &Chain,
8344                                        SelectionDAG &DAG) const {
8345    // This transform is not correct for converting 0 when rounding mode is set
8346    // to round toward negative infinity which will produce -0.0. So disable under
8347    // strictfp.
8348    if (Node->isStrictFPOpcode())
8349      return false;
8350  
8351    SDValue Src = Node->getOperand(0);
8352    EVT SrcVT = Src.getValueType();
8353    EVT DstVT = Node->getValueType(0);
8354  
8355    if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8356      return false;
8357  
8358    // Only expand vector types if we have the appropriate vector bit operations.
8359    if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8360                             !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8361                             !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8362                             !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8363                             !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8364      return false;
8365  
8366    SDLoc dl(SDValue(Node, 0));
8367    EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8368  
8369    // Implementation of unsigned i64 to f64 following the algorithm in
8370    // __floatundidf in compiler_rt.  This implementation performs rounding
8371    // correctly in all rounding modes with the exception of converting 0
8372    // when rounding toward negative infinity. In that case the fsub will produce
8373    // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8374    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8375    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8376        llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8377    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8378    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8379    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8380  
8381    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8382    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8383    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8384    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8385    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8386    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8387    SDValue HiSub =
8388        DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8389    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8390    return true;
8391  }
8392  
8393  SDValue
createSelectForFMINNUM_FMAXNUM(SDNode * Node,SelectionDAG & DAG) const8394  TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8395                                                 SelectionDAG &DAG) const {
8396    unsigned Opcode = Node->getOpcode();
8397    assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8398            Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8399           "Wrong opcode");
8400  
8401    if (Node->getFlags().hasNoNaNs()) {
8402      ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8403      SDValue Op1 = Node->getOperand(0);
8404      SDValue Op2 = Node->getOperand(1);
8405      SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8406      // Copy FMF flags, but always set the no-signed-zeros flag
8407      // as this is implied by the FMINNUM/FMAXNUM semantics.
8408      SDNodeFlags Flags = Node->getFlags();
8409      Flags.setNoSignedZeros(true);
8410      SelCC->setFlags(Flags);
8411      return SelCC;
8412    }
8413  
8414    return SDValue();
8415  }
8416  
expandFMINNUM_FMAXNUM(SDNode * Node,SelectionDAG & DAG) const8417  SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8418                                                SelectionDAG &DAG) const {
8419    SDLoc dl(Node);
8420    unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8421      ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8422    EVT VT = Node->getValueType(0);
8423  
8424    if (VT.isScalableVector())
8425      report_fatal_error(
8426          "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8427  
8428    if (isOperationLegalOrCustom(NewOp, VT)) {
8429      SDValue Quiet0 = Node->getOperand(0);
8430      SDValue Quiet1 = Node->getOperand(1);
8431  
8432      if (!Node->getFlags().hasNoNaNs()) {
8433        // Insert canonicalizes if it's possible we need to quiet to get correct
8434        // sNaN behavior.
8435        if (!DAG.isKnownNeverSNaN(Quiet0)) {
8436          Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8437                               Node->getFlags());
8438        }
8439        if (!DAG.isKnownNeverSNaN(Quiet1)) {
8440          Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8441                               Node->getFlags());
8442        }
8443      }
8444  
8445      return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8446    }
8447  
8448    // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8449    // instead if there are no NaNs and there can't be an incompatible zero
8450    // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8451    if ((Node->getFlags().hasNoNaNs() ||
8452         (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8453          DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8454        (Node->getFlags().hasNoSignedZeros() ||
8455         DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8456         DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8457      unsigned IEEE2018Op =
8458          Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8459      if (isOperationLegalOrCustom(IEEE2018Op, VT))
8460        return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8461                           Node->getOperand(1), Node->getFlags());
8462    }
8463  
8464    if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8465      return SelCC;
8466  
8467    return SDValue();
8468  }
8469  
/// Expand FMINIMUM/FMAXIMUM (IEEE 754-2019 semantics: a NaN operand
/// propagates, and -0.0 orders before +0.0) using whatever min/max or
/// compare+select the target provides, then patch up NaN propagation and
/// signed-zero ordering where the chosen base operation doesn't guarantee
/// them.
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
    // The IEEE variant already orders -0.0 < +0.0, so the signed-zero
    // fix-up below can be skipped.
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
  } else {
    // The select fallback needs VSELECT for vectors; otherwise scalarize.
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(
        *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
    // SETUO is true iff at least one operand is NaN; then return NaN.
    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
    // Only when the computed result compares equal to zero do we need the
    // fix-up: prefer whichever operand is the "winning" signed zero
    // (+0.0 for max, -0.0 for min), checking RHS after LHS so RHS wins ties.
    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
    SDValue TestZero =
        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
        MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
        LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
  }

  return MinMax;
}
8533  
8534  /// Returns a true value if if this FPClassTest can be performed with an ordered
8535  /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8536  /// std::nullopt if it cannot be performed as a compare with 0.
isFCmpEqualZero(FPClassTest Test,const fltSemantics & Semantics,const MachineFunction & MF)8537  static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8538                                             const fltSemantics &Semantics,
8539                                             const MachineFunction &MF) {
8540    FPClassTest OrderedMask = Test & ~fcNan;
8541    FPClassTest NanTest = Test & fcNan;
8542    bool IsOrdered = NanTest == fcNone;
8543    bool IsUnordered = NanTest == fcNan;
8544  
8545    // Skip cases that are testing for only a qnan or snan.
8546    if (!IsOrdered && !IsUnordered)
8547      return std::nullopt;
8548  
8549    if (OrderedMask == fcZero &&
8550        MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8551      return IsOrdered;
8552    if (OrderedMask == (fcZero | fcSubnormal) &&
8553        MF.getDenormalMode(Semantics).inputsAreZero())
8554      return IsOrdered;
8555    return std::nullopt;
8556  }
8557  
/// Expand an IS_FPCLASS test of \p Op against the class mask \p Test into a
/// boolean of type \p ResultVT.  Tries cheap float compares first (when FP
/// exceptions may be ignored), then falls back to bit tests on the integer
/// representation of the value.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         FPClassTest Test, SDNodeFlags Flags,
                                         const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if ((Test & fcAllFlags) == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;
  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
    IsInverted = true;
    Test = InvertedCheck;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x86_fp80 has an explicit integer bit in the mantissa, which needs
  // special handling throughout.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;

    // zero / zero|subnormal tests can become a single compare with 0.0 when
    // the denormal mode allows it (see isFCmpEqualZero).
    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> (x != x): the unordered compare is true iff x is NaN.
    if (Test == fcNan &&
        isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT())) {
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInverted ? ISD::SETO : ISD::SETUO);
    }

    if (Test == fcInf &&
        isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
    }
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  // The quiet bit is the top mantissa bit.
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // Res accumulates the OR of all partial class checks.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  // Lazily materialize the f80 explicit-integer-bit test; several checks
  // below share it.
  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  // NOTE(review): 0.0 implicitly converts to the integer 0 expected by
  // getConstant here; DAG.getConstant(0, ...) would be clearer.
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      // Normal f80 values additionally require the explicit int bit set.
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
8818  
8819  // Only expand vector types if we have the appropriate vector bit operations.
canExpandVectorCTPOP(const TargetLowering & TLI,EVT VT)8820  static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8821    assert(VT.isVector() && "Expected vector type");
8822    unsigned Len = VT.getScalarSizeInBits();
8823    return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8824           TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
8825           TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
8826           (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8827           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
8828  }
8829  
/// Expand CTPOP using the parallel bit-counting ("SWAR") algorithm: pairwise
/// bit sums, then nibble sums, then a byte-sum gathered with a multiply (or a
/// shift-add ladder when multiply is unavailable).  Returns an empty SDValue
/// if this expansion does not apply to the type.
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Masks are 0x55.., 0x33.. and 0x0F.. splatted to the full element width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // For 8-bit elements the per-byte count is already the answer.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                     DAG.getNode(ISD::ADD, dl, VT, Op,
                                 DAG.getNode(ISD::SRL, dl, VT, Op,
                                             DAG.getConstant(8, dl, ShVT))),
                     DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte.
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No cheap multiply: accumulate the byte sums with a shift-add ladder,
    // which computes the same product by repeated doubling.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
8906  
expandVPCTPOP(SDNode * Node,SelectionDAG & DAG) const8907  SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8908    SDLoc dl(Node);
8909    EVT VT = Node->getValueType(0);
8910    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8911    SDValue Op = Node->getOperand(0);
8912    SDValue Mask = Node->getOperand(1);
8913    SDValue VL = Node->getOperand(2);
8914    unsigned Len = VT.getScalarSizeInBits();
8915    assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8916  
8917    // TODO: Add support for irregular type lengths.
8918    if (!(Len <= 128 && Len % 8 == 0))
8919      return SDValue();
8920  
8921    // This is same algorithm of expandCTPOP from
8922    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8923    SDValue Mask55 =
8924        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8925    SDValue Mask33 =
8926        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8927    SDValue Mask0F =
8928        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8929  
8930    SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8931  
8932    // v = v - ((v >> 1) & 0x55555555...)
8933    Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8934                       DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8935                                   DAG.getConstant(1, dl, ShVT), Mask, VL),
8936                       Mask55, Mask, VL);
8937    Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8938  
8939    // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8940    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8941    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8942                       DAG.getNode(ISD::VP_SRL, dl, VT, Op,
8943                                   DAG.getConstant(2, dl, ShVT), Mask, VL),
8944                       Mask33, Mask, VL);
8945    Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8946  
8947    // v = (v + (v >> 4)) & 0x0F0F0F0F...
8948    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8949                       Mask, VL),
8950    Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8951    Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8952  
8953    if (Len <= 8)
8954      return Op;
8955  
8956    // v = (v * 0x01010101...) >> (Len - 8)
8957    SDValue V;
8958    if (isOperationLegalOrCustomOrPromote(
8959            ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
8960      SDValue Mask01 =
8961          DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8962      V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
8963    } else {
8964      V = Op;
8965      for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8966        SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8967        V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
8968                        DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
8969                        Mask, VL);
8970      }
8971    }
8972    return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
8973                       Mask, VL);
8974  }
8975  
/// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF. Prefers the sibling opcode when
/// it is legal or custom, otherwise smears the set bits right and counts the
/// zero bits via CTPOP of the complement. Returns an empty SDValue for
/// vectors lacking the required bit operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // ctlz(0) must be the element bit width for the non-ZERO_UNDEF form.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  // After smearing, ~x has exactly one set bit per leading zero of the input.
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
9025  
expandVPCTLZ(SDNode * Node,SelectionDAG & DAG) const9026  SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9027    SDLoc dl(Node);
9028    EVT VT = Node->getValueType(0);
9029    EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9030    SDValue Op = Node->getOperand(0);
9031    SDValue Mask = Node->getOperand(1);
9032    SDValue VL = Node->getOperand(2);
9033    unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9034  
9035    // do this:
9036    // x = x | (x >> 1);
9037    // x = x | (x >> 2);
9038    // ...
9039    // x = x | (x >>16);
9040    // x = x | (x >>32); // for 64-bit input
9041    // return popcount(~x);
9042    for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9043      SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9044      Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9045                       DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9046                       VL);
9047    }
9048    Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
9049                     VL);
9050    return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9051  }
9052  
/// Expand CTTZ via a De Bruijn multiply + table lookup loaded from the
/// constant pool. (x & -x) isolates the lowest set bit; multiplying by a De
/// Bruijn constant and shifting right by BitWidth - log2(BitWidth) produces a
/// unique index per bit position, which indexes a precomputed i8 table.
/// Only 32- and 64-bit scalars are handled; returns an empty SDValue
/// otherwise.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // Neg = -Op, so (Op & Neg) keeps only the lowest set bit.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse table: for each bit position i, record i at the index
  // that the multiply+shift above produces for that bit.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  // Zero-extending i8 load of Table[Lookup].
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // The table yields 0 for a zero input; the ZERO_UNDEF form may use that
  // directly, the plain form must select BitWidth for zero.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
9096  
/// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF. Tries, in order: the sibling
/// opcode, a De Bruijn table lookup (scalars without CTPOP/CTLZ), and finally
/// the popcount(~x & (x - 1)) identity (or a CTLZ-based variant). Returns an
/// empty SDValue for vectors lacking the required bit operations.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // cttz(0) must be the element bit width for the non-ZERO_UNDEF form.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
9152  
expandVPCTTZ(SDNode * Node,SelectionDAG & DAG) const9153  SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9154    SDValue Op = Node->getOperand(0);
9155    SDValue Mask = Node->getOperand(1);
9156    SDValue VL = Node->getOperand(2);
9157    SDLoc dl(Node);
9158    EVT VT = Node->getValueType(0);
9159  
9160    // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9161    SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9162                              DAG.getConstant(-1, dl, VT), Mask, VL);
9163    SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9164                                   DAG.getConstant(1, dl, VT), Mask, VL);
9165    SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9166    return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9167  }
9168  
/// Expand ISD::VP_CTTZ_ELTS: count the leading false elements of a boolean
/// vector by selecting each element's index where the condition holds (and
/// the EVL elsewhere), then taking the unsigned minimum across the vector.
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(0);
  // Vector of result-typed indices, one per source element.
  EVT ResVecVT =
      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                             SrcVT.getVectorElementCount());
    // Non-zero elements become true.
    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
  }

  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
  SDValue Select =
      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
  // ExtEVL also serves as the reduction's start value, so an all-false
  // vector yields EVL.
  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
}
9201  
/// Expand ISD::ABS (or its negation when \p IsNegative is true). Prefers a
/// min/max-based form when those operations are legal, otherwise falls back
/// to the sign-splat shift+xor+sub sequence. Returns an empty SDValue for
/// vectors lacking the required operations.
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    // Freeze so both uses of Op observe the same value if it is poison/undef.
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMAX, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::UMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::SMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  Op = DAG.getFreeze(Op);
  // Shift = all-ones if Op is negative, all-zeros otherwise (sign splat).
  SDValue Shift = DAG.getNode(
      ISD::SRA, dl, VT, Op,
      DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
  if (!IsNegative)
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
9256  
/// Expand ISD::ABDS/ABDU (absolute difference). Tries, in order: max-min,
/// or-of-saturating-subs (unsigned only), a branchless compare/xor/sub form
/// when setcc produces all-ones booleans, and finally a generic select.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // Freeze both operands: each is used more than once below.
  SDValue LHS = DAG.getFreeze(N->getOperand(0));
  SDValue RHS = DAG.getFreeze(N->getOperand(1));
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  // One of the two saturating subs is zero, so OR combines them.
  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
    return DAG.getNode(ISD::OR, dl, VT,
                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));

  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
    return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
  }

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
}
9298  
/// Expand the averaging nodes ISD::AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU.
/// Uses a plain add+shift when the operands are provably already extended,
/// widens scalars when a double-width type is legal and cheap to truncate,
/// and otherwise emits the overflow-free and/or + xor + shift identity.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  // Floor uses add(and, shift); ceil uses sub(or, shift) — see the identity
  // comments before the final expansion below.
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (>= 2 sign bits / >= 1 leading zero means LHS + RHS cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
       DAG.ComputeNumSignBits(RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
    if (!IsFloor)
      Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
    return DAG.getNode(ShiftOpc, dl, VT, Sum,
                       DAG.getShiftAmountConstant(1, VT, dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
    if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
      if (!IsFloor)
        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
                          DAG.getConstant(1, dl, ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
                        DAG.getShiftAmountConstant(1, ExtVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
    }
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze: each operand feeds two nodes below.
  LHS = DAG.getFreeze(LHS);
  RHS = DAG.getFreeze(RHS);
  SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
  SDValue Shift =
      DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
  return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
}
9360  
/// Expand ISD::BSWAP into rotates (i16) or shift/mask/or sequences
/// (i32/i64 scalar elements). Returns an empty SDValue for any other or
/// non-simple type.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // TmpN holds byte N of the result (1 = lowest): move each input byte to
    // its mirrored position, then OR the pieces together.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme with eight bytes; ORs are paired into a balanced tree.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9418  
/// Expand ISD::VP_BSWAP into predicated shift/mask/or sequences for
/// i16/i32/i64 scalar elements, mirroring expandBSWAP but threading the
/// mask and EVL operands through every node. i16 uses shl/srl/or here since
/// there is no predicated rotate. Returns an empty SDValue otherwise.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // TmpN holds byte N of the result (1 = lowest); see expandBSWAP.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Eight-byte version; ORs paired into a balanced tree.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9494  
expandBITREVERSE(SDNode * N,SelectionDAG & DAG) const9495  SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9496    SDLoc dl(N);
9497    EVT VT = N->getValueType(0);
9498    SDValue Op = N->getOperand(0);
9499    EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9500    unsigned Sz = VT.getScalarSizeInBits();
9501  
9502    SDValue Tmp, Tmp2, Tmp3;
9503  
9504    // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9505    // and finally the i1 pairs.
9506    // TODO: We can easily support i4/i2 legal types if any target ever does.
9507    if (Sz >= 8 && isPowerOf2_32(Sz)) {
9508      // Create the masks - repeating the pattern every byte.
9509      APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9510      APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9511      APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9512  
9513      // BSWAP if the type is wider than a single byte.
9514      Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9515  
9516      // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9517      Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9518      Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9519      Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9520      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9521      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9522  
9523      // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9524      Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9525      Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9526      Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9527      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9528      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9529  
9530      // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9531      Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9532      Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9533      Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9534      Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9535      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9536      return Tmp;
9537    }
9538  
9539    Tmp = DAG.getConstant(0, dl, VT);
9540    for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9541      if (I < J)
9542        Tmp2 =
9543            DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9544      else
9545        Tmp2 =
9546            DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9547  
9548      APInt Shift = APInt::getOneBitSet(Sz, J);
9549      Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9550      Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9551    }
9552  
9553    return Tmp;
9554  }
9555  
expandVPBITREVERSE(SDNode * N,SelectionDAG & DAG) const9556  SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9557    assert(N->getOpcode() == ISD::VP_BITREVERSE);
9558  
9559    SDLoc dl(N);
9560    EVT VT = N->getValueType(0);
9561    SDValue Op = N->getOperand(0);
9562    SDValue Mask = N->getOperand(1);
9563    SDValue EVL = N->getOperand(2);
9564    EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9565    unsigned Sz = VT.getScalarSizeInBits();
9566  
9567    SDValue Tmp, Tmp2, Tmp3;
9568  
9569    // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9570    // and finally the i1 pairs.
9571    // TODO: We can easily support i4/i2 legal types if any target ever does.
9572    if (Sz >= 8 && isPowerOf2_32(Sz)) {
9573      // Create the masks - repeating the pattern every byte.
9574      APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9575      APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9576      APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9577  
9578      // BSWAP if the type is wider than a single byte.
9579      Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9580  
9581      // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9582      Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9583                         Mask, EVL);
9584      Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9585                         DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9586      Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9587                         Mask, EVL);
9588      Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9589                         Mask, EVL);
9590      Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9591  
9592      // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9593      Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9594                         Mask, EVL);
9595      Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9596                         DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9597      Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9598                         Mask, EVL);
9599      Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9600                         Mask, EVL);
9601      Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9602  
9603      // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9604      Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9605                         Mask, EVL);
9606      Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9607                         DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9608      Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9609                         Mask, EVL);
9610      Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9611                         Mask, EVL);
9612      Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9613      return Tmp;
9614    }
9615    return SDValue();
9616  }
9617  
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  // Expand a (possibly extending) vector load into scalar operations and
  // rebuild the value with BUILD_VECTOR; returns {loaded value, new chain}.
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();   // vector type as laid out in memory
  EVT DstVT = LD->getValueType(0); // vector type produced in registers
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask that isolates a single element's bits after shifting it down.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Extract each element from the wide integer via shift+mask+truncate.
    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Big-endian targets store element 0 in the most-significant bits, so
      // mirror the index there.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Honour the original load's extension kind on every scalar element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one (extending) scalar load per element.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    // Advance the base pointer by one element for the next iteration.
    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // Merge all per-element chains; the scalar loads are order-independent.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9707  
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  // Expand a vector (trunc)store into per-element scalar stores, or into a
  // single integer store when the element type is not byte-sized. Returns
  // the new chain.
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // Pack each (truncated) element into its bit position within one wide
    // integer, then store that integer with a single store below.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // Big-endian targets place element 0 in the most-significant bits, so
      // mirror the index there.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Merge the per-element store chains; the stores are order-independent.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
9784  
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  // Lower a load whose alignment the target cannot handle natively.
  // Returns {result value, output chain}.
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        // Extending load: widen the bitcast value up to the result type.
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; // round up

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: the low half is at the lower address.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half is at the lower address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo
  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
9934  
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  // Lower a store whose alignment the target cannot handle natively.
  // Returns the new output chain.
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // round up

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts, low half first on little-endian targets and high
  // half first on big-endian targets.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  // Merge the two store chains into one.
  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10067  
10068  SDValue
IncrementMemoryAddress(SDValue Addr,SDValue Mask,const SDLoc & DL,EVT DataVT,SelectionDAG & DAG,bool IsCompressedMemory) const10069  TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10070                                         const SDLoc &DL, EVT DataVT,
10071                                         SelectionDAG &DAG,
10072                                         bool IsCompressedMemory) const {
10073    SDValue Increment;
10074    EVT AddrVT = Addr.getValueType();
10075    EVT MaskVT = Mask.getValueType();
10076    assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10077           "Incompatible types of Data and Mask");
10078    if (IsCompressedMemory) {
10079      if (DataVT.isScalableVector())
10080        report_fatal_error(
10081            "Cannot currently handle compressed memory with scalable vectors");
10082      // Incrementing the pointer according to number of '1's in the mask.
10083      EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10084      SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10085      if (MaskIntVT.getSizeInBits() < 32) {
10086        MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10087        MaskIntVT = MVT::i32;
10088      }
10089  
10090      // Count '1's with POPCNT.
10091      Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10092      Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10093      // Scale is an element size in bytes.
10094      SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10095                                      AddrVT);
10096      Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10097    } else if (DataVT.isScalableVector()) {
10098      Increment = DAG.getVScale(DL, AddrVT,
10099                                APInt(AddrVT.getFixedSizeInBits(),
10100                                      DataVT.getStoreSize().getKnownMinValue()));
10101    } else
10102      Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10103  
10104    return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10105  }
10106  
/// Clamp a dynamic index so that indexing a subvector of SubEC elements
/// starting at Idx stays within the bounds of a vector of type VecVT.
/// Returns the (possibly clamped) index value.
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Clamp to (vscale * NElts) - NumSubElts, the last index at which the
    // fixed-width subvector still fits inside the scalable vector.  Use
    // USUBSAT when NumSubElts may exceed the minimum element count so the
    // subtraction cannot wrap below zero.
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  // Single-element access into a power-of-2-sized vector: masking the low
  // bits of the index is equivalent to (and cheaper than) a umin clamp.
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  // General case: clamp to the last in-bounds starting index (0 when the
  // subvector is at least as large as the vector itself).
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
10140  
getVectorElementPointer(SelectionDAG & DAG,SDValue VecPtr,EVT VecVT,SDValue Index) const10141  SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10142                                                  SDValue VecPtr, EVT VecVT,
10143                                                  SDValue Index) const {
10144    return getVectorSubVecPointer(
10145        DAG, VecPtr, VecVT,
10146        EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10147        Index);
10148  }
10149  
/// Compute a pointer to the subvector of type SubVecVT at index Index within
/// the in-memory vector of type VecVT based at VecPtr.  The index is clamped
/// (see clampDynamicVectorIndex) so the resulting access stays in bounds.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For a scalable subvector each index step spans vscale base elements, so
  // scale the index by vscale (vscale * 1) before converting to bytes.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Convert the element index to a byte offset and add it to the base.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10179  
10180  //===----------------------------------------------------------------------===//
10181  // Implementation of Emulated TLS Model
10182  //===----------------------------------------------------------------------===//
10183  
/// Lower a TLS global-address node under the emulated TLS model.
///
/// Access to the address of TLS variable xyz is lowered to a function call:
///   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  // Build the single call argument: the address of the "__emutls_v.<name>"
  // control variable, which must already exist in the module (asserted
  // below).
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Emit the call to __emutls_get_address, chained off the entry token.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10220  
lowerCmpEqZeroToCtlzSrl(SDValue Op,SelectionDAG & DAG) const10221  SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10222                                                  SelectionDAG &DAG) const {
10223    assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10224    if (!isCtlzFast())
10225      return SDValue();
10226    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10227    SDLoc dl(Op);
10228    if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10229      EVT VT = Op.getOperand(0).getValueType();
10230      SDValue Zext = Op.getOperand(0);
10231      if (VT.bitsLT(MVT::i32)) {
10232        VT = MVT::i32;
10233        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10234      }
10235      unsigned Log2b = Log2_32(VT.getSizeInBits());
10236      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10237      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10238                                DAG.getConstant(Log2b, dl, MVT::i32));
10239      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10240    }
10241    return SDValue();
10242  }
10243  
/// Expand an integer min/max node (SMIN/SMAX/UMIN/UMAX) into target-legal
/// operations, trying cheap arithmetic identities and reuse of existing
/// SETCC nodes before falling back to a generic setcc+select sequence.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Op0 is used twice (setcc and sub), so freeze it first.
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    // Prefer a condition code whose SETCC already exists in the DAG, first
    // with the operands in select(cond, Op0, Op1) order ...
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    // ... then with the commuted condition and swapped select operands.
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC found: emit the preferred comparison directly.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10324  
/// Expand a saturating add/sub node ([SU]ADDSAT / [SU]SUBSAT) into legal
/// operations: first via umin/umax identities for the unsigned cases,
/// otherwise via the corresponding overflow opcode plus a select (or a
/// bitwise trick when booleans are all-ones) that clamps on overflow.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map each saturating opcode to the overflow opcode used to detect when
  // clamping is required.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      // With all-ones booleans the overflow flag sign-extends to a full mask,
      // so OR-ing saturates to all-ones without a select.
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      // Likewise, AND with the inverted mask saturates to zero on overflow.
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // On signed overflow, SumDiff's sign is inverted relative to the true
  // result: broadcasting its sign bit (SRA by BW-1) and XOR-ing with
  // SIGNED_MIN produces SIGNED_MAX for positive overflow and SIGNED_MIN for
  // negative overflow.
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10440  
/// Expand a three-way compare node ([US]CMP, returning -1/0/+1) using either
/// two selects or a subtraction of the two boolean comparison results.
SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  EVT ResVT = Node->getValueType(0);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDLoc dl(Node);

  // Pick signed or unsigned comparison predicates based on the opcode.
  auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
  auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
  SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
  SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);

  // We can't perform arithmetic on i1 values. Extending them would
  // probably result in worse codegen, so let's just use two selects instead.
  // Some targets are also just better off using selects rather than subtraction
  // because one of the conditions can be merged with one of the selects.
  // And finally, if we don't know the contents of high bits of a boolean value
  // we can't perform any arithmetic either.
  if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
      getBooleanContents(BoolVT) == UndefinedBooleanContent) {
    SDValue SelectZeroOrOne =
        DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
                      DAG.getConstant(0, dl, ResVT));
    return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
                         SelectZeroOrOne);
  }

  // Compute IsGT - IsLT.  With 0/1 booleans this directly yields
  // +1 / 0 / -1.  With 0/-1 booleans the operands must be subtracted in the
  // opposite order so greater-than still produces +1 (0 - (-1)) and
  // less-than produces -1 ((-1) - 0); swapping achieves that.
  if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
    std::swap(IsGT, IsLT);
  return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
                            ResVT);
}
10475  
expandShlSat(SDNode * Node,SelectionDAG & DAG) const10476  SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10477    unsigned Opcode = Node->getOpcode();
10478    bool IsSigned = Opcode == ISD::SSHLSAT;
10479    SDValue LHS = Node->getOperand(0);
10480    SDValue RHS = Node->getOperand(1);
10481    EVT VT = LHS.getValueType();
10482    SDLoc dl(Node);
10483  
10484    assert((Node->getOpcode() == ISD::SSHLSAT ||
10485            Node->getOpcode() == ISD::USHLSAT) &&
10486            "Expected a SHLSAT opcode");
10487    assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10488    assert(VT.isInteger() && "Expected operands to be integers");
10489  
10490    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10491      return DAG.UnrollVectorOp(Node);
10492  
10493    // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10494  
10495    unsigned BW = VT.getScalarSizeInBits();
10496    EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10497    SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10498    SDValue Orig =
10499        DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10500  
10501    SDValue SatVal;
10502    if (IsSigned) {
10503      SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10504      SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10505      SDValue Cond =
10506          DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10507      SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10508    } else {
10509      SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10510    }
10511    SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10512    return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10513  }
10514  
/// Compute the full product of the two WideVT values whose low/high halves
/// are (LH:LL) and (RH:RL), returning the low and high halves of the result
/// in Lo and Hi (each of the half-width type).  Uses a MUL libcall on WideVT
/// when one is available, otherwise expands the multiplication by hand.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, EVT WideVT,
                                        const SDValue LL, const SDValue LH,
                                        const SDValue RL, const SDValue RH,
                                        SDValue &Lo, SDValue &Hi) const {
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  // Also, we can fall back to a division in some cases, but that's a big
  // performance hit in the general case.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // We'll expand the multiplication by brute force because we have no other
    // options. This is a trivially-generalized version of the code from
    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
    // 4.3.1).
    EVT VT = LL.getValueType();
    unsigned Bits = VT.getSizeInBits();
    unsigned HalfBits = Bits >> 1;
    // Split the low operands into HalfBits-sized pieces: LLL/LLH are the low
    // and high halves of LL, RLL/RLH those of RL.
    SDValue Mask =
        DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
    SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
    SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);

    SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
    SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);

    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
    SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
    SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
    SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);

    // Accumulate the partial products with carries propagated through the
    // working values T, U, V, W (Algorithm M's running sums).
    SDValue U = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
    SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
    SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);

    SDValue V = DAG.getNode(ISD::ADD, dl, VT,
                            DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
    SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);

    SDValue W =
        DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
                    DAG.getNode(ISD::ADD, dl, VT, UH, VH));
    Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
                     DAG.getNode(ISD::SHL, dl, VT, V, Shift));

    // The cross terms RH*LL and RL*LH only contribute to the high half.
    Hi = DAG.getNode(ISD::ADD, dl, VT, W,
                     DAG.getNode(ISD::ADD, dl, VT,
                                 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
                                 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
  } else {
    // Attempt a libcall.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(Signed);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = {LL, LH, RL, RH};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = {LH, LL, RH, RL};
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    // The halves come back in endianness order; unpack accordingly.
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      Lo = Ret.getOperand(0);
      Hi = Ret.getOperand(1);
    } else {
      Lo = Ret.getOperand(1);
      Hi = Ret.getOperand(0);
    }
  }
}
10603  
forceExpandWideMUL(SelectionDAG & DAG,const SDLoc & dl,bool Signed,const SDValue LHS,const SDValue RHS,SDValue & Lo,SDValue & Hi) const10604  void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10605                                          bool Signed, const SDValue LHS,
10606                                          const SDValue RHS, SDValue &Lo,
10607                                          SDValue &Hi) const {
10608    EVT VT = LHS.getValueType();
10609    assert(RHS.getValueType() == VT && "Mismatching operand types");
10610  
10611    SDValue HiLHS;
10612    SDValue HiRHS;
10613    if (Signed) {
10614      // The high part is obtained by SRA'ing all but one of the bits of low
10615      // part.
10616      unsigned LoSize = VT.getFixedSizeInBits();
10617      HiLHS = DAG.getNode(
10618          ISD::SRA, dl, VT, LHS,
10619          DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10620      HiRHS = DAG.getNode(
10621          ISD::SRA, dl, VT, RHS,
10622          DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10623    } else {
10624      HiLHS = DAG.getConstant(0, dl, VT);
10625      HiRHS = DAG.getConstant(0, dl, VT);
10626    }
10627    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10628    forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10629  }
10630  
/// Expand a fixed point multiplication node ([US]MULFIX[SAT]) into integer
/// operations: compute the double-width product, shift it right by the
/// scale, and (for the saturating forms) clamp the result on overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Saturating, scale 0: use SMULO and saturate by the product sign.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Unsigned saturating, scale 0: on overflow clamp to the max value.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No legal way to get the double-width product of a vector here; let the
    // caller handle it.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With no fractional bits the overflow check reduces to comparing Hi
    // against the sign extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
10784  
/// Expand a fixed point division ([US]DIVFIX[SAT]) into a regular integer
/// division performed in the operands' own type. The scale is absorbed by
/// shifting the LHS up and/or the RHS down before dividing; when the operands
/// lack enough known headroom for that, SDValue() is returned and the caller
/// must legalize another way (e.g. by widening).
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; only push the remainder of the scale onto the
  // RHS (shifting it down) when the LHS headroom is exhausted.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getShiftAmountConstant(LHSShift, VT, dl));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getShiftAmountConstant(RHSShift, VT, dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The true quotient is negative iff exactly one operand is negative;
    // derive it from the operand signs (XOR) rather than from Quot, which may
    // be zero even when the exact quotient is a small negative fraction.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
10870  
expandUADDSUBO(SDNode * Node,SDValue & Result,SDValue & Overflow,SelectionDAG & DAG) const10871  void TargetLowering::expandUADDSUBO(
10872      SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10873    SDLoc dl(Node);
10874    SDValue LHS = Node->getOperand(0);
10875    SDValue RHS = Node->getOperand(1);
10876    bool IsAdd = Node->getOpcode() == ISD::UADDO;
10877  
10878    // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10879    unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10880    if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10881      SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10882      SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10883                                      { LHS, RHS, CarryIn });
10884      Result = SDValue(NodeCarry.getNode(), 0);
10885      Overflow = SDValue(NodeCarry.getNode(), 1);
10886      return;
10887    }
10888  
10889    Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10890                              LHS.getValueType(), LHS, RHS);
10891  
10892    EVT ResultType = Node->getValueType(1);
10893    EVT SetCCType = getSetCCResultType(
10894        DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10895    SDValue SetCC;
10896    if (IsAdd && isOneConstant(RHS)) {
10897      // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10898      // the live range of X. We assume comparing with 0 is cheap.
10899      // The general case (X + C) < C is not necessarily beneficial. Although we
10900      // reduce the live range of X, we may introduce the materialization of
10901      // constant C.
10902      SetCC =
10903          DAG.getSetCC(dl, SetCCType, Result,
10904                       DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10905    } else if (IsAdd && isAllOnesConstant(RHS)) {
10906      // Special case: uaddo X, -1 overflows if X != 0.
10907      SetCC =
10908          DAG.getSetCC(dl, SetCCType, LHS,
10909                       DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10910    } else {
10911      ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10912      SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10913    }
10914    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10915  }
10916  
expandSADDSUBO(SDNode * Node,SDValue & Result,SDValue & Overflow,SelectionDAG & DAG) const10917  void TargetLowering::expandSADDSUBO(
10918      SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10919    SDLoc dl(Node);
10920    SDValue LHS = Node->getOperand(0);
10921    SDValue RHS = Node->getOperand(1);
10922    bool IsAdd = Node->getOpcode() == ISD::SADDO;
10923  
10924    Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10925                              LHS.getValueType(), LHS, RHS);
10926  
10927    EVT ResultType = Node->getValueType(1);
10928    EVT OType = getSetCCResultType(
10929        DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10930  
10931    // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10932    unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10933    if (isOperationLegal(OpcSat, LHS.getValueType())) {
10934      SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10935      SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10936      Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10937      return;
10938    }
10939  
10940    SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10941  
10942    // For an addition, the result should be less than one of the operands (LHS)
10943    // if and only if the other operand (RHS) is negative, otherwise there will
10944    // be overflow.
10945    // For a subtraction, the result should be less than one of the operands
10946    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10947    // otherwise there will be overflow.
10948    SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10949    SDValue ConditionRHS =
10950        DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10951  
10952    Overflow = DAG.getBoolExtOrTrunc(
10953        DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10954        ResultType, ResultType);
10955  }
10956  
/// Expand [SU]MULO into a full multiply plus an overflow check. Strategies,
/// in order of preference: a shift-based form for power-of-two constant RHS,
/// a MUL/MULH[SU] pair, [SU]MUL_LOHI, a multiply in the double-width type if
/// that type is legal, and finally forceExpandWideMUL (scalar types only).
/// Returns false if no strategy applies.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting the product back down fails to recover LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Double-width type that can hold the entire product.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples indexed by signedness: {high-half multiply, lo/hi pair
  // multiply, extension used to widen the operands}.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen, multiply, then split the wide product into its two halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    // Last resort only works on scalars.
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff any bit of the top half is set.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11036  
expandVecReduce(SDNode * Node,SelectionDAG & DAG) const11037  SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11038    SDLoc dl(Node);
11039    unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11040    SDValue Op = Node->getOperand(0);
11041    EVT VT = Op.getValueType();
11042  
11043    if (VT.isScalableVector())
11044      report_fatal_error(
11045          "Expanding reductions for scalable vectors is undefined.");
11046  
11047    // Try to use a shuffle reduction for power of two vectors.
11048    if (VT.isPow2VectorType()) {
11049      while (VT.getVectorNumElements() > 1) {
11050        EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11051        if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11052          break;
11053  
11054        SDValue Lo, Hi;
11055        std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11056        Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11057        VT = HalfVT;
11058      }
11059    }
11060  
11061    EVT EltVT = VT.getVectorElementType();
11062    unsigned NumElts = VT.getVectorNumElements();
11063  
11064    SmallVector<SDValue, 8> Ops;
11065    DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11066  
11067    SDValue Res = Ops[0];
11068    for (unsigned i = 1; i < NumElts; i++)
11069      Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11070  
11071    // Result type may be wider than element type.
11072    if (EltVT != Node->getValueType(0))
11073      Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11074    return Res;
11075  }
11076  
expandVecReduceSeq(SDNode * Node,SelectionDAG & DAG) const11077  SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11078    SDLoc dl(Node);
11079    SDValue AccOp = Node->getOperand(0);
11080    SDValue VecOp = Node->getOperand(1);
11081    SDNodeFlags Flags = Node->getFlags();
11082  
11083    EVT VT = VecOp.getValueType();
11084    EVT EltVT = VT.getVectorElementType();
11085  
11086    if (VT.isScalableVector())
11087      report_fatal_error(
11088          "Expanding reductions for scalable vectors is undefined.");
11089  
11090    unsigned NumElts = VT.getVectorNumElements();
11091  
11092    SmallVector<SDValue, 8> Ops;
11093    DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11094  
11095    unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11096  
11097    SDValue Res = AccOp;
11098    for (unsigned i = 0; i < NumElts; i++)
11099      Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11100  
11101    return Res;
11102  }
11103  
expandREM(SDNode * Node,SDValue & Result,SelectionDAG & DAG) const11104  bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11105                                 SelectionDAG &DAG) const {
11106    EVT VT = Node->getValueType(0);
11107    SDLoc dl(Node);
11108    bool isSigned = Node->getOpcode() == ISD::SREM;
11109    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11110    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11111    SDValue Dividend = Node->getOperand(0);
11112    SDValue Divisor = Node->getOperand(1);
11113    if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11114      SDVTList VTs = DAG.getVTList(VT, VT);
11115      Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11116      return true;
11117    }
11118    if (isOperationLegalOrCustom(DivOpc, VT)) {
11119      // X % Y -> X-X/Y*Y
11120      SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11121      SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11122      Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11123      return true;
11124    }
11125    return false;
11126  }
11127  
/// Expand FP_TO_[SU]INT_SAT into an ordinary FP_TO_[SU]INT with explicit
/// saturation. When the integer bounds are exactly representable in the
/// source FP type and FMINNUM/FMAXNUM are legal, clamp on the FP side first;
/// otherwise convert and patch up the out-of-range cases with compares and
/// selects. In both paths NaN inputs produce zero.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Round the integer bounds towards zero into the source FP format; track
  // whether either conversion was inexact.
  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO(Src, Src) is true exactly when Src is NaN.
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
11236  
/// Narrow Op from OperandVT to ResultVT using round-to-odd on the absolute
/// value, then re-attach the sign with integer bit operations. Rounding to
/// odd first makes a subsequent rounding to an even narrower format immune to
/// double-rounding errors (Boldo & Melquiond). Returns the result bitcast to
/// ResultVT; if source and result scalar types already match, Op is returned
/// unchanged.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT WideIntVT = OperandVT.changeTypeToInteger();
  // Extract the sign bit of the wide value; it is re-applied after rounding
  // the magnitude.
  SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
  SDValue SignBit =
      DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
                  DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
  // |Op|: prefer a real FABS node; otherwise clear the sign bit via integer
  // masking.
  SDValue AbsWide;
  if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
    AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
  } else {
    SDValue ClearedSign = DAG.getNode(
        ISD::AND, dl, WideIntVT, OpAsInt,
        DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
    AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
  }
  // Round |Op| to the narrow type, then extend back so it can be compared
  // against the original wide value.
  SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
  SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
  SDValue One = DAG.getConstant(1, dl, ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
  SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
  SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                       AbsWide.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true for both "equal" (exact rounding) and "unordered" (NaN).
  SDValue KeepNarrow =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
  KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue NarrowIsRd =
      DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  // Stepping an even significand by +/-1 ulp (integer add on the bits) lands
  // on the neighbouring odd representable value.
  SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
  SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
  Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
  // Move the saved sign bit down into the narrow integer and OR it back in.
  int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
  SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
  SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
  SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
  Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
}
11304  
/// Expand FP_ROUND. Currently only handles rounding to bf16: the input is
/// first taken to f32 via round-to-odd (avoiding double rounding), then
/// rounded to nearest-even bf16 with integer bias arithmetic, with NaNs
/// quieted explicitly. Returns SDValue() for any other destination type.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(0);
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 means the input is known already-rounded ("trunc" flag),
    // so a plain FP_TO_BF16 suffices.
    if (Node->getConstantOperandVal(1) == 1) {
      return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
    }
    EVT OperandVT = Op.getValueType();
    // Test NaN on the original input, before any narrowing.
    SDValue IsNaN = DAG.getSetCC(
        dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
        Op, Op, ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));

    // Factor in the contribution of the low 16 bits.
    // Round-to-nearest-even: bias is 0x7fff plus the lsb of the bf16 result,
    // so exact ties round towards the even significand.
    SDValue One = DAG.getConstant(1, dl, I32);
    SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
                              DAG.getShiftAmountConstant(16, I32, dl));
    Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
    SDValue RoundingBias =
        DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
    SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(ISD::SRL, dl, I32, Op,
                     DAG.getShiftAmountConstant(16, I32, dl));
    // NOTE(review): this I32->I32 bitcast looks like a no-op; presumably
    // harmless, but confirm it isn't masking an intended type change.
    Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
    EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
    Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  return SDValue();
}
11358  
expandVectorSplice(SDNode * Node,SelectionDAG & DAG) const11359  SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11360                                             SelectionDAG &DAG) const {
11361    assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11362    assert(Node->getValueType(0).isScalableVector() &&
11363           "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11364  
11365    EVT VT = Node->getValueType(0);
11366    SDValue V1 = Node->getOperand(0);
11367    SDValue V2 = Node->getOperand(1);
11368    int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11369    SDLoc DL(Node);
11370  
11371    // Expand through memory thusly:
11372    //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11373    //  Store V1, Ptr
11374    //  Store V2, Ptr + sizeof(V1)
11375    //  If (Imm < 0)
11376    //    TrailingElts = -Imm
11377    //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11378    //  else
11379    //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
11380    //  Res = Load Ptr
11381  
11382    Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11383  
11384    EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11385                                 VT.getVectorElementCount() * 2);
11386    SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11387    EVT PtrVT = StackPtr.getValueType();
11388    auto &MF = DAG.getMachineFunction();
11389    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11390    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11391  
11392    // Store the lo part of CONCAT_VECTORS(V1, V2)
11393    SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11394    // Store the hi part of CONCAT_VECTORS(V1, V2)
11395    SDValue OffsetToV2 = DAG.getVScale(
11396        DL, PtrVT,
11397        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11398    SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11399    SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11400  
11401    if (Imm >= 0) {
11402      // Load back the required element. getVectorElementPointer takes care of
11403      // clamping the index if it's out-of-bounds.
11404      StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11405      // Load the spliced result
11406      return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11407                         MachinePointerInfo::getUnknownStack(MF));
11408    }
11409  
11410    uint64_t TrailingElts = -Imm;
11411  
11412    // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11413    TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11414    SDValue TrailingBytes =
11415        DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11416  
11417    if (TrailingElts > VT.getVectorMinNumElements()) {
11418      SDValue VLBytes =
11419          DAG.getVScale(DL, PtrVT,
11420                        APInt(PtrVT.getFixedSizeInBits(),
11421                              VT.getStoreSize().getKnownMinValue()));
11422      TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11423    }
11424  
11425    // Calculate the start address of the spliced result.
11426    StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11427  
11428    // Load the spliced result
11429    return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11430                       MachinePointerInfo::getUnknownStack(MF));
11431  }
11432  
expandVECTOR_COMPRESS(SDNode * Node,SelectionDAG & DAG) const11433  SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11434                                                SelectionDAG &DAG) const {
11435    SDLoc DL(Node);
11436    SDValue Vec = Node->getOperand(0);
11437    SDValue Mask = Node->getOperand(1);
11438    SDValue Passthru = Node->getOperand(2);
11439  
11440    EVT VecVT = Vec.getValueType();
11441    EVT ScalarVT = VecVT.getScalarType();
11442    EVT MaskVT = Mask.getValueType();
11443    EVT MaskScalarVT = MaskVT.getScalarType();
11444  
11445    // Needs to be handled by targets that have scalable vector types.
11446    if (VecVT.isScalableVector())
11447      report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11448  
11449    SDValue StackPtr = DAG.CreateStackTemporary(
11450        VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11451    int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11452    MachinePointerInfo PtrInfo =
11453        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11454  
11455    MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11456    SDValue Chain = DAG.getEntryNode();
11457    SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11458  
11459    bool HasPassthru = !Passthru.isUndef();
11460  
11461    // If we have a passthru vector, store it on the stack, overwrite the matching
11462    // positions and then re-write the last element that was potentially
11463    // overwritten even though mask[i] = false.
11464    if (HasPassthru)
11465      Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11466  
11467    SDValue LastWriteVal;
11468    APInt PassthruSplatVal;
11469    bool IsSplatPassthru =
11470        ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11471  
11472    if (IsSplatPassthru) {
11473      // As we do not know which position we wrote to last, we cannot simply
11474      // access that index from the passthru vector. So we first check if passthru
11475      // is a splat vector, to use any element ...
11476      LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11477    } else if (HasPassthru) {
11478      // ... if it is not a splat vector, we need to get the passthru value at
11479      // position = popcount(mask) and re-load it from the stack before it is
11480      // overwritten in the loop below.
11481      SDValue Popcount = DAG.getNode(
11482          ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11483      Popcount = DAG.getNode(ISD::ZERO_EXTEND, DL,
11484                             MaskVT.changeVectorElementType(ScalarVT), Popcount);
11485      Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarVT, Popcount);
11486      SDValue LastElmtPtr =
11487          getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11488      LastWriteVal = DAG.getLoad(
11489          ScalarVT, DL, Chain, LastElmtPtr,
11490          MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11491      Chain = LastWriteVal.getValue(1);
11492    }
11493  
11494    unsigned NumElms = VecVT.getVectorNumElements();
11495    for (unsigned I = 0; I < NumElms; I++) {
11496      SDValue Idx = DAG.getVectorIdxConstant(I, DL);
11497  
11498      SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11499      SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11500      Chain = DAG.getStore(
11501          Chain, DL, ValI, OutPtr,
11502          MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11503  
11504      // Get the mask value and add it to the current output position. This
11505      // either increments by 1 if MaskI is true or adds 0 otherwise.
11506      // Freeze in case we have poison/undef mask entries.
11507      SDValue MaskI = DAG.getFreeze(
11508          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11509      MaskI = DAG.getFreeze(MaskI);
11510      MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11511      MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11512      OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11513  
11514      if (HasPassthru && I == NumElms - 1) {
11515        SDValue EndOfVector =
11516            DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11517        SDValue AllLanesSelected =
11518            DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11519        OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11520        OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11521  
11522        // Re-write the last ValI if all lanes were selected. Otherwise,
11523        // overwrite the last write it with the passthru value.
11524        LastWriteVal =
11525            DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI, LastWriteVal);
11526        Chain = DAG.getStore(
11527            Chain, DL, LastWriteVal, OutPtr,
11528            MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
11529      }
11530    }
11531  
11532    return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11533  }
11534  
/// Legalize a SETCC condition code that is not natively supported for OpVT.
///
/// Tries, in order: swapping the operands, inverting the condition (possibly
/// combined with a swap), and finally expanding the comparison into two legal
/// SETCCs combined with AND/OR (e.g. SETUEQ -> SETOGT | SETOLT, inverted).
///
/// \param [in,out] LHS/RHS   comparison operands; may be swapped or, on full
///                           expansion, LHS receives the result and RHS/CC are
///                           cleared to null SDValues.
/// \param [in,out] CC        condition code node; replaced when legalized.
/// \param Mask,EVL           VP operands; both set or both unset.
/// \param [out] NeedInvert   set when the caller must invert the final result.
/// \param [in,out] Chain     for strict FP: chains of both halves are merged
///                           with a TokenFactor.
/// \param IsSignaling        whether a signaling (quiet-NaN-trapping) compare
///                           is requested.
/// \returns true if the node was changed (caller must rebuild/replace it),
///          false if the condition was already legal.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the cheap rewrites that need only one SETCC.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Full expansion: pick two condition codes CC1/CC2 and a combining
    // opcode (AND/OR) such that (LHS CC RHS) == (.. CC1 ..) Opc (.. CC2 ..).
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        // unord(L, R) == (L != L) || (R != R) using unordered-NE.
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // Otherwise compute ord(L, R) below and invert the result.
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      // ord(L, R) == (L == L) && (R == R).
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      // Bit 0x8 of the CondCode encoding distinguishes unordered variants.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Strip the ordering bits and set the "don't care about ordering"
        // bit to get the corresponding SETcc code for CC1.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // For strict FP compares, merge the output chains of both halves.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // Signal the caller that the node was replaced: result is in LHS,
    // RHS/CC are no longer meaningful.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
11688