xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (revision 73ff7384e025033abc98fd5437a48beb8077a90b)
1  //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the SelectionDAG::LegalizeVectors method.
10  //
11  // The vector legalizer looks for vector operations which might need to be
12  // scalarized and legalizes them. This is a separate step from Legalize because
13  // scalarizing can introduce illegal types.  For example, suppose we have an
14  // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
15  // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16  // operation, which introduces nodes with the illegal type i64 which must be
17  // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18  // the operation must be unrolled, which introduces nodes with the illegal
19  // type i8 which must be promoted.
20  //
21  // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22  // or operations that happen to take a vector which are custom-lowered;
23  // the legalization for such operations never produces nodes
24  // with illegal types, so it's okay to put off legalizing them until
25  // SelectionDAG::Legalize runs.
26  //
27  //===----------------------------------------------------------------------===//
28  
29  #include "llvm/ADT/DenseMap.h"
30  #include "llvm/ADT/SmallVector.h"
31  #include "llvm/CodeGen/ISDOpcodes.h"
32  #include "llvm/CodeGen/MachineValueType.h"
33  #include "llvm/CodeGen/SelectionDAG.h"
34  #include "llvm/CodeGen/SelectionDAGNodes.h"
35  #include "llvm/CodeGen/TargetLowering.h"
36  #include "llvm/CodeGen/ValueTypes.h"
37  #include "llvm/IR/DataLayout.h"
38  #include "llvm/Support/Casting.h"
39  #include "llvm/Support/Compiler.h"
40  #include "llvm/Support/Debug.h"
41  #include "llvm/Support/ErrorHandling.h"
42  #include <cassert>
43  #include <cstdint>
44  #include <iterator>
45  #include <utility>
46  
47  using namespace llvm;
48  
49  #define DEBUG_TYPE "legalizevectorops"
50  
51  namespace {
52  
53  class VectorLegalizer {
54    SelectionDAG& DAG;
55    const TargetLowering &TLI;
56    bool Changed = false; // Keep track of whether anything changed
57  
58    /// For nodes that are of legal width, and that have more than one use, this
59    /// map indicates what regularized operand to use.  This allows us to avoid
60    /// legalizing the same thing more than once.
61    SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
62  
63    /// Adds a node to the translation cache.
64    void AddLegalizedOperand(SDValue From, SDValue To) {
65      LegalizedNodes.insert(std::make_pair(From, To));
66      // If someone requests legalization of the new node, return itself.
67      if (From != To)
68        LegalizedNodes.insert(std::make_pair(To, To));
69    }
70  
71    /// Legalizes the given node.
72    SDValue LegalizeOp(SDValue Op);
73  
74    /// Assuming the node is legal, "legalize" the results.
75    SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
76  
77    /// Make sure Results are legal and update the translation cache.
78    SDValue RecursivelyLegalizeResults(SDValue Op,
79                                       MutableArrayRef<SDValue> Results);
80  
81    /// Wrapper to interface LowerOperation with a vector of Results.
82    /// Returns false if the target wants to use default expansion. Otherwise
83    /// returns true. If return is true and the Results are empty, then the
84    /// target wants to keep the input node as is.
85    bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
86  
87    /// Implements unrolling a VSETCC.
88    SDValue UnrollVSETCC(SDNode *Node);
89  
90    /// Implement expand-based legalization of vector operations.
91    ///
92    /// This is just a high-level routine to dispatch to specific code paths for
93    /// operations to legalize them.
94    void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
95  
96    /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
97    /// FP_TO_SINT isn't legal.
98    void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
99  
100    /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
101    /// SINT_TO_FLOAT and SHR on vectors isn't legal.
102    void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
103  
104    /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
105    SDValue ExpandSEXTINREG(SDNode *Node);
106  
107    /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
108    ///
109    /// Shuffles the low lanes of the operand into place and bitcasts to the proper
110    /// type. The contents of the bits in the extended part of each element are
111    /// undef.
112    SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
113  
114    /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
115    ///
116    /// Shuffles the low lanes of the operand into place, bitcasts to the proper
117    /// type, then shifts left and arithmetic shifts right to introduce a sign
118    /// extension.
119    SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
120  
121    /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
122    ///
123    /// Shuffles the low lanes of the operand into place and blends zeros into
124    /// the remaining lanes, finally bitcasting to the proper type.
125    SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
126  
127    /// Expand bswap of vectors into a shuffle if legal.
128    SDValue ExpandBSWAP(SDNode *Node);
129  
130    /// Implement vselect in terms of XOR, AND, OR when blend is not
131    /// supported by the target.
132    SDValue ExpandVSELECT(SDNode *Node);
133    SDValue ExpandVP_SELECT(SDNode *Node);
134    SDValue ExpandVP_MERGE(SDNode *Node);
135    SDValue ExpandVP_REM(SDNode *Node);
136    SDValue ExpandSELECT(SDNode *Node);
137    std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
138    SDValue ExpandStore(SDNode *N);
139    SDValue ExpandFNEG(SDNode *Node);
140    void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
141    void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
142    void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
143    void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
144    void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
145    void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
146    void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
147    void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148    void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149  
150    void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
151  
152    /// Implements vector promotion.
153    ///
154    /// This is essentially just bitcasting the operands to a different type and
155    /// bitcasting the result back to the original type.
156    void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157  
158    /// Implements [SU]INT_TO_FP vector promotion.
159    ///
160    /// This is a [zs]ext of the input operand to a larger integer type.
161    void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162  
163    /// Implements FP_TO_[SU]INT vector promotion of the result type.
164    ///
165    /// It is promoted to a larger integer type.  The result is then
166    /// truncated back to the original type.
167    void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
168  
169  public:
170    VectorLegalizer(SelectionDAG& dag) :
171        DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
172  
173    /// Begin legalizer the vector operations in the DAG.
174    bool Run();
175  };
176  
177  } // end anonymous namespace
178  
179  bool VectorLegalizer::Run() {
180    // Before we start legalizing vector nodes, check if there are any vectors.
181    bool HasVectors = false;
182    for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
183         E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
184      // Check if the values of the nodes contain vectors. We don't need to check
185      // the operands because we are going to check their values at some point.
186      HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
187  
188      // If we found a vector node we can start the legalization.
189      if (HasVectors)
190        break;
191    }
192  
193    // If this basic block has no vectors then no need to legalize vectors.
194    if (!HasVectors)
195      return false;
196  
197    // The legalize process is inherently a bottom-up recursive process (users
198    // legalize their uses before themselves).  Given infinite stack space, we
199    // could just start legalizing on the root and traverse the whole graph.  In
200    // practice however, this causes us to run out of stack space on large basic
201    // blocks.  To avoid this problem, compute an ordering of the nodes where each
202    // node is only legalized after all of its operands are legalized.
203    DAG.AssignTopologicalOrder();
204    for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
205         E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
206      LegalizeOp(SDValue(&*I, 0));
207  
208    // Finally, it's possible the root changed.  Get the new root.
209    SDValue OldRoot = DAG.getRoot();
210    assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
211    DAG.setRoot(LegalizedNodes[OldRoot]);
212  
213    LegalizedNodes.clear();
214  
215    // Remove dead nodes now.
216    DAG.RemoveDeadNodes();
217  
218    return Changed;
219  }
220  
221  SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
222    assert(Op->getNumValues() == Result->getNumValues() &&
223           "Unexpected number of results");
224    // Generic legalization: just pass the operand through.
225    for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
226      AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
227    return SDValue(Result, Op.getResNo());
228  }
229  
230  SDValue
231  VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
232                                              MutableArrayRef<SDValue> Results) {
233    assert(Results.size() == Op->getNumValues() &&
234           "Unexpected number of results");
235    // Make sure that the generated code is itself legal.
236    for (unsigned i = 0, e = Results.size(); i != e; ++i) {
237      Results[i] = LegalizeOp(Results[i]);
238      AddLegalizedOperand(Op.getValue(i), Results[i]);
239    }
240  
241    return Results[Op.getResNo()];
242  }
243  
244  SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
245    // Note that LegalizeOp may be reentered even from single-use nodes, which
246    // means that we always must cache transformed nodes.
247    DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
248    if (I != LegalizedNodes.end()) return I->second;
249  
250    // Legalize the operands
251    SmallVector<SDValue, 8> Ops;
252    for (const SDValue &Oper : Op->op_values())
253      Ops.push_back(LegalizeOp(Oper));
254  
255    SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
256  
257    bool HasVectorValueOrOp =
258        llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
259        llvm::any_of(Node->op_values(),
260                     [](SDValue O) { return O.getValueType().isVector(); });
261    if (!HasVectorValueOrOp)
262      return TranslateLegalizeResults(Op, Node);
263  
264    TargetLowering::LegalizeAction Action = TargetLowering::Legal;
265    EVT ValVT;
266    switch (Op.getOpcode()) {
267    default:
268      return TranslateLegalizeResults(Op, Node);
269    case ISD::LOAD: {
270      LoadSDNode *LD = cast<LoadSDNode>(Node);
271      ISD::LoadExtType ExtType = LD->getExtensionType();
272      EVT LoadedVT = LD->getMemoryVT();
273      if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
274        Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
275      break;
276    }
277    case ISD::STORE: {
278      StoreSDNode *ST = cast<StoreSDNode>(Node);
279      EVT StVT = ST->getMemoryVT();
280      MVT ValVT = ST->getValue().getSimpleValueType();
281      if (StVT.isVector() && ST->isTruncatingStore())
282        Action = TLI.getTruncStoreAction(ValVT, StVT);
283      break;
284    }
285    case ISD::MERGE_VALUES:
286      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
287      // This operation lies about being legal: when it claims to be legal,
288      // it should actually be expanded.
289      if (Action == TargetLowering::Legal)
290        Action = TargetLowering::Expand;
291      break;
292  #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
293    case ISD::STRICT_##DAGN:
294  #include "llvm/IR/ConstrainedOps.def"
295      ValVT = Node->getValueType(0);
296      if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
297          Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
298        ValVT = Node->getOperand(1).getValueType();
299      if (Op.getOpcode() == ISD::STRICT_FSETCC ||
300          Op.getOpcode() == ISD::STRICT_FSETCCS) {
301        MVT OpVT = Node->getOperand(1).getSimpleValueType();
302        ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
303        Action = TLI.getCondCodeAction(CCCode, OpVT);
304        if (Action == TargetLowering::Legal)
305          Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
306      } else {
307        Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
308      }
309      // If we're asked to expand a strict vector floating-point operation,
310      // by default we're going to simply unroll it.  That is usually the
311      // best approach, except in the case where the resulting strict (scalar)
312      // operations would themselves use the fallback mutation to non-strict.
313      // In that specific case, just do the fallback on the vector op.
314      if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
315          TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
316              TargetLowering::Legal) {
317        EVT EltVT = ValVT.getVectorElementType();
318        if (TLI.getOperationAction(Node->getOpcode(), EltVT)
319            == TargetLowering::Expand &&
320            TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
321            == TargetLowering::Legal)
322          Action = TargetLowering::Legal;
323      }
324      break;
325    case ISD::ADD:
326    case ISD::SUB:
327    case ISD::MUL:
328    case ISD::MULHS:
329    case ISD::MULHU:
330    case ISD::SDIV:
331    case ISD::UDIV:
332    case ISD::SREM:
333    case ISD::UREM:
334    case ISD::SDIVREM:
335    case ISD::UDIVREM:
336    case ISD::FADD:
337    case ISD::FSUB:
338    case ISD::FMUL:
339    case ISD::FDIV:
340    case ISD::FREM:
341    case ISD::AND:
342    case ISD::OR:
343    case ISD::XOR:
344    case ISD::SHL:
345    case ISD::SRA:
346    case ISD::SRL:
347    case ISD::FSHL:
348    case ISD::FSHR:
349    case ISD::ROTL:
350    case ISD::ROTR:
351    case ISD::ABS:
352    case ISD::BSWAP:
353    case ISD::BITREVERSE:
354    case ISD::CTLZ:
355    case ISD::CTTZ:
356    case ISD::CTLZ_ZERO_UNDEF:
357    case ISD::CTTZ_ZERO_UNDEF:
358    case ISD::CTPOP:
359    case ISD::SELECT:
360    case ISD::VSELECT:
361    case ISD::SELECT_CC:
362    case ISD::ZERO_EXTEND:
363    case ISD::ANY_EXTEND:
364    case ISD::TRUNCATE:
365    case ISD::SIGN_EXTEND:
366    case ISD::FP_TO_SINT:
367    case ISD::FP_TO_UINT:
368    case ISD::FNEG:
369    case ISD::FABS:
370    case ISD::FMINNUM:
371    case ISD::FMAXNUM:
372    case ISD::FMINNUM_IEEE:
373    case ISD::FMAXNUM_IEEE:
374    case ISD::FMINIMUM:
375    case ISD::FMAXIMUM:
376    case ISD::FCOPYSIGN:
377    case ISD::FSQRT:
378    case ISD::FSIN:
379    case ISD::FCOS:
380    case ISD::FLDEXP:
381    case ISD::FPOWI:
382    case ISD::FPOW:
383    case ISD::FLOG:
384    case ISD::FLOG2:
385    case ISD::FLOG10:
386    case ISD::FEXP:
387    case ISD::FEXP2:
388    case ISD::FCEIL:
389    case ISD::FTRUNC:
390    case ISD::FRINT:
391    case ISD::FNEARBYINT:
392    case ISD::FROUND:
393    case ISD::FROUNDEVEN:
394    case ISD::FFLOOR:
395    case ISD::FP_ROUND:
396    case ISD::FP_EXTEND:
397    case ISD::FMA:
398    case ISD::SIGN_EXTEND_INREG:
399    case ISD::ANY_EXTEND_VECTOR_INREG:
400    case ISD::SIGN_EXTEND_VECTOR_INREG:
401    case ISD::ZERO_EXTEND_VECTOR_INREG:
402    case ISD::SMIN:
403    case ISD::SMAX:
404    case ISD::UMIN:
405    case ISD::UMAX:
406    case ISD::SMUL_LOHI:
407    case ISD::UMUL_LOHI:
408    case ISD::SADDO:
409    case ISD::UADDO:
410    case ISD::SSUBO:
411    case ISD::USUBO:
412    case ISD::SMULO:
413    case ISD::UMULO:
414    case ISD::FCANONICALIZE:
415    case ISD::FFREXP:
416    case ISD::SADDSAT:
417    case ISD::UADDSAT:
418    case ISD::SSUBSAT:
419    case ISD::USUBSAT:
420    case ISD::SSHLSAT:
421    case ISD::USHLSAT:
422    case ISD::FP_TO_SINT_SAT:
423    case ISD::FP_TO_UINT_SAT:
424    case ISD::MGATHER:
425      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
426      break;
427    case ISD::SMULFIX:
428    case ISD::SMULFIXSAT:
429    case ISD::UMULFIX:
430    case ISD::UMULFIXSAT:
431    case ISD::SDIVFIX:
432    case ISD::SDIVFIXSAT:
433    case ISD::UDIVFIX:
434    case ISD::UDIVFIXSAT: {
435      unsigned Scale = Node->getConstantOperandVal(2);
436      Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
437                                                Node->getValueType(0), Scale);
438      break;
439    }
440    case ISD::SINT_TO_FP:
441    case ISD::UINT_TO_FP:
442    case ISD::VECREDUCE_ADD:
443    case ISD::VECREDUCE_MUL:
444    case ISD::VECREDUCE_AND:
445    case ISD::VECREDUCE_OR:
446    case ISD::VECREDUCE_XOR:
447    case ISD::VECREDUCE_SMAX:
448    case ISD::VECREDUCE_SMIN:
449    case ISD::VECREDUCE_UMAX:
450    case ISD::VECREDUCE_UMIN:
451    case ISD::VECREDUCE_FADD:
452    case ISD::VECREDUCE_FMUL:
453    case ISD::VECREDUCE_FMAX:
454    case ISD::VECREDUCE_FMIN:
455    case ISD::VECREDUCE_FMAXIMUM:
456    case ISD::VECREDUCE_FMINIMUM:
457      Action = TLI.getOperationAction(Node->getOpcode(),
458                                      Node->getOperand(0).getValueType());
459      break;
460    case ISD::VECREDUCE_SEQ_FADD:
461    case ISD::VECREDUCE_SEQ_FMUL:
462      Action = TLI.getOperationAction(Node->getOpcode(),
463                                      Node->getOperand(1).getValueType());
464      break;
465    case ISD::SETCC: {
466      MVT OpVT = Node->getOperand(0).getSimpleValueType();
467      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
468      Action = TLI.getCondCodeAction(CCCode, OpVT);
469      if (Action == TargetLowering::Legal)
470        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
471      break;
472    }
473  
474  #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...)                          \
475    case ISD::VPID: {                                                            \
476      EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS))        \
477                                    : Node->getOperand(LEGALPOS).getValueType(); \
478      if (ISD::VPID == ISD::VP_SETCC) {                                          \
479        ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
480        Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT());        \
481        if (Action != TargetLowering::Legal)                                     \
482          break;                                                                 \
483      }                                                                          \
484      Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
485    } break;
486  #include "llvm/IR/VPIntrinsics.def"
487    }
488  
489    LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
490  
491    SmallVector<SDValue, 8> ResultVals;
492    switch (Action) {
493    default: llvm_unreachable("This action is not supported yet!");
494    case TargetLowering::Promote:
495      assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
496             "This action is not supported yet!");
497      LLVM_DEBUG(dbgs() << "Promoting\n");
498      Promote(Node, ResultVals);
499      assert(!ResultVals.empty() && "No results for promotion?");
500      break;
501    case TargetLowering::Legal:
502      LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
503      break;
504    case TargetLowering::Custom:
505      LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
506      if (LowerOperationWrapper(Node, ResultVals))
507        break;
508      LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
509      [[fallthrough]];
510    case TargetLowering::Expand:
511      LLVM_DEBUG(dbgs() << "Expanding\n");
512      Expand(Node, ResultVals);
513      break;
514    }
515  
516    if (ResultVals.empty())
517      return TranslateLegalizeResults(Op, Node);
518  
519    Changed = true;
520    return RecursivelyLegalizeResults(Op, ResultVals);
521  }
522  
523  // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
524  // merge them somehow?
525  bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
526                                              SmallVectorImpl<SDValue> &Results) {
527    SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
528  
529    if (!Res.getNode())
530      return false;
531  
532    if (Res == SDValue(Node, 0))
533      return true;
534  
535    // If the original node has one result, take the return value from
536    // LowerOperation as is. It might not be result number 0.
537    if (Node->getNumValues() == 1) {
538      Results.push_back(Res);
539      return true;
540    }
541  
542    // If the original node has multiple results, then the return node should
543    // have the same number of results.
544    assert((Node->getNumValues() == Res->getNumValues()) &&
545           "Lowering returned the wrong number of results!");
546  
547    // Places new result values base on N result number.
548    for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
549      Results.push_back(Res.getValue(I));
550  
551    return true;
552  }
553  
554  void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
555    // For a few operations there is a specific concept for promotion based on
556    // the operand's type.
557    switch (Node->getOpcode()) {
558    case ISD::SINT_TO_FP:
559    case ISD::UINT_TO_FP:
560    case ISD::STRICT_SINT_TO_FP:
561    case ISD::STRICT_UINT_TO_FP:
562      // "Promote" the operation by extending the operand.
563      PromoteINT_TO_FP(Node, Results);
564      return;
565    case ISD::FP_TO_UINT:
566    case ISD::FP_TO_SINT:
567    case ISD::STRICT_FP_TO_UINT:
568    case ISD::STRICT_FP_TO_SINT:
569      // Promote the operation by extending the operand.
570      PromoteFP_TO_INT(Node, Results);
571      return;
572    case ISD::FP_ROUND:
573    case ISD::FP_EXTEND:
574      // These operations are used to do promotion so they can't be promoted
575      // themselves.
576      llvm_unreachable("Don't know how to promote this operation!");
577    }
578  
579    // There are currently two cases of vector promotion:
580    // 1) Bitcasting a vector of integers to a different type to a vector of the
581    //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
582    // 2) Extending a vector of floats to a vector of the same number of larger
583    //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
584    assert(Node->getNumValues() == 1 &&
585           "Can't promote a vector with multiple results!");
586    MVT VT = Node->getSimpleValueType(0);
587    MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
588    SDLoc dl(Node);
589    SmallVector<SDValue, 4> Operands(Node->getNumOperands());
590  
591    for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
592      if (Node->getOperand(j).getValueType().isVector())
593        if (Node->getOperand(j)
594                .getValueType()
595                .getVectorElementType()
596                .isFloatingPoint() &&
597            NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
598          Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
599        else
600          Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
601      else
602        Operands[j] = Node->getOperand(j);
603    }
604  
605    SDValue Res =
606        DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
607  
608    if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
609        (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
610         NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
611      Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
612                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
613    else
614      Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
615  
616    Results.push_back(Res);
617  }
618  
619  void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
620                                         SmallVectorImpl<SDValue> &Results) {
621    // INT_TO_FP operations may require the input operand be promoted even
622    // when the type is otherwise legal.
623    bool IsStrict = Node->isStrictFPOpcode();
624    MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
625    MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
626    assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
627           "Vectors have different number of elements!");
628  
629    SDLoc dl(Node);
630    SmallVector<SDValue, 4> Operands(Node->getNumOperands());
631  
632    unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
633                    Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
634                       ? ISD::ZERO_EXTEND
635                       : ISD::SIGN_EXTEND;
636    for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
637      if (Node->getOperand(j).getValueType().isVector())
638        Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
639      else
640        Operands[j] = Node->getOperand(j);
641    }
642  
643    if (IsStrict) {
644      SDValue Res = DAG.getNode(Node->getOpcode(), dl,
645                                {Node->getValueType(0), MVT::Other}, Operands);
646      Results.push_back(Res);
647      Results.push_back(Res.getValue(1));
648      return;
649    }
650  
651    SDValue Res =
652        DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
653    Results.push_back(Res);
654  }
655  
656  // For FP_TO_INT we promote the result type to a vector type with wider
657  // elements and then truncate the result.  This is different from the default
658  // PromoteVector which uses bitcast to promote thus assumning that the
659  // promoted vector type has the same overall size.
660  void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
661                                         SmallVectorImpl<SDValue> &Results) {
662    MVT VT = Node->getSimpleValueType(0);
663    MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
664    bool IsStrict = Node->isStrictFPOpcode();
665    assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
666           "Vectors have different number of elements!");
667  
668    unsigned NewOpc = Node->getOpcode();
669    // Change FP_TO_UINT to FP_TO_SINT if possible.
670    // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
671    if (NewOpc == ISD::FP_TO_UINT &&
672        TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
673      NewOpc = ISD::FP_TO_SINT;
674  
675    if (NewOpc == ISD::STRICT_FP_TO_UINT &&
676        TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
677      NewOpc = ISD::STRICT_FP_TO_SINT;
678  
679    SDLoc dl(Node);
680    SDValue Promoted, Chain;
681    if (IsStrict) {
682      Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
683                             {Node->getOperand(0), Node->getOperand(1)});
684      Chain = Promoted.getValue(1);
685    } else
686      Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
687  
688    // Assert that the converted value fits in the original type.  If it doesn't
689    // (eg: because the value being converted is too big), then the result of the
690    // original operation was undefined anyway, so the assert is still correct.
691    if (Node->getOpcode() == ISD::FP_TO_UINT ||
692        Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
693      NewOpc = ISD::AssertZext;
694    else
695      NewOpc = ISD::AssertSext;
696  
697    Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
698                           DAG.getValueType(VT.getScalarType()));
699    Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
700    Results.push_back(Promoted);
701    if (IsStrict)
702      Results.push_back(Chain);
703  }
704  
705  std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
706    LoadSDNode *LD = cast<LoadSDNode>(N);
707    return TLI.scalarizeVectorLoad(LD, DAG);
708  }
709  
710  SDValue VectorLegalizer::ExpandStore(SDNode *N) {
711    StoreSDNode *ST = cast<StoreSDNode>(N);
712    SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
713    return TF;
714  }
715  
716  void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
717    switch (Node->getOpcode()) {
718    case ISD::LOAD: {
719      std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
720      Results.push_back(Tmp.first);
721      Results.push_back(Tmp.second);
722      return;
723    }
724    case ISD::STORE:
725      Results.push_back(ExpandStore(Node));
726      return;
727    case ISD::MERGE_VALUES:
728      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
729        Results.push_back(Node->getOperand(i));
730      return;
731    case ISD::SIGN_EXTEND_INREG:
732      Results.push_back(ExpandSEXTINREG(Node));
733      return;
734    case ISD::ANY_EXTEND_VECTOR_INREG:
735      Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
736      return;
737    case ISD::SIGN_EXTEND_VECTOR_INREG:
738      Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
739      return;
740    case ISD::ZERO_EXTEND_VECTOR_INREG:
741      Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
742      return;
743    case ISD::BSWAP:
744      Results.push_back(ExpandBSWAP(Node));
745      return;
746    case ISD::VP_BSWAP:
747      Results.push_back(TLI.expandVPBSWAP(Node, DAG));
748      return;
749    case ISD::VSELECT:
750      Results.push_back(ExpandVSELECT(Node));
751      return;
752    case ISD::VP_SELECT:
753      Results.push_back(ExpandVP_SELECT(Node));
754      return;
755    case ISD::VP_SREM:
756    case ISD::VP_UREM:
757      if (SDValue Expanded = ExpandVP_REM(Node)) {
758        Results.push_back(Expanded);
759        return;
760      }
761      break;
762    case ISD::SELECT:
763      Results.push_back(ExpandSELECT(Node));
764      return;
765    case ISD::SELECT_CC: {
766      if (Node->getValueType(0).isScalableVector()) {
767        EVT CondVT = TLI.getSetCCResultType(
768            DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
769        SDValue SetCC =
770            DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
771                        Node->getOperand(1), Node->getOperand(4));
772        Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
773                                        Node->getOperand(2),
774                                        Node->getOperand(3)));
775        return;
776      }
777      break;
778    }
779    case ISD::FP_TO_UINT:
780      ExpandFP_TO_UINT(Node, Results);
781      return;
782    case ISD::UINT_TO_FP:
783      ExpandUINT_TO_FLOAT(Node, Results);
784      return;
785    case ISD::FNEG:
786      Results.push_back(ExpandFNEG(Node));
787      return;
788    case ISD::FSUB:
789      ExpandFSUB(Node, Results);
790      return;
791    case ISD::SETCC:
792    case ISD::VP_SETCC:
793      ExpandSETCC(Node, Results);
794      return;
795    case ISD::ABS:
796      if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
797        Results.push_back(Expanded);
798        return;
799      }
800      break;
801    case ISD::ABDS:
802    case ISD::ABDU:
803      if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
804        Results.push_back(Expanded);
805        return;
806      }
807      break;
808    case ISD::BITREVERSE:
809      ExpandBITREVERSE(Node, Results);
810      return;
811    case ISD::VP_BITREVERSE:
812      if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
813        Results.push_back(Expanded);
814        return;
815      }
816      break;
817    case ISD::CTPOP:
818      if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
819        Results.push_back(Expanded);
820        return;
821      }
822      break;
823    case ISD::VP_CTPOP:
824      if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
825        Results.push_back(Expanded);
826        return;
827      }
828      break;
829    case ISD::CTLZ:
830    case ISD::CTLZ_ZERO_UNDEF:
831      if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
832        Results.push_back(Expanded);
833        return;
834      }
835      break;
836    case ISD::VP_CTLZ:
837    case ISD::VP_CTLZ_ZERO_UNDEF:
838      if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
839        Results.push_back(Expanded);
840        return;
841      }
842      break;
843    case ISD::CTTZ:
844    case ISD::CTTZ_ZERO_UNDEF:
845      if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
846        Results.push_back(Expanded);
847        return;
848      }
849      break;
850    case ISD::VP_CTTZ:
851    case ISD::VP_CTTZ_ZERO_UNDEF:
852      if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
853        Results.push_back(Expanded);
854        return;
855      }
856      break;
857    case ISD::FSHL:
858    case ISD::VP_FSHL:
859    case ISD::FSHR:
860    case ISD::VP_FSHR:
861      if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
862        Results.push_back(Expanded);
863        return;
864      }
865      break;
866    case ISD::ROTL:
867    case ISD::ROTR:
868      if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
869        Results.push_back(Expanded);
870        return;
871      }
872      break;
873    case ISD::FMINNUM:
874    case ISD::FMAXNUM:
875      if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
876        Results.push_back(Expanded);
877        return;
878      }
879      break;
880    case ISD::SMIN:
881    case ISD::SMAX:
882    case ISD::UMIN:
883    case ISD::UMAX:
884      if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
885        Results.push_back(Expanded);
886        return;
887      }
888      break;
889    case ISD::UADDO:
890    case ISD::USUBO:
891      ExpandUADDSUBO(Node, Results);
892      return;
893    case ISD::SADDO:
894    case ISD::SSUBO:
895      ExpandSADDSUBO(Node, Results);
896      return;
897    case ISD::UMULO:
898    case ISD::SMULO:
899      ExpandMULO(Node, Results);
900      return;
901    case ISD::USUBSAT:
902    case ISD::SSUBSAT:
903    case ISD::UADDSAT:
904    case ISD::SADDSAT:
905      if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
906        Results.push_back(Expanded);
907        return;
908      }
909      break;
910    case ISD::USHLSAT:
911    case ISD::SSHLSAT:
912      if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
913        Results.push_back(Expanded);
914        return;
915      }
916      break;
917    case ISD::FP_TO_SINT_SAT:
918    case ISD::FP_TO_UINT_SAT:
919      // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
920      if (Node->getValueType(0).isScalableVector()) {
921        if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
922          Results.push_back(Expanded);
923          return;
924        }
925      }
926      break;
927    case ISD::SMULFIX:
928    case ISD::UMULFIX:
929      if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
930        Results.push_back(Expanded);
931        return;
932      }
933      break;
934    case ISD::SMULFIXSAT:
935    case ISD::UMULFIXSAT:
936      // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
937      // why. Maybe it results in worse codegen compared to the unroll for some
938      // targets? This should probably be investigated. And if we still prefer to
939      // unroll an explanation could be helpful.
940      break;
941    case ISD::SDIVFIX:
942    case ISD::UDIVFIX:
943      ExpandFixedPointDiv(Node, Results);
944      return;
945    case ISD::SDIVFIXSAT:
946    case ISD::UDIVFIXSAT:
947      break;
948  #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
949    case ISD::STRICT_##DAGN:
950  #include "llvm/IR/ConstrainedOps.def"
951      ExpandStrictFPOp(Node, Results);
952      return;
953    case ISD::VECREDUCE_ADD:
954    case ISD::VECREDUCE_MUL:
955    case ISD::VECREDUCE_AND:
956    case ISD::VECREDUCE_OR:
957    case ISD::VECREDUCE_XOR:
958    case ISD::VECREDUCE_SMAX:
959    case ISD::VECREDUCE_SMIN:
960    case ISD::VECREDUCE_UMAX:
961    case ISD::VECREDUCE_UMIN:
962    case ISD::VECREDUCE_FADD:
963    case ISD::VECREDUCE_FMUL:
964    case ISD::VECREDUCE_FMAX:
965    case ISD::VECREDUCE_FMIN:
966    case ISD::VECREDUCE_FMAXIMUM:
967    case ISD::VECREDUCE_FMINIMUM:
968      Results.push_back(TLI.expandVecReduce(Node, DAG));
969      return;
970    case ISD::VECREDUCE_SEQ_FADD:
971    case ISD::VECREDUCE_SEQ_FMUL:
972      Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
973      return;
974    case ISD::SREM:
975    case ISD::UREM:
976      ExpandREM(Node, Results);
977      return;
978    case ISD::VP_MERGE:
979      Results.push_back(ExpandVP_MERGE(Node));
980      return;
981    }
982  
983    SDValue Unrolled = DAG.UnrollVectorOp(Node);
984    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
985      Results.push_back(Unrolled.getValue(I));
986  }
987  
988  SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
989    // Lower a select instruction where the condition is a scalar and the
990    // operands are vectors. Lower this select to VSELECT and implement it
991    // using XOR AND OR. The selector bit is broadcasted.
992    EVT VT = Node->getValueType(0);
993    SDLoc DL(Node);
994  
995    SDValue Mask = Node->getOperand(0);
996    SDValue Op1 = Node->getOperand(1);
997    SDValue Op2 = Node->getOperand(2);
998  
999    assert(VT.isVector() && !Mask.getValueType().isVector()
1000           && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1001  
1002    // If we can't even use the basic vector operations of
1003    // AND,OR,XOR, we will have to scalarize the op.
1004    // Notice that the operation may be 'promoted' which means that it is
1005    // 'bitcasted' to another type which is handled.
1006    // Also, we need to be able to construct a splat vector using either
1007    // BUILD_VECTOR or SPLAT_VECTOR.
1008    // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1009    // BUILD_VECTOR?
1010    if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1011        TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1012        TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
1013        TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1014                                                        : ISD::SPLAT_VECTOR,
1015                               VT) == TargetLowering::Expand)
1016      return DAG.UnrollVectorOp(Node);
1017  
1018    // Generate a mask operand.
1019    EVT MaskTy = VT.changeVectorElementTypeToInteger();
1020  
1021    // What is the size of each element in the vector mask.
1022    EVT BitTy = MaskTy.getScalarType();
1023  
1024    Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1025                         DAG.getConstant(0, DL, BitTy));
1026  
1027    // Broadcast the mask so that the entire vector is all one or all zero.
1028    Mask = DAG.getSplat(MaskTy, DL, Mask);
1029  
1030    // Bitcast the operands to be the same type as the mask.
1031    // This is needed when we select between FP types because
1032    // the mask is a vector of integers.
1033    Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1034    Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1035  
1036    SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1037  
1038    Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1039    Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1040    SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1041    return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1042  }
1043  
1044  SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1045    EVT VT = Node->getValueType(0);
1046  
1047    // Make sure that the SRA and SHL instructions are available.
1048    if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1049        TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1050      return DAG.UnrollVectorOp(Node);
1051  
1052    SDLoc DL(Node);
1053    EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1054  
1055    unsigned BW = VT.getScalarSizeInBits();
1056    unsigned OrigBW = OrigTy.getScalarSizeInBits();
1057    SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1058  
1059    SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1060    return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1061  }
1062  
1063  // Generically expand a vector anyext in register to a shuffle of the relevant
1064  // lanes into the appropriate locations, with other lanes left undef.
1065  SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1066    SDLoc DL(Node);
1067    EVT VT = Node->getValueType(0);
1068    int NumElements = VT.getVectorNumElements();
1069    SDValue Src = Node->getOperand(0);
1070    EVT SrcVT = Src.getValueType();
1071    int NumSrcElements = SrcVT.getVectorNumElements();
1072  
1073    // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1074    // into a larger vector type.
1075    if (SrcVT.bitsLE(VT)) {
1076      assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1077             "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1078      NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1079      SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1080                               NumSrcElements);
1081      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1082                        Src, DAG.getVectorIdxConstant(0, DL));
1083    }
1084  
1085    // Build a base mask of undef shuffles.
1086    SmallVector<int, 16> ShuffleMask;
1087    ShuffleMask.resize(NumSrcElements, -1);
1088  
1089    // Place the extended lanes into the correct locations.
1090    int ExtLaneScale = NumSrcElements / NumElements;
1091    int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1092    for (int i = 0; i < NumElements; ++i)
1093      ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1094  
1095    return DAG.getNode(
1096        ISD::BITCAST, DL, VT,
1097        DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
1098  }
1099  
1100  SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1101    SDLoc DL(Node);
1102    EVT VT = Node->getValueType(0);
1103    SDValue Src = Node->getOperand(0);
1104    EVT SrcVT = Src.getValueType();
1105  
1106    // First build an any-extend node which can be legalized above when we
1107    // recurse through it.
1108    SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
1109  
1110    // Now we need sign extend. Do this by shifting the elements. Even if these
1111    // aren't legal operations, they have a better chance of being legalized
1112    // without full scalarization than the sign extension does.
1113    unsigned EltWidth = VT.getScalarSizeInBits();
1114    unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1115    SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1116    return DAG.getNode(ISD::SRA, DL, VT,
1117                       DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1118                       ShiftAmount);
1119  }
1120  
1121  // Generically expand a vector zext in register to a shuffle of the relevant
1122  // lanes into the appropriate locations, a blend of zero into the high bits,
1123  // and a bitcast to the wider element type.
1124  SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1125    SDLoc DL(Node);
1126    EVT VT = Node->getValueType(0);
1127    int NumElements = VT.getVectorNumElements();
1128    SDValue Src = Node->getOperand(0);
1129    EVT SrcVT = Src.getValueType();
1130    int NumSrcElements = SrcVT.getVectorNumElements();
1131  
1132    // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1133    // into a larger vector type.
1134    if (SrcVT.bitsLE(VT)) {
1135      assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1136             "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1137      NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1138      SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1139                               NumSrcElements);
1140      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1141                        Src, DAG.getVectorIdxConstant(0, DL));
1142    }
1143  
1144    // Build up a zero vector to blend into this one.
1145    SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1146  
1147    // Shuffle the incoming lanes into the correct position, and pull all other
1148    // lanes from the zero vector.
1149    auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1150  
1151    int ExtLaneScale = NumSrcElements / NumElements;
1152    int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1153    for (int i = 0; i < NumElements; ++i)
1154      ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1155  
1156    return DAG.getNode(ISD::BITCAST, DL, VT,
1157                       DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1158  }
1159  
1160  static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1161    int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1162    for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1163      for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1164        ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1165  }
1166  
1167  SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1168    EVT VT = Node->getValueType(0);
1169  
1170    // Scalable vectors can't use shuffle expansion.
1171    if (VT.isScalableVector())
1172      return TLI.expandBSWAP(Node, DAG);
1173  
1174    // Generate a byte wise shuffle mask for the BSWAP.
1175    SmallVector<int, 16> ShuffleMask;
1176    createBSWAPShuffleMask(VT, ShuffleMask);
1177    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1178  
1179    // Only emit a shuffle if the mask is legal.
1180    if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1181      SDLoc DL(Node);
1182      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1183      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
1184      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1185    }
1186  
1187    // If we have the appropriate vector bit operations, it is better to use them
1188    // than unrolling and expanding each component.
1189    if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1190        TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1191        TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1192        TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1193      return TLI.expandBSWAP(Node, DAG);
1194  
1195    // Otherwise unroll.
1196    return DAG.UnrollVectorOp(Node);
1197  }
1198  
1199  void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
1200                                         SmallVectorImpl<SDValue> &Results) {
1201    EVT VT = Node->getValueType(0);
1202  
1203    // We can't unroll or use shuffles for scalable vectors.
1204    if (VT.isScalableVector()) {
1205      Results.push_back(TLI.expandBITREVERSE(Node, DAG));
1206      return;
1207    }
1208  
1209    // If we have the scalar operation, it's probably cheaper to unroll it.
1210    if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
1211      SDValue Tmp = DAG.UnrollVectorOp(Node);
1212      Results.push_back(Tmp);
1213      return;
1214    }
1215  
1216    // If the vector element width is a whole number of bytes, test if its legal
1217    // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1218    // vector. This greatly reduces the number of bit shifts necessary.
1219    unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1220    if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1221      SmallVector<int, 16> BSWAPMask;
1222      createBSWAPShuffleMask(VT, BSWAPMask);
1223  
1224      EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1225      if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1226          (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
1227           (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1228            TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1229            TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
1230            TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
1231        SDLoc DL(Node);
1232        SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1233        Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
1234                                  BSWAPMask);
1235        Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1236        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1237        Results.push_back(Op);
1238        return;
1239      }
1240    }
1241  
1242    // If we have the appropriate vector bit operations, it is better to use them
1243    // than unrolling and expanding each component.
1244    if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1245        TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1246        TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1247        TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
1248      Results.push_back(TLI.expandBITREVERSE(Node, DAG));
1249      return;
1250    }
1251  
1252    // Otherwise unroll.
1253    SDValue Tmp = DAG.UnrollVectorOp(Node);
1254    Results.push_back(Tmp);
1255  }
1256  
1257  SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1258    // Implement VSELECT in terms of XOR, AND, OR
1259    // on platforms which do not support blend natively.
1260    SDLoc DL(Node);
1261  
1262    SDValue Mask = Node->getOperand(0);
1263    SDValue Op1 = Node->getOperand(1);
1264    SDValue Op2 = Node->getOperand(2);
1265  
1266    EVT VT = Mask.getValueType();
1267  
1268    // If we can't even use the basic vector operations of
1269    // AND,OR,XOR, we will have to scalarize the op.
1270    // Notice that the operation may be 'promoted' which means that it is
1271    // 'bitcasted' to another type which is handled.
1272    if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1273        TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1274        TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1275      return DAG.UnrollVectorOp(Node);
1276  
1277    // This operation also isn't safe with AND, OR, XOR when the boolean type is
1278    // 0/1 and the select operands aren't also booleans, as we need an all-ones
1279    // vector constant to mask with.
1280    // FIXME: Sign extend 1 to all ones if that's legal on the target.
1281    auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1282    if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1283        !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1284          Op1.getValueType().getVectorElementType() == MVT::i1))
1285      return DAG.UnrollVectorOp(Node);
1286  
1287    // If the mask and the type are different sizes, unroll the vector op. This
1288    // can occur when getSetCCResultType returns something that is different in
1289    // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1290    if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1291      return DAG.UnrollVectorOp(Node);
1292  
1293    // Bitcast the operands to be the same type as the mask.
1294    // This is needed when we select between FP types because
1295    // the mask is a vector of integers.
1296    Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1297    Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1298  
1299    SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1300  
1301    Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1302    Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1303    SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1304    return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1305  }
1306  
1307  SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1308    // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1309    // do not support it natively.
1310    SDLoc DL(Node);
1311  
1312    SDValue Mask = Node->getOperand(0);
1313    SDValue Op1 = Node->getOperand(1);
1314    SDValue Op2 = Node->getOperand(2);
1315    SDValue EVL = Node->getOperand(3);
1316  
1317    EVT VT = Mask.getValueType();
1318  
1319    // If we can't even use the basic vector operations of
1320    // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1321    if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1322        TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1323        TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1324      return DAG.UnrollVectorOp(Node);
1325  
1326    // This operation also isn't safe when the operands aren't also booleans.
1327    if (Op1.getValueType().getVectorElementType() != MVT::i1)
1328      return DAG.UnrollVectorOp(Node);
1329  
1330    SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1331    SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1332  
1333    Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1334    Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1335    return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1336  }
1337  
1338  SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1339    // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1340    // indices less than the EVL/pivot are true. Combine that with the original
1341    // mask for a full-length mask. Use a full-length VSELECT to select between
1342    // the true and false values.
1343    SDLoc DL(Node);
1344  
1345    SDValue Mask = Node->getOperand(0);
1346    SDValue Op1 = Node->getOperand(1);
1347    SDValue Op2 = Node->getOperand(2);
1348    SDValue EVL = Node->getOperand(3);
1349  
1350    EVT MaskVT = Mask.getValueType();
1351    bool IsFixedLen = MaskVT.isFixedLengthVector();
1352  
1353    EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1354                                    MaskVT.getVectorElementCount());
1355  
1356    // If we can't construct the EVL mask efficiently, it's better to unroll.
1357    if ((IsFixedLen &&
1358         !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
1359        (!IsFixedLen &&
1360         (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1361          !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
1362      return DAG.UnrollVectorOp(Node);
1363  
1364    // If using a SETCC would result in a different type than the mask type,
1365    // unroll.
1366    if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1367                               EVLVecVT) != MaskVT)
1368      return DAG.UnrollVectorOp(Node);
1369  
1370    SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1371    SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1372    SDValue EVLMask =
1373        DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1374  
1375    SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1376    return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1377  }
1378  
1379  SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1380    // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1381    EVT VT = Node->getValueType(0);
1382  
1383    unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1384  
1385    if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1386        !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1387        !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1388      return SDValue();
1389  
1390    SDLoc DL(Node);
1391  
1392    SDValue Dividend = Node->getOperand(0);
1393    SDValue Divisor = Node->getOperand(1);
1394    SDValue Mask = Node->getOperand(2);
1395    SDValue EVL = Node->getOperand(3);
1396  
1397    // X % Y -> X-X/Y*Y
1398    SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1399    SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1400    return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1401  }
1402  
1403  void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1404                                         SmallVectorImpl<SDValue> &Results) {
1405    // Attempt to expand using TargetLowering.
1406    SDValue Result, Chain;
1407    if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1408      Results.push_back(Result);
1409      if (Node->isStrictFPOpcode())
1410        Results.push_back(Chain);
1411      return;
1412    }
1413  
1414    // Otherwise go ahead and unroll.
1415    if (Node->isStrictFPOpcode()) {
1416      UnrollStrictFPOp(Node, Results);
1417      return;
1418    }
1419  
1420    Results.push_back(DAG.UnrollVectorOp(Node));
1421  }
1422  
1423  void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1424                                            SmallVectorImpl<SDValue> &Results) {
1425    bool IsStrict = Node->isStrictFPOpcode();
1426    unsigned OpNo = IsStrict ? 1 : 0;
1427    SDValue Src = Node->getOperand(OpNo);
1428    EVT VT = Src.getValueType();
1429    SDLoc DL(Node);
1430  
1431    // Attempt to expand using TargetLowering.
1432    SDValue Result;
1433    SDValue Chain;
1434    if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1435      Results.push_back(Result);
1436      if (IsStrict)
1437        Results.push_back(Chain);
1438      return;
1439    }
1440  
1441    // Make sure that the SINT_TO_FP and SRL instructions are available.
1442    if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
1443                           TargetLowering::Expand) ||
1444         (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
1445                          TargetLowering::Expand)) ||
1446        TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
1447      if (IsStrict) {
1448        UnrollStrictFPOp(Node, Results);
1449        return;
1450      }
1451  
1452      Results.push_back(DAG.UnrollVectorOp(Node));
1453      return;
1454    }
1455  
1456    unsigned BW = VT.getScalarSizeInBits();
1457    assert((BW == 64 || BW == 32) &&
1458           "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1459  
1460    SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1461  
1462    // Constants to clear the upper part of the word.
1463    // Notice that we can also use SHL+SHR, but using a constant is slightly
1464    // faster on x86.
1465    uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1466    SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1467  
1468    // Two to the power of half-word-size.
1469    SDValue TWOHW =
1470        DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
1471  
1472    // Clear upper part of LO, lower HI
1473    SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
1474    SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
1475  
1476    if (IsStrict) {
1477      // Convert hi and lo to floats
1478      // Convert the hi part back to the upper values
1479      // TODO: Can any fast-math-flags be set on these nodes?
1480      SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
1481                                {Node->getValueType(0), MVT::Other},
1482                                {Node->getOperand(0), HI});
1483      fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
1484                        {fHI.getValue(1), fHI, TWOHW});
1485      SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
1486                                {Node->getValueType(0), MVT::Other},
1487                                {Node->getOperand(0), LO});
1488  
1489      SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
1490                               fLO.getValue(1));
1491  
1492      // Add the two halves
1493      SDValue Result =
1494          DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
1495                      {TF, fHI, fLO});
1496  
1497      Results.push_back(Result);
1498      Results.push_back(Result.getValue(1));
1499      return;
1500    }
1501  
1502    // Convert hi and lo to floats
1503    // Convert the hi part back to the upper values
1504    // TODO: Can any fast-math-flags be set on these nodes?
1505    SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
1506    fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
1507    SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
1508  
1509    // Add the two halves
1510    Results.push_back(
1511        DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
1512  }
1513  
1514  SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1515    if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
1516      SDLoc DL(Node);
1517      SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
1518      // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1519      return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
1520                         Node->getOperand(0));
1521    }
1522    return DAG.UnrollVectorOp(Node);
1523  }
1524  
1525  void VectorLegalizer::ExpandFSUB(SDNode *Node,
1526                                   SmallVectorImpl<SDValue> &Results) {
1527    // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1528    // we can defer this to operation legalization where it will be lowered as
1529    // a+(-b).
1530    EVT VT = Node->getValueType(0);
1531    if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1532        TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1533      return; // Defer to LegalizeDAG
1534  
1535    SDValue Tmp = DAG.UnrollVectorOp(Node);
1536    Results.push_back(Tmp);
1537  }
1538  
1539  void VectorLegalizer::ExpandSETCC(SDNode *Node,
1540                                    SmallVectorImpl<SDValue> &Results) {
1541    bool NeedInvert = false;
1542    bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
1543    bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
1544                    Node->getOpcode() == ISD::STRICT_FSETCCS;
1545    bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
1546    unsigned Offset = IsStrict ? 1 : 0;
1547  
1548    SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
1549    SDValue LHS = Node->getOperand(0 + Offset);
1550    SDValue RHS = Node->getOperand(1 + Offset);
1551    SDValue CC = Node->getOperand(2 + Offset);
1552  
1553    MVT OpVT = LHS.getSimpleValueType();
1554    ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
1555  
1556    if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
1557      if (IsStrict) {
1558        UnrollStrictFPOp(Node, Results);
1559        return;
1560      }
1561      Results.push_back(UnrollVSETCC(Node));
1562      return;
1563    }
1564  
1565    SDValue Mask, EVL;
1566    if (IsVP) {
1567      Mask = Node->getOperand(3 + Offset);
1568      EVL = Node->getOperand(4 + Offset);
1569    }
1570  
1571    SDLoc dl(Node);
1572    bool Legalized =
1573        TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
1574                                  EVL, NeedInvert, dl, Chain, IsSignaling);
1575  
1576    if (Legalized) {
1577      // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
1578      // condition code, create a new SETCC node.
1579      if (CC.getNode()) {
1580        if (IsStrict) {
1581          LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
1582                            {Chain, LHS, RHS, CC}, Node->getFlags());
1583          Chain = LHS.getValue(1);
1584        } else if (IsVP) {
1585          LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
1586                            {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
1587        } else {
1588          LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
1589                            Node->getFlags());
1590        }
1591      }
1592  
1593      // If we expanded the SETCC by inverting the condition code, then wrap
1594      // the existing SETCC in a NOT to restore the intended condition.
1595      if (NeedInvert) {
1596        if (!IsVP)
1597          LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
1598        else
1599          LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
1600      }
1601    } else {
1602      assert(!IsStrict && "Don't know how to expand for strict nodes.");
1603  
1604      // Otherwise, SETCC for the given comparison type must be completely
1605      // illegal; expand it into a SELECT_CC.
1606      EVT VT = Node->getValueType(0);
1607      LHS =
1608          DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
1609                      DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
1610                      DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
1611      LHS->setFlags(Node->getFlags());
1612    }
1613  
1614    Results.push_back(LHS);
1615    if (IsStrict)
1616      Results.push_back(Chain);
1617  }
1618  
1619  void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
1620                                       SmallVectorImpl<SDValue> &Results) {
1621    SDValue Result, Overflow;
1622    TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
1623    Results.push_back(Result);
1624    Results.push_back(Overflow);
1625  }
1626  
1627  void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
1628                                       SmallVectorImpl<SDValue> &Results) {
1629    SDValue Result, Overflow;
1630    TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
1631    Results.push_back(Result);
1632    Results.push_back(Overflow);
1633  }
1634  
1635  void VectorLegalizer::ExpandMULO(SDNode *Node,
1636                                   SmallVectorImpl<SDValue> &Results) {
1637    SDValue Result, Overflow;
1638    if (!TLI.expandMULO(Node, Result, Overflow, DAG))
1639      std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
1640  
1641    Results.push_back(Result);
1642    Results.push_back(Overflow);
1643  }
1644  
1645  void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
1646                                            SmallVectorImpl<SDValue> &Results) {
1647    SDNode *N = Node;
1648    if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
1649            N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
1650      Results.push_back(Expanded);
1651  }
1652  
1653  void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
1654                                         SmallVectorImpl<SDValue> &Results) {
1655    if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
1656      ExpandUINT_TO_FLOAT(Node, Results);
1657      return;
1658    }
1659    if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
1660      ExpandFP_TO_UINT(Node, Results);
1661      return;
1662    }
1663  
1664    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1665        Node->getOpcode() == ISD::STRICT_FSETCCS) {
1666      ExpandSETCC(Node, Results);
1667      return;
1668    }
1669  
1670    UnrollStrictFPOp(Node, Results);
1671  }
1672  
1673  void VectorLegalizer::ExpandREM(SDNode *Node,
1674                                  SmallVectorImpl<SDValue> &Results) {
1675    assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
1676           "Expected REM node");
1677  
1678    SDValue Result;
1679    if (!TLI.expandREM(Node, Result, DAG))
1680      Result = DAG.UnrollVectorOp(Node);
1681    Results.push_back(Result);
1682  }
1683  
1684  void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
1685                                         SmallVectorImpl<SDValue> &Results) {
1686    EVT VT = Node->getValueType(0);
1687    EVT EltVT = VT.getVectorElementType();
1688    unsigned NumElems = VT.getVectorNumElements();
1689    unsigned NumOpers = Node->getNumOperands();
1690    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1691  
1692    EVT TmpEltVT = EltVT;
1693    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1694        Node->getOpcode() == ISD::STRICT_FSETCCS)
1695      TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
1696                                        *DAG.getContext(), TmpEltVT);
1697  
1698    EVT ValueVTs[] = {TmpEltVT, MVT::Other};
1699    SDValue Chain = Node->getOperand(0);
1700    SDLoc dl(Node);
1701  
1702    SmallVector<SDValue, 32> OpValues;
1703    SmallVector<SDValue, 32> OpChains;
1704    for (unsigned i = 0; i < NumElems; ++i) {
1705      SmallVector<SDValue, 4> Opers;
1706      SDValue Idx = DAG.getVectorIdxConstant(i, dl);
1707  
1708      // The Chain is the first operand.
1709      Opers.push_back(Chain);
1710  
1711      // Now process the remaining operands.
1712      for (unsigned j = 1; j < NumOpers; ++j) {
1713        SDValue Oper = Node->getOperand(j);
1714        EVT OperVT = Oper.getValueType();
1715  
1716        if (OperVT.isVector())
1717          Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1718                             OperVT.getVectorElementType(), Oper, Idx);
1719  
1720        Opers.push_back(Oper);
1721      }
1722  
1723      SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
1724      SDValue ScalarResult = ScalarOp.getValue(0);
1725      SDValue ScalarChain = ScalarOp.getValue(1);
1726  
1727      if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1728          Node->getOpcode() == ISD::STRICT_FSETCCS)
1729        ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
1730                                     DAG.getAllOnesConstant(dl, EltVT),
1731                                     DAG.getConstant(0, dl, EltVT));
1732  
1733      OpValues.push_back(ScalarResult);
1734      OpChains.push_back(ScalarChain);
1735    }
1736  
1737    SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1738    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1739  
1740    Results.push_back(Result);
1741    Results.push_back(NewChain);
1742  }
1743  
1744  SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
1745    EVT VT = Node->getValueType(0);
1746    unsigned NumElems = VT.getVectorNumElements();
1747    EVT EltVT = VT.getVectorElementType();
1748    SDValue LHS = Node->getOperand(0);
1749    SDValue RHS = Node->getOperand(1);
1750    SDValue CC = Node->getOperand(2);
1751    EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1752    SDLoc dl(Node);
1753    SmallVector<SDValue, 8> Ops(NumElems);
1754    for (unsigned i = 0; i < NumElems; ++i) {
1755      SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1756                                    DAG.getVectorIdxConstant(i, dl));
1757      SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1758                                    DAG.getVectorIdxConstant(i, dl));
1759      Ops[i] = DAG.getNode(ISD::SETCC, dl,
1760                           TLI.getSetCCResultType(DAG.getDataLayout(),
1761                                                  *DAG.getContext(), TmpEltVT),
1762                           LHSElem, RHSElem, CC);
1763      Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
1764                             DAG.getConstant(0, dl, EltVT));
1765    }
1766    return DAG.getBuildVector(VT, dl, Ops);
1767  }
1768  
1769  bool SelectionDAG::LegalizeVectors() {
1770    return VectorLegalizer(*this).Run();
1771  }
1772