xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SelectionDAG::LegalizeVectors method.
10 //
11 // The vector legalizer looks for vector operations which might need to be
12 // scalarized and legalizes them. This is a separate step from Legalize because
13 // scalarizing can introduce illegal types.  For example, suppose we have an
14 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
15 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16 // operation, which introduces nodes with the illegal type i64 which must be
17 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18 // the operation must be unrolled, which introduces nodes with the illegal
19 // type i8 which must be promoted.
20 //
21 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22 // or operations that happen to take a vector which are custom-lowered;
23 // the legalization for such operations never produces nodes
24 // with illegal types, so it's okay to put off legalizing them until
25 // SelectionDAG::Legalize runs.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/Analysis/TargetLibraryInfo.h"
32 #include "llvm/Analysis/VectorUtils.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/TargetLowering.h"
37 #include "llvm/CodeGen/ValueTypes.h"
38 #include "llvm/CodeGenTypes/MachineValueType.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include <cassert>
45 #include <cstdint>
46 #include <iterator>
47 #include <utility>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "legalizevectorops"
52 
53 namespace {
54 
55 class VectorLegalizer {
56   SelectionDAG& DAG;
57   const TargetLowering &TLI;
58   bool Changed = false; // Keep track of whether anything changed
59 
60   /// For nodes that are of legal width, and that have more than one use, this
61   /// map indicates what regularized operand to use.  This allows us to avoid
62   /// legalizing the same thing more than once.
63   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
64 
65   /// Adds a node to the translation cache.
66   void AddLegalizedOperand(SDValue From, SDValue To) {
67     LegalizedNodes.insert(std::make_pair(From, To));
68     // If someone requests legalization of the new node, return itself.
69     if (From != To)
70       LegalizedNodes.insert(std::make_pair(To, To));
71   }
72 
73   /// Legalizes the given node.
74   SDValue LegalizeOp(SDValue Op);
75 
76   /// Assuming the node is legal, "legalize" the results.
77   SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78 
79   /// Make sure Results are legal and update the translation cache.
80   SDValue RecursivelyLegalizeResults(SDValue Op,
81                                      MutableArrayRef<SDValue> Results);
82 
83   /// Wrapper to interface LowerOperation with a vector of Results.
84   /// Returns false if the target wants to use default expansion. Otherwise
85   /// returns true. If return is true and the Results are empty, then the
86   /// target wants to keep the input node as is.
87   bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88 
89   /// Implements unrolling a VSETCC.
90   SDValue UnrollVSETCC(SDNode *Node);
91 
92   /// Implement expand-based legalization of vector operations.
93   ///
94   /// This is just a high-level routine to dispatch to specific code paths for
95   /// operations to legalize them.
96   void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
97 
98   /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99   /// FP_TO_SINT isn't legal.
100   void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101 
102   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104   void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105 
106   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107   SDValue ExpandSEXTINREG(SDNode *Node);
108 
109   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110   ///
111   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112   /// type. The contents of the bits in the extended part of each element are
113   /// undef.
114   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115 
116   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117   ///
118   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119   /// type, then shifts left and arithmetic shifts right to introduce a sign
120   /// extension.
121   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122 
123   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124   ///
125   /// Shuffles the low lanes of the operand into place and blends zeros into
126   /// the remaining lanes, finally bitcasting to the proper type.
127   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128 
129   /// Expand bswap of vectors into a shuffle if legal.
130   SDValue ExpandBSWAP(SDNode *Node);
131 
132   /// Implement vselect in terms of XOR, AND, OR when blend is not
133   /// supported by the target.
134   SDValue ExpandVSELECT(SDNode *Node);
135   SDValue ExpandVP_SELECT(SDNode *Node);
136   SDValue ExpandVP_MERGE(SDNode *Node);
137   SDValue ExpandVP_REM(SDNode *Node);
138   SDValue ExpandVP_FNEG(SDNode *Node);
139   SDValue ExpandVP_FABS(SDNode *Node);
140   SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141   SDValue ExpandSELECT(SDNode *Node);
142   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
143   SDValue ExpandStore(SDNode *N);
144   SDValue ExpandFNEG(SDNode *Node);
145   SDValue ExpandFABS(SDNode *Node);
146   SDValue ExpandFCOPYSIGN(SDNode *Node);
147   void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148   void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149   SDValue ExpandBITREVERSE(SDNode *Node);
150   void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
151   void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152   void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153   void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154   void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155   void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 
157   bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
158                             SmallVectorImpl<SDValue> &Results);
159   bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
160                             RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
161                             RTLIB::Libcall Call_F128,
162                             RTLIB::Libcall Call_PPCF128,
163                             SmallVectorImpl<SDValue> &Results);
164 
165   void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
166 
167   /// Implements vector promotion.
168   ///
169   /// This is essentially just bitcasting the operands to a different type and
170   /// bitcasting the result back to the original type.
171   void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
172 
173   /// Implements [SU]INT_TO_FP vector promotion.
174   ///
175   /// This is a [zs]ext of the input operand to a larger integer type.
176   void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
177 
178   /// Implements FP_TO_[SU]INT vector promotion of the result type.
179   ///
180   /// It is promoted to a larger integer type.  The result is then
181   /// truncated back to the original type.
182   void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
183 
184   /// Implements vector setcc operation promotion.
185   ///
186   /// All vector operands are promoted to a vector type with larger element
187   /// type.
188   void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
189 
190   void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
191 
192   /// Calculate the reduction using a type of higher precision and round the
193   /// result to match the original type. Setting NonArithmetic signifies the
194   /// rounding of the result does not affect its value.
195   void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
196                              bool NonArithmetic);
197 
198 public:
199   VectorLegalizer(SelectionDAG& dag) :
200       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
201 
202   /// Begin legalizer the vector operations in the DAG.
203   bool Run();
204 };
205 
206 } // end anonymous namespace
207 
208 bool VectorLegalizer::Run() {
209   // Before we start legalizing vector nodes, check if there are any vectors.
210   bool HasVectors = false;
211   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
212        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
213     // Check if the values of the nodes contain vectors. We don't need to check
214     // the operands because we are going to check their values at some point.
215     HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
216 
217     // If we found a vector node we can start the legalization.
218     if (HasVectors)
219       break;
220   }
221 
222   // If this basic block has no vectors then no need to legalize vectors.
223   if (!HasVectors)
224     return false;
225 
226   // The legalize process is inherently a bottom-up recursive process (users
227   // legalize their uses before themselves).  Given infinite stack space, we
228   // could just start legalizing on the root and traverse the whole graph.  In
229   // practice however, this causes us to run out of stack space on large basic
230   // blocks.  To avoid this problem, compute an ordering of the nodes where each
231   // node is only legalized after all of its operands are legalized.
232   DAG.AssignTopologicalOrder();
233   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
234        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
235     LegalizeOp(SDValue(&*I, 0));
236 
237   // Finally, it's possible the root changed.  Get the new root.
238   SDValue OldRoot = DAG.getRoot();
239   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
240   DAG.setRoot(LegalizedNodes[OldRoot]);
241 
242   LegalizedNodes.clear();
243 
244   // Remove dead nodes now.
245   DAG.RemoveDeadNodes();
246 
247   return Changed;
248 }
249 
250 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
251   assert(Op->getNumValues() == Result->getNumValues() &&
252          "Unexpected number of results");
253   // Generic legalization: just pass the operand through.
254   for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
255     AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
256   return SDValue(Result, Op.getResNo());
257 }
258 
259 SDValue
260 VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
261                                             MutableArrayRef<SDValue> Results) {
262   assert(Results.size() == Op->getNumValues() &&
263          "Unexpected number of results");
264   // Make sure that the generated code is itself legal.
265   for (unsigned i = 0, e = Results.size(); i != e; ++i) {
266     Results[i] = LegalizeOp(Results[i]);
267     AddLegalizedOperand(Op.getValue(i), Results[i]);
268   }
269 
270   return Results[Op.getResNo()];
271 }
272 
273 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
274   // Note that LegalizeOp may be reentered even from single-use nodes, which
275   // means that we always must cache transformed nodes.
276   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
277   if (I != LegalizedNodes.end()) return I->second;
278 
279   // Legalize the operands
280   SmallVector<SDValue, 8> Ops;
281   for (const SDValue &Oper : Op->op_values())
282     Ops.push_back(LegalizeOp(Oper));
283 
284   SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
285 
286   bool HasVectorValueOrOp =
287       llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
288       llvm::any_of(Node->op_values(),
289                    [](SDValue O) { return O.getValueType().isVector(); });
290   if (!HasVectorValueOrOp)
291     return TranslateLegalizeResults(Op, Node);
292 
293   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
294   EVT ValVT;
295   switch (Op.getOpcode()) {
296   default:
297     return TranslateLegalizeResults(Op, Node);
298   case ISD::LOAD: {
299     LoadSDNode *LD = cast<LoadSDNode>(Node);
300     ISD::LoadExtType ExtType = LD->getExtensionType();
301     EVT LoadedVT = LD->getMemoryVT();
302     if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
303       Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
304     break;
305   }
306   case ISD::STORE: {
307     StoreSDNode *ST = cast<StoreSDNode>(Node);
308     EVT StVT = ST->getMemoryVT();
309     MVT ValVT = ST->getValue().getSimpleValueType();
310     if (StVT.isVector() && ST->isTruncatingStore())
311       Action = TLI.getTruncStoreAction(ValVT, StVT);
312     break;
313   }
314   case ISD::MERGE_VALUES:
315     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
316     // This operation lies about being legal: when it claims to be legal,
317     // it should actually be expanded.
318     if (Action == TargetLowering::Legal)
319       Action = TargetLowering::Expand;
320     break;
321 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
322   case ISD::STRICT_##DAGN:
323 #include "llvm/IR/ConstrainedOps.def"
324     ValVT = Node->getValueType(0);
325     if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
326         Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
327       ValVT = Node->getOperand(1).getValueType();
328     if (Op.getOpcode() == ISD::STRICT_FSETCC ||
329         Op.getOpcode() == ISD::STRICT_FSETCCS) {
330       MVT OpVT = Node->getOperand(1).getSimpleValueType();
331       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
332       Action = TLI.getCondCodeAction(CCCode, OpVT);
333       if (Action == TargetLowering::Legal)
334         Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
335     } else {
336       Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
337     }
338     // If we're asked to expand a strict vector floating-point operation,
339     // by default we're going to simply unroll it.  That is usually the
340     // best approach, except in the case where the resulting strict (scalar)
341     // operations would themselves use the fallback mutation to non-strict.
342     // In that specific case, just do the fallback on the vector op.
343     if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
344         TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
345             TargetLowering::Legal) {
346       EVT EltVT = ValVT.getVectorElementType();
347       if (TLI.getOperationAction(Node->getOpcode(), EltVT)
348           == TargetLowering::Expand &&
349           TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
350           == TargetLowering::Legal)
351         Action = TargetLowering::Legal;
352     }
353     break;
354   case ISD::ADD:
355   case ISD::SUB:
356   case ISD::MUL:
357   case ISD::MULHS:
358   case ISD::MULHU:
359   case ISD::SDIV:
360   case ISD::UDIV:
361   case ISD::SREM:
362   case ISD::UREM:
363   case ISD::SDIVREM:
364   case ISD::UDIVREM:
365   case ISD::FADD:
366   case ISD::FSUB:
367   case ISD::FMUL:
368   case ISD::FDIV:
369   case ISD::FREM:
370   case ISD::AND:
371   case ISD::OR:
372   case ISD::XOR:
373   case ISD::SHL:
374   case ISD::SRA:
375   case ISD::SRL:
376   case ISD::FSHL:
377   case ISD::FSHR:
378   case ISD::ROTL:
379   case ISD::ROTR:
380   case ISD::ABS:
381   case ISD::ABDS:
382   case ISD::ABDU:
383   case ISD::AVGCEILS:
384   case ISD::AVGCEILU:
385   case ISD::AVGFLOORS:
386   case ISD::AVGFLOORU:
387   case ISD::BSWAP:
388   case ISD::BITREVERSE:
389   case ISD::CTLZ:
390   case ISD::CTTZ:
391   case ISD::CTLZ_ZERO_UNDEF:
392   case ISD::CTTZ_ZERO_UNDEF:
393   case ISD::CTPOP:
394   case ISD::SELECT:
395   case ISD::VSELECT:
396   case ISD::SELECT_CC:
397   case ISD::ZERO_EXTEND:
398   case ISD::ANY_EXTEND:
399   case ISD::TRUNCATE:
400   case ISD::SIGN_EXTEND:
401   case ISD::FP_TO_SINT:
402   case ISD::FP_TO_UINT:
403   case ISD::FNEG:
404   case ISD::FABS:
405   case ISD::FMINNUM:
406   case ISD::FMAXNUM:
407   case ISD::FMINNUM_IEEE:
408   case ISD::FMAXNUM_IEEE:
409   case ISD::FMINIMUM:
410   case ISD::FMAXIMUM:
411   case ISD::FMINIMUMNUM:
412   case ISD::FMAXIMUMNUM:
413   case ISD::FCOPYSIGN:
414   case ISD::FSQRT:
415   case ISD::FSIN:
416   case ISD::FCOS:
417   case ISD::FTAN:
418   case ISD::FASIN:
419   case ISD::FACOS:
420   case ISD::FATAN:
421   case ISD::FATAN2:
422   case ISD::FSINH:
423   case ISD::FCOSH:
424   case ISD::FTANH:
425   case ISD::FLDEXP:
426   case ISD::FPOWI:
427   case ISD::FPOW:
428   case ISD::FLOG:
429   case ISD::FLOG2:
430   case ISD::FLOG10:
431   case ISD::FEXP:
432   case ISD::FEXP2:
433   case ISD::FEXP10:
434   case ISD::FCEIL:
435   case ISD::FTRUNC:
436   case ISD::FRINT:
437   case ISD::FNEARBYINT:
438   case ISD::FROUND:
439   case ISD::FROUNDEVEN:
440   case ISD::FFLOOR:
441   case ISD::FP_ROUND:
442   case ISD::FP_EXTEND:
443   case ISD::FPTRUNC_ROUND:
444   case ISD::FMA:
445   case ISD::SIGN_EXTEND_INREG:
446   case ISD::ANY_EXTEND_VECTOR_INREG:
447   case ISD::SIGN_EXTEND_VECTOR_INREG:
448   case ISD::ZERO_EXTEND_VECTOR_INREG:
449   case ISD::SMIN:
450   case ISD::SMAX:
451   case ISD::UMIN:
452   case ISD::UMAX:
453   case ISD::SMUL_LOHI:
454   case ISD::UMUL_LOHI:
455   case ISD::SADDO:
456   case ISD::UADDO:
457   case ISD::SSUBO:
458   case ISD::USUBO:
459   case ISD::SMULO:
460   case ISD::UMULO:
461   case ISD::FCANONICALIZE:
462   case ISD::FFREXP:
463   case ISD::FMODF:
464   case ISD::FSINCOS:
465   case ISD::FSINCOSPI:
466   case ISD::SADDSAT:
467   case ISD::UADDSAT:
468   case ISD::SSUBSAT:
469   case ISD::USUBSAT:
470   case ISD::SSHLSAT:
471   case ISD::USHLSAT:
472   case ISD::FP_TO_SINT_SAT:
473   case ISD::FP_TO_UINT_SAT:
474   case ISD::MGATHER:
475   case ISD::VECTOR_COMPRESS:
476   case ISD::SCMP:
477   case ISD::UCMP:
478     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
479     break;
480   case ISD::SMULFIX:
481   case ISD::SMULFIXSAT:
482   case ISD::UMULFIX:
483   case ISD::UMULFIXSAT:
484   case ISD::SDIVFIX:
485   case ISD::SDIVFIXSAT:
486   case ISD::UDIVFIX:
487   case ISD::UDIVFIXSAT: {
488     unsigned Scale = Node->getConstantOperandVal(2);
489     Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
490                                               Node->getValueType(0), Scale);
491     break;
492   }
493   case ISD::LROUND:
494   case ISD::LLROUND:
495   case ISD::LRINT:
496   case ISD::LLRINT:
497   case ISD::SINT_TO_FP:
498   case ISD::UINT_TO_FP:
499   case ISD::VECREDUCE_ADD:
500   case ISD::VECREDUCE_MUL:
501   case ISD::VECREDUCE_AND:
502   case ISD::VECREDUCE_OR:
503   case ISD::VECREDUCE_XOR:
504   case ISD::VECREDUCE_SMAX:
505   case ISD::VECREDUCE_SMIN:
506   case ISD::VECREDUCE_UMAX:
507   case ISD::VECREDUCE_UMIN:
508   case ISD::VECREDUCE_FADD:
509   case ISD::VECREDUCE_FMAX:
510   case ISD::VECREDUCE_FMAXIMUM:
511   case ISD::VECREDUCE_FMIN:
512   case ISD::VECREDUCE_FMINIMUM:
513   case ISD::VECREDUCE_FMUL:
514   case ISD::VECTOR_FIND_LAST_ACTIVE:
515     Action = TLI.getOperationAction(Node->getOpcode(),
516                                     Node->getOperand(0).getValueType());
517     break;
518   case ISD::VECREDUCE_SEQ_FADD:
519   case ISD::VECREDUCE_SEQ_FMUL:
520     Action = TLI.getOperationAction(Node->getOpcode(),
521                                     Node->getOperand(1).getValueType());
522     break;
523   case ISD::SETCC: {
524     MVT OpVT = Node->getOperand(0).getSimpleValueType();
525     ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
526     Action = TLI.getCondCodeAction(CCCode, OpVT);
527     if (Action == TargetLowering::Legal)
528       Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
529     break;
530   }
531   case ISD::PARTIAL_REDUCE_UMLA:
532   case ISD::PARTIAL_REDUCE_SMLA:
533   case ISD::PARTIAL_REDUCE_SUMLA:
534     Action =
535         TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
536                                       Node->getOperand(1).getValueType());
537     break;
538 
539 #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...)                          \
540   case ISD::VPID: {                                                            \
541     EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS))        \
542                                   : Node->getOperand(LEGALPOS).getValueType(); \
543     if (ISD::VPID == ISD::VP_SETCC) {                                          \
544       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
545       Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT());        \
546       if (Action != TargetLowering::Legal)                                     \
547         break;                                                                 \
548     }                                                                          \
549     /* Defer non-vector results to LegalizeDAG. */                             \
550     if (!Node->getValueType(0).isVector() &&                                   \
551         Node->getValueType(0) != MVT::Other) {                                 \
552       Action = TargetLowering::Legal;                                          \
553       break;                                                                   \
554     }                                                                          \
555     Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
556   } break;
557 #include "llvm/IR/VPIntrinsics.def"
558   }
559 
560   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
561 
562   SmallVector<SDValue, 8> ResultVals;
563   switch (Action) {
564   default: llvm_unreachable("This action is not supported yet!");
565   case TargetLowering::Promote:
566     assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
567            "This action is not supported yet!");
568     LLVM_DEBUG(dbgs() << "Promoting\n");
569     Promote(Node, ResultVals);
570     assert(!ResultVals.empty() && "No results for promotion?");
571     break;
572   case TargetLowering::Legal:
573     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
574     break;
575   case TargetLowering::Custom:
576     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
577     if (LowerOperationWrapper(Node, ResultVals))
578       break;
579     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
580     [[fallthrough]];
581   case TargetLowering::Expand:
582     LLVM_DEBUG(dbgs() << "Expanding\n");
583     Expand(Node, ResultVals);
584     break;
585   }
586 
587   if (ResultVals.empty())
588     return TranslateLegalizeResults(Op, Node);
589 
590   Changed = true;
591   return RecursivelyLegalizeResults(Op, ResultVals);
592 }
593 
594 // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
595 // merge them somehow?
596 bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
597                                             SmallVectorImpl<SDValue> &Results) {
598   SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
599 
600   if (!Res.getNode())
601     return false;
602 
603   if (Res == SDValue(Node, 0))
604     return true;
605 
606   // If the original node has one result, take the return value from
607   // LowerOperation as is. It might not be result number 0.
608   if (Node->getNumValues() == 1) {
609     Results.push_back(Res);
610     return true;
611   }
612 
613   // If the original node has multiple results, then the return node should
614   // have the same number of results.
615   assert((Node->getNumValues() == Res->getNumValues()) &&
616          "Lowering returned the wrong number of results!");
617 
618   // Places new result values base on N result number.
619   for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
620     Results.push_back(Res.getValue(I));
621 
622   return true;
623 }
624 
625 void VectorLegalizer::PromoteSETCC(SDNode *Node,
626                                    SmallVectorImpl<SDValue> &Results) {
627   MVT VecVT = Node->getOperand(0).getSimpleValueType();
628   MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
629 
630   unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
631 
632   SDLoc DL(Node);
633   SmallVector<SDValue, 5> Operands(Node->getNumOperands());
634 
635   Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
636   Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
637   Operands[2] = Node->getOperand(2);
638 
639   if (Node->getOpcode() == ISD::VP_SETCC) {
640     Operands[3] = Node->getOperand(3); // mask
641     Operands[4] = Node->getOperand(4); // evl
642   }
643 
644   SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
645                             Operands, Node->getFlags());
646 
647   Results.push_back(Res);
648 }
649 
650 void VectorLegalizer::PromoteSTRICT(SDNode *Node,
651                                     SmallVectorImpl<SDValue> &Results) {
652   MVT VecVT = Node->getOperand(1).getSimpleValueType();
653   MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
654 
655   assert(VecVT.isFloatingPoint());
656 
657   SDLoc DL(Node);
658   SmallVector<SDValue, 5> Operands(Node->getNumOperands());
659   SmallVector<SDValue, 2> Chains;
660 
661   for (unsigned j = 1; j != Node->getNumOperands(); ++j)
662     if (Node->getOperand(j).getValueType().isVector() &&
663         !(ISD::isVPOpcode(Node->getOpcode()) &&
664           ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
665     {
666       // promote the vector operand.
667       SDValue Ext =
668           DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
669                       {Node->getOperand(0), Node->getOperand(j)});
670       Operands[j] = Ext.getValue(0);
671       Chains.push_back(Ext.getValue(1));
672     } else
673       Operands[j] = Node->getOperand(j); // Skip no vector operand.
674 
675   SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
676 
677   Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
678 
679   SDValue Res =
680       DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
681 
682   SDValue Round =
683       DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
684                   {Res.getValue(1), Res.getValue(0),
685                    DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
686 
687   Results.push_back(Round.getValue(0));
688   Results.push_back(Round.getValue(1));
689 }
690 
691 void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
692                                             SmallVectorImpl<SDValue> &Results,
693                                             bool NonArithmetic) {
694   MVT OpVT = Node->getOperand(0).getSimpleValueType();
695   assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
696   MVT NewOpVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OpVT);
697 
698   SDLoc DL(Node);
699   SDValue NewOp = DAG.getNode(ISD::FP_EXTEND, DL, NewOpVT, Node->getOperand(0));
700   SDValue Rdx =
701       DAG.getNode(Node->getOpcode(), DL, NewOpVT.getVectorElementType(), NewOp,
702                   Node->getFlags());
703   SDValue Res =
704       DAG.getNode(ISD::FP_ROUND, DL, Node->getValueType(0), Rdx,
705                   DAG.getIntPtrConstant(NonArithmetic, DL, /*isTarget=*/true));
706   Results.push_back(Res);
707 }
708 
709 void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
710   // For a few operations there is a specific concept for promotion based on
711   // the operand's type.
712   switch (Node->getOpcode()) {
713   case ISD::SINT_TO_FP:
714   case ISD::UINT_TO_FP:
715   case ISD::STRICT_SINT_TO_FP:
716   case ISD::STRICT_UINT_TO_FP:
717     // "Promote" the operation by extending the operand.
718     PromoteINT_TO_FP(Node, Results);
719     return;
720   case ISD::FP_TO_UINT:
721   case ISD::FP_TO_SINT:
722   case ISD::STRICT_FP_TO_UINT:
723   case ISD::STRICT_FP_TO_SINT:
724     // Promote the operation by extending the operand.
725     PromoteFP_TO_INT(Node, Results);
726     return;
727   case ISD::VP_SETCC:
728   case ISD::SETCC:
729     // Promote the operation by extending the operand.
730     PromoteSETCC(Node, Results);
731     return;
732   case ISD::STRICT_FADD:
733   case ISD::STRICT_FSUB:
734   case ISD::STRICT_FMUL:
735   case ISD::STRICT_FDIV:
736   case ISD::STRICT_FSQRT:
737   case ISD::STRICT_FMA:
738     PromoteSTRICT(Node, Results);
739     return;
740   case ISD::VECREDUCE_FADD:
741     PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
742     return;
743   case ISD::VECREDUCE_FMAX:
744   case ISD::VECREDUCE_FMAXIMUM:
745   case ISD::VECREDUCE_FMIN:
746   case ISD::VECREDUCE_FMINIMUM:
747     PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
748     return;
749   case ISD::FP_ROUND:
750   case ISD::FP_EXTEND:
751     // These operations are used to do promotion so they can't be promoted
752     // themselves.
753     llvm_unreachable("Don't know how to promote this operation!");
754   case ISD::VP_FABS:
755   case ISD::VP_FCOPYSIGN:
756   case ISD::VP_FNEG:
757     // Promoting fabs, fneg, and fcopysign changes their semantics.
758     llvm_unreachable("These operations should not be promoted");
759   }
760 
761   // There are currently two cases of vector promotion:
762   // 1) Bitcasting a vector of integers to a different type to a vector of the
763   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
764   // 2) Extending a vector of floats to a vector of the same number of larger
765   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
766   assert(Node->getNumValues() == 1 &&
767          "Can't promote a vector with multiple results!");
768   MVT VT = Node->getSimpleValueType(0);
769   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
770   SDLoc dl(Node);
771   SmallVector<SDValue, 4> Operands(Node->getNumOperands());
772 
773   for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
774     // Do not promote the mask operand of a VP OP.
775     bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
776                        ISD::getVPMaskIdx(Node->getOpcode()) == j;
777     if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
778       if (Node->getOperand(j)
779               .getValueType()
780               .getVectorElementType()
781               .isFloatingPoint() &&
782           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
783         if (ISD::isVPOpcode(Node->getOpcode())) {
784           unsigned EVLIdx =
785               *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
786           unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
787           Operands[j] =
788               DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
789                           Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
790         } else {
791           Operands[j] =
792               DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
793         }
794       else
795         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
796     else
797       Operands[j] = Node->getOperand(j);
798   }
799 
800   SDValue Res =
801       DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
802 
803   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
804       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
805        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
806     if (ISD::isVPOpcode(Node->getOpcode())) {
807       unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
808       unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
809       Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
810                         Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
811     } else {
812       Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
813                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
814     }
815   else
816     Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
817 
818   Results.push_back(Res);
819 }
820 
821 void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
822                                        SmallVectorImpl<SDValue> &Results) {
823   // INT_TO_FP operations may require the input operand be promoted even
824   // when the type is otherwise legal.
825   bool IsStrict = Node->isStrictFPOpcode();
826   MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
827   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
828   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
829          "Vectors have different number of elements!");
830 
831   SDLoc dl(Node);
832   SmallVector<SDValue, 4> Operands(Node->getNumOperands());
833 
834   unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
835                   Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
836                      ? ISD::ZERO_EXTEND
837                      : ISD::SIGN_EXTEND;
838   for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
839     if (Node->getOperand(j).getValueType().isVector())
840       Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
841     else
842       Operands[j] = Node->getOperand(j);
843   }
844 
845   if (IsStrict) {
846     SDValue Res = DAG.getNode(Node->getOpcode(), dl,
847                               {Node->getValueType(0), MVT::Other}, Operands);
848     Results.push_back(Res);
849     Results.push_back(Res.getValue(1));
850     return;
851   }
852 
853   SDValue Res =
854       DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
855   Results.push_back(Res);
856 }
857 
858 // For FP_TO_INT we promote the result type to a vector type with wider
859 // elements and then truncate the result.  This is different from the default
860 // PromoteVector which uses bitcast to promote thus assumning that the
861 // promoted vector type has the same overall size.
862 void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
863                                        SmallVectorImpl<SDValue> &Results) {
864   MVT VT = Node->getSimpleValueType(0);
865   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
866   bool IsStrict = Node->isStrictFPOpcode();
867   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
868          "Vectors have different number of elements!");
869 
870   unsigned NewOpc = Node->getOpcode();
871   // Change FP_TO_UINT to FP_TO_SINT if possible.
872   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
873   if (NewOpc == ISD::FP_TO_UINT &&
874       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
875     NewOpc = ISD::FP_TO_SINT;
876 
877   if (NewOpc == ISD::STRICT_FP_TO_UINT &&
878       TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
879     NewOpc = ISD::STRICT_FP_TO_SINT;
880 
881   SDLoc dl(Node);
882   SDValue Promoted, Chain;
883   if (IsStrict) {
884     Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
885                            {Node->getOperand(0), Node->getOperand(1)});
886     Chain = Promoted.getValue(1);
887   } else
888     Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
889 
890   // Assert that the converted value fits in the original type.  If it doesn't
891   // (eg: because the value being converted is too big), then the result of the
892   // original operation was undefined anyway, so the assert is still correct.
893   if (Node->getOpcode() == ISD::FP_TO_UINT ||
894       Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
895     NewOpc = ISD::AssertZext;
896   else
897     NewOpc = ISD::AssertSext;
898 
899   Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
900                          DAG.getValueType(VT.getScalarType()));
901   Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
902   Results.push_back(Promoted);
903   if (IsStrict)
904     Results.push_back(Chain);
905 }
906 
907 std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
908   LoadSDNode *LD = cast<LoadSDNode>(N);
909   return TLI.scalarizeVectorLoad(LD, DAG);
910 }
911 
912 SDValue VectorLegalizer::ExpandStore(SDNode *N) {
913   StoreSDNode *ST = cast<StoreSDNode>(N);
914   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
915   return TF;
916 }
917 
918 void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
919   switch (Node->getOpcode()) {
920   case ISD::LOAD: {
921     std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
922     Results.push_back(Tmp.first);
923     Results.push_back(Tmp.second);
924     return;
925   }
926   case ISD::STORE:
927     Results.push_back(ExpandStore(Node));
928     return;
929   case ISD::MERGE_VALUES:
930     for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
931       Results.push_back(Node->getOperand(i));
932     return;
933   case ISD::SIGN_EXTEND_INREG:
934     if (SDValue Expanded = ExpandSEXTINREG(Node)) {
935       Results.push_back(Expanded);
936       return;
937     }
938     break;
939   case ISD::ANY_EXTEND_VECTOR_INREG:
940     Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
941     return;
942   case ISD::SIGN_EXTEND_VECTOR_INREG:
943     Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
944     return;
945   case ISD::ZERO_EXTEND_VECTOR_INREG:
946     Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
947     return;
948   case ISD::BSWAP:
949     if (SDValue Expanded = ExpandBSWAP(Node)) {
950       Results.push_back(Expanded);
951       return;
952     }
953     break;
954   case ISD::VP_BSWAP:
955     Results.push_back(TLI.expandVPBSWAP(Node, DAG));
956     return;
957   case ISD::VSELECT:
958     if (SDValue Expanded = ExpandVSELECT(Node)) {
959       Results.push_back(Expanded);
960       return;
961     }
962     break;
963   case ISD::VP_SELECT:
964     if (SDValue Expanded = ExpandVP_SELECT(Node)) {
965       Results.push_back(Expanded);
966       return;
967     }
968     break;
969   case ISD::VP_SREM:
970   case ISD::VP_UREM:
971     if (SDValue Expanded = ExpandVP_REM(Node)) {
972       Results.push_back(Expanded);
973       return;
974     }
975     break;
976   case ISD::VP_FNEG:
977     if (SDValue Expanded = ExpandVP_FNEG(Node)) {
978       Results.push_back(Expanded);
979       return;
980     }
981     break;
982   case ISD::VP_FABS:
983     if (SDValue Expanded = ExpandVP_FABS(Node)) {
984       Results.push_back(Expanded);
985       return;
986     }
987     break;
988   case ISD::VP_FCOPYSIGN:
989     if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
990       Results.push_back(Expanded);
991       return;
992     }
993     break;
994   case ISD::SELECT:
995     if (SDValue Expanded = ExpandSELECT(Node)) {
996       Results.push_back(Expanded);
997       return;
998     }
999     break;
1000   case ISD::SELECT_CC: {
1001     if (Node->getValueType(0).isScalableVector()) {
1002       EVT CondVT = TLI.getSetCCResultType(
1003           DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
1004       SDValue SetCC =
1005           DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
1006                       Node->getOperand(1), Node->getOperand(4));
1007       Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
1008                                       Node->getOperand(2),
1009                                       Node->getOperand(3)));
1010       return;
1011     }
1012     break;
1013   }
1014   case ISD::FP_TO_UINT:
1015     ExpandFP_TO_UINT(Node, Results);
1016     return;
1017   case ISD::UINT_TO_FP:
1018     ExpandUINT_TO_FLOAT(Node, Results);
1019     return;
1020   case ISD::FNEG:
1021     if (SDValue Expanded = ExpandFNEG(Node)) {
1022       Results.push_back(Expanded);
1023       return;
1024     }
1025     break;
1026   case ISD::FABS:
1027     if (SDValue Expanded = ExpandFABS(Node)) {
1028       Results.push_back(Expanded);
1029       return;
1030     }
1031     break;
1032   case ISD::FCOPYSIGN:
1033     if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
1034       Results.push_back(Expanded);
1035       return;
1036     }
1037     break;
1038   case ISD::FSUB:
1039     ExpandFSUB(Node, Results);
1040     return;
1041   case ISD::SETCC:
1042   case ISD::VP_SETCC:
1043     ExpandSETCC(Node, Results);
1044     return;
1045   case ISD::ABS:
1046     if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
1047       Results.push_back(Expanded);
1048       return;
1049     }
1050     break;
1051   case ISD::ABDS:
1052   case ISD::ABDU:
1053     if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
1054       Results.push_back(Expanded);
1055       return;
1056     }
1057     break;
1058   case ISD::AVGCEILS:
1059   case ISD::AVGCEILU:
1060   case ISD::AVGFLOORS:
1061   case ISD::AVGFLOORU:
1062     if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
1063       Results.push_back(Expanded);
1064       return;
1065     }
1066     break;
1067   case ISD::BITREVERSE:
1068     if (SDValue Expanded = ExpandBITREVERSE(Node)) {
1069       Results.push_back(Expanded);
1070       return;
1071     }
1072     break;
1073   case ISD::VP_BITREVERSE:
1074     if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
1075       Results.push_back(Expanded);
1076       return;
1077     }
1078     break;
1079   case ISD::CTPOP:
1080     if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
1081       Results.push_back(Expanded);
1082       return;
1083     }
1084     break;
1085   case ISD::VP_CTPOP:
1086     if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
1087       Results.push_back(Expanded);
1088       return;
1089     }
1090     break;
1091   case ISD::CTLZ:
1092   case ISD::CTLZ_ZERO_UNDEF:
1093     if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
1094       Results.push_back(Expanded);
1095       return;
1096     }
1097     break;
1098   case ISD::VP_CTLZ:
1099   case ISD::VP_CTLZ_ZERO_UNDEF:
1100     if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
1101       Results.push_back(Expanded);
1102       return;
1103     }
1104     break;
1105   case ISD::CTTZ:
1106   case ISD::CTTZ_ZERO_UNDEF:
1107     if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
1108       Results.push_back(Expanded);
1109       return;
1110     }
1111     break;
1112   case ISD::VP_CTTZ:
1113   case ISD::VP_CTTZ_ZERO_UNDEF:
1114     if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
1115       Results.push_back(Expanded);
1116       return;
1117     }
1118     break;
1119   case ISD::FSHL:
1120   case ISD::VP_FSHL:
1121   case ISD::FSHR:
1122   case ISD::VP_FSHR:
1123     if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
1124       Results.push_back(Expanded);
1125       return;
1126     }
1127     break;
1128   case ISD::ROTL:
1129   case ISD::ROTR:
1130     if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
1131       Results.push_back(Expanded);
1132       return;
1133     }
1134     break;
1135   case ISD::FMINNUM:
1136   case ISD::FMAXNUM:
1137     if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
1138       Results.push_back(Expanded);
1139       return;
1140     }
1141     break;
1142   case ISD::FMINIMUM:
1143   case ISD::FMAXIMUM:
1144     Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
1145     return;
1146   case ISD::FMINIMUMNUM:
1147   case ISD::FMAXIMUMNUM:
1148     Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
1149     return;
1150   case ISD::SMIN:
1151   case ISD::SMAX:
1152   case ISD::UMIN:
1153   case ISD::UMAX:
1154     if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
1155       Results.push_back(Expanded);
1156       return;
1157     }
1158     break;
1159   case ISD::UADDO:
1160   case ISD::USUBO:
1161     ExpandUADDSUBO(Node, Results);
1162     return;
1163   case ISD::SADDO:
1164   case ISD::SSUBO:
1165     ExpandSADDSUBO(Node, Results);
1166     return;
1167   case ISD::UMULO:
1168   case ISD::SMULO:
1169     ExpandMULO(Node, Results);
1170     return;
1171   case ISD::USUBSAT:
1172   case ISD::SSUBSAT:
1173   case ISD::UADDSAT:
1174   case ISD::SADDSAT:
1175     if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
1176       Results.push_back(Expanded);
1177       return;
1178     }
1179     break;
1180   case ISD::USHLSAT:
1181   case ISD::SSHLSAT:
1182     if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
1183       Results.push_back(Expanded);
1184       return;
1185     }
1186     break;
1187   case ISD::FP_TO_SINT_SAT:
1188   case ISD::FP_TO_UINT_SAT:
1189     // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
1190     if (Node->getValueType(0).isScalableVector()) {
1191       if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
1192         Results.push_back(Expanded);
1193         return;
1194       }
1195     }
1196     break;
1197   case ISD::SMULFIX:
1198   case ISD::UMULFIX:
1199     if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
1200       Results.push_back(Expanded);
1201       return;
1202     }
1203     break;
1204   case ISD::SMULFIXSAT:
1205   case ISD::UMULFIXSAT:
1206     // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
1207     // why. Maybe it results in worse codegen compared to the unroll for some
1208     // targets? This should probably be investigated. And if we still prefer to
1209     // unroll an explanation could be helpful.
1210     break;
1211   case ISD::SDIVFIX:
1212   case ISD::UDIVFIX:
1213     ExpandFixedPointDiv(Node, Results);
1214     return;
1215   case ISD::SDIVFIXSAT:
1216   case ISD::UDIVFIXSAT:
1217     break;
1218 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
1219   case ISD::STRICT_##DAGN:
1220 #include "llvm/IR/ConstrainedOps.def"
1221     ExpandStrictFPOp(Node, Results);
1222     return;
1223   case ISD::VECREDUCE_ADD:
1224   case ISD::VECREDUCE_MUL:
1225   case ISD::VECREDUCE_AND:
1226   case ISD::VECREDUCE_OR:
1227   case ISD::VECREDUCE_XOR:
1228   case ISD::VECREDUCE_SMAX:
1229   case ISD::VECREDUCE_SMIN:
1230   case ISD::VECREDUCE_UMAX:
1231   case ISD::VECREDUCE_UMIN:
1232   case ISD::VECREDUCE_FADD:
1233   case ISD::VECREDUCE_FMUL:
1234   case ISD::VECREDUCE_FMAX:
1235   case ISD::VECREDUCE_FMIN:
1236   case ISD::VECREDUCE_FMAXIMUM:
1237   case ISD::VECREDUCE_FMINIMUM:
1238     Results.push_back(TLI.expandVecReduce(Node, DAG));
1239     return;
1240   case ISD::PARTIAL_REDUCE_UMLA:
1241   case ISD::PARTIAL_REDUCE_SMLA:
1242   case ISD::PARTIAL_REDUCE_SUMLA:
1243     Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
1244     return;
1245   case ISD::VECREDUCE_SEQ_FADD:
1246   case ISD::VECREDUCE_SEQ_FMUL:
1247     Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
1248     return;
1249   case ISD::SREM:
1250   case ISD::UREM:
1251     ExpandREM(Node, Results);
1252     return;
1253   case ISD::VP_MERGE:
1254     if (SDValue Expanded = ExpandVP_MERGE(Node)) {
1255       Results.push_back(Expanded);
1256       return;
1257     }
1258     break;
1259   case ISD::FREM:
1260     if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
1261                              RTLIB::REM_F80, RTLIB::REM_F128,
1262                              RTLIB::REM_PPCF128, Results))
1263       return;
1264 
1265     break;
1266   case ISD::FSINCOS:
1267   case ISD::FSINCOSPI: {
1268     EVT VT = Node->getValueType(0).getVectorElementType();
1269     RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
1270                             ? RTLIB::getSINCOS(VT)
1271                             : RTLIB::getSINCOSPI(VT);
1272     if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
1273       return;
1274     break;
1275   }
1276   case ISD::FMODF: {
1277     RTLIB::Libcall LC =
1278         RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
1279     if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
1280                                           /*CallRetResNo=*/0))
1281       return;
1282     break;
1283   }
1284   case ISD::VECTOR_COMPRESS:
1285     Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
1286     return;
1287   case ISD::VECTOR_FIND_LAST_ACTIVE:
1288     Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
1289     return;
1290   case ISD::SCMP:
1291   case ISD::UCMP:
1292     Results.push_back(TLI.expandCMP(Node, DAG));
1293     return;
1294 
1295   case ISD::FADD:
1296   case ISD::FMUL:
1297   case ISD::FMA:
1298   case ISD::FDIV:
1299   case ISD::FCEIL:
1300   case ISD::FFLOOR:
1301   case ISD::FNEARBYINT:
1302   case ISD::FRINT:
1303   case ISD::FROUND:
1304   case ISD::FROUNDEVEN:
1305   case ISD::FTRUNC:
1306   case ISD::FSQRT:
1307     if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1308       Results.push_back(Expanded);
1309       return;
1310     }
1311     break;
1312   }
1313 
1314   SDValue Unrolled = DAG.UnrollVectorOp(Node);
1315   if (Node->getNumValues() == 1) {
1316     Results.push_back(Unrolled);
1317   } else {
1318     assert(Node->getNumValues() == Unrolled->getNumValues() &&
1319       "VectorLegalizer Expand returned wrong number of results!");
1320     for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
1321       Results.push_back(Unrolled.getValue(I));
1322   }
1323 }
1324 
1325 SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1326   // Lower a select instruction where the condition is a scalar and the
1327   // operands are vectors. Lower this select to VSELECT and implement it
1328   // using XOR AND OR. The selector bit is broadcasted.
1329   EVT VT = Node->getValueType(0);
1330   SDLoc DL(Node);
1331 
1332   SDValue Mask = Node->getOperand(0);
1333   SDValue Op1 = Node->getOperand(1);
1334   SDValue Op2 = Node->getOperand(2);
1335 
1336   assert(VT.isVector() && !Mask.getValueType().isVector()
1337          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1338 
1339   // If we can't even use the basic vector operations of
1340   // AND,OR,XOR, we will have to scalarize the op.
1341   // Notice that the operation may be 'promoted' which means that it is
1342   // 'bitcasted' to another type which is handled.
1343   // Also, we need to be able to construct a splat vector using either
1344   // BUILD_VECTOR or SPLAT_VECTOR.
1345   // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1346   // BUILD_VECTOR?
1347   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1348       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1349       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
1350       TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1351                                                       : ISD::SPLAT_VECTOR,
1352                              VT) == TargetLowering::Expand)
1353     return SDValue();
1354 
1355   // Generate a mask operand.
1356   EVT MaskTy = VT.changeVectorElementTypeToInteger();
1357 
1358   // What is the size of each element in the vector mask.
1359   EVT BitTy = MaskTy.getScalarType();
1360 
1361   Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1362                        DAG.getConstant(0, DL, BitTy));
1363 
1364   // Broadcast the mask so that the entire vector is all one or all zero.
1365   Mask = DAG.getSplat(MaskTy, DL, Mask);
1366 
1367   // Bitcast the operands to be the same type as the mask.
1368   // This is needed when we select between FP types because
1369   // the mask is a vector of integers.
1370   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1371   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1372 
1373   SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1374 
1375   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1376   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1377   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1378   return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1379 }
1380 
1381 SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1382   EVT VT = Node->getValueType(0);
1383 
1384   // Make sure that the SRA and SHL instructions are available.
1385   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1386       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1387     return SDValue();
1388 
1389   SDLoc DL(Node);
1390   EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1391 
1392   unsigned BW = VT.getScalarSizeInBits();
1393   unsigned OrigBW = OrigTy.getScalarSizeInBits();
1394   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1395 
1396   SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1397   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1398 }
1399 
1400 // Generically expand a vector anyext in register to a shuffle of the relevant
1401 // lanes into the appropriate locations, with other lanes left undef.
1402 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1403   SDLoc DL(Node);
1404   EVT VT = Node->getValueType(0);
1405   int NumElements = VT.getVectorNumElements();
1406   SDValue Src = Node->getOperand(0);
1407   EVT SrcVT = Src.getValueType();
1408   int NumSrcElements = SrcVT.getVectorNumElements();
1409 
1410   // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1411   // into a larger vector type.
1412   if (SrcVT.bitsLE(VT)) {
1413     assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1414            "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1415     NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1416     SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1417                              NumSrcElements);
1418     Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1419   }
1420 
1421   // Build a base mask of undef shuffles.
1422   SmallVector<int, 16> ShuffleMask;
1423   ShuffleMask.resize(NumSrcElements, -1);
1424 
1425   // Place the extended lanes into the correct locations.
1426   int ExtLaneScale = NumSrcElements / NumElements;
1427   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1428   for (int i = 0; i < NumElements; ++i)
1429     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1430 
1431   return DAG.getNode(
1432       ISD::BITCAST, DL, VT,
1433       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
1434 }
1435 
1436 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1437   SDLoc DL(Node);
1438   EVT VT = Node->getValueType(0);
1439   SDValue Src = Node->getOperand(0);
1440   EVT SrcVT = Src.getValueType();
1441 
1442   // First build an any-extend node which can be legalized above when we
1443   // recurse through it.
1444   SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
1445 
1446   // Now we need sign extend. Do this by shifting the elements. Even if these
1447   // aren't legal operations, they have a better chance of being legalized
1448   // without full scalarization than the sign extension does.
1449   unsigned EltWidth = VT.getScalarSizeInBits();
1450   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1451   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1452   return DAG.getNode(ISD::SRA, DL, VT,
1453                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1454                      ShiftAmount);
1455 }
1456 
1457 // Generically expand a vector zext in register to a shuffle of the relevant
1458 // lanes into the appropriate locations, a blend of zero into the high bits,
1459 // and a bitcast to the wider element type.
1460 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1461   SDLoc DL(Node);
1462   EVT VT = Node->getValueType(0);
1463   int NumElements = VT.getVectorNumElements();
1464   SDValue Src = Node->getOperand(0);
1465   EVT SrcVT = Src.getValueType();
1466   int NumSrcElements = SrcVT.getVectorNumElements();
1467 
1468   // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1469   // into a larger vector type.
1470   if (SrcVT.bitsLE(VT)) {
1471     assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1472            "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1473     NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1474     SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1475                              NumSrcElements);
1476     Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1477   }
1478 
1479   // Build up a zero vector to blend into this one.
1480   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1481 
1482   // Shuffle the incoming lanes into the correct position, and pull all other
1483   // lanes from the zero vector.
1484   auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1485 
1486   int ExtLaneScale = NumSrcElements / NumElements;
1487   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1488   for (int i = 0; i < NumElements; ++i)
1489     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1490 
1491   return DAG.getNode(ISD::BITCAST, DL, VT,
1492                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1493 }
1494 
1495 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1496   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1497   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1498     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1499       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1500 }
1501 
1502 SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1503   EVT VT = Node->getValueType(0);
1504 
1505   // Scalable vectors can't use shuffle expansion.
1506   if (VT.isScalableVector())
1507     return TLI.expandBSWAP(Node, DAG);
1508 
1509   // Generate a byte wise shuffle mask for the BSWAP.
1510   SmallVector<int, 16> ShuffleMask;
1511   createBSWAPShuffleMask(VT, ShuffleMask);
1512   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1513 
1514   // Only emit a shuffle if the mask is legal.
1515   if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1516     SDLoc DL(Node);
1517     SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1518     Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
1519     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1520   }
1521 
1522   // If we have the appropriate vector bit operations, it is better to use them
1523   // than unrolling and expanding each component.
1524   if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1525       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1526       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1527       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1528     return TLI.expandBSWAP(Node, DAG);
1529 
1530   // Otherwise let the caller unroll.
1531   return SDValue();
1532 }
1533 
1534 SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1535   EVT VT = Node->getValueType(0);
1536 
1537   // We can't unroll or use shuffles for scalable vectors.
1538   if (VT.isScalableVector())
1539     return TLI.expandBITREVERSE(Node, DAG);
1540 
1541   // If we have the scalar operation, it's probably cheaper to unroll it.
1542   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
1543     return SDValue();
1544 
1545   // If the vector element width is a whole number of bytes, test if its legal
1546   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1547   // vector. This greatly reduces the number of bit shifts necessary.
1548   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1549   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1550     SmallVector<int, 16> BSWAPMask;
1551     createBSWAPShuffleMask(VT, BSWAPMask);
1552 
1553     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1554     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1555         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
1556          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1557           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1558           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
1559           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
1560       SDLoc DL(Node);
1561       SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1562       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
1563                                 BSWAPMask);
1564       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1565       Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1566       return Op;
1567     }
1568   }
1569 
1570   // If we have the appropriate vector bit operations, it is better to use them
1571   // than unrolling and expanding each component.
1572   if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1573       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1574       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1575       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1576     return TLI.expandBITREVERSE(Node, DAG);
1577 
1578   // Otherwise unroll.
1579   return SDValue();
1580 }
1581 
1582 SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1583   // Implement VSELECT in terms of XOR, AND, OR
1584   // on platforms which do not support blend natively.
1585   SDLoc DL(Node);
1586 
1587   SDValue Mask = Node->getOperand(0);
1588   SDValue Op1 = Node->getOperand(1);
1589   SDValue Op2 = Node->getOperand(2);
1590 
1591   EVT VT = Mask.getValueType();
1592 
1593   // If we can't even use the basic vector operations of
1594   // AND,OR,XOR, we will have to scalarize the op.
1595   // Notice that the operation may be 'promoted' which means that it is
1596   // 'bitcasted' to another type which is handled.
1597   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1598       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1599       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1600     return SDValue();
1601 
1602   // This operation also isn't safe with AND, OR, XOR when the boolean type is
1603   // 0/1 and the select operands aren't also booleans, as we need an all-ones
1604   // vector constant to mask with.
1605   // FIXME: Sign extend 1 to all ones if that's legal on the target.
1606   auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1607   if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1608       !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1609         Op1.getValueType().getVectorElementType() == MVT::i1))
1610     return SDValue();
1611 
1612   // If the mask and the type are different sizes, unroll the vector op. This
1613   // can occur when getSetCCResultType returns something that is different in
1614   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1615   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1616     return SDValue();
1617 
1618   // Bitcast the operands to be the same type as the mask.
1619   // This is needed when we select between FP types because
1620   // the mask is a vector of integers.
1621   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1622   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1623 
1624   SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1625 
1626   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1627   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1628   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1629   return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1630 }
1631 
1632 SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1633   // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1634   // do not support it natively.
1635   SDLoc DL(Node);
1636 
1637   SDValue Mask = Node->getOperand(0);
1638   SDValue Op1 = Node->getOperand(1);
1639   SDValue Op2 = Node->getOperand(2);
1640   SDValue EVL = Node->getOperand(3);
1641 
1642   EVT VT = Mask.getValueType();
1643 
1644   // If we can't even use the basic vector operations of
1645   // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1646   if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1647       TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1648       TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1649     return SDValue();
1650 
1651   // This operation also isn't safe when the operands aren't also booleans.
1652   if (Op1.getValueType().getVectorElementType() != MVT::i1)
1653     return SDValue();
1654 
1655   SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1656   SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1657 
1658   Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1659   Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1660   return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1661 }
1662 
1663 SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1664   // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1665   // indices less than the EVL/pivot are true. Combine that with the original
1666   // mask for a full-length mask. Use a full-length VSELECT to select between
1667   // the true and false values.
1668   SDLoc DL(Node);
1669 
1670   SDValue Mask = Node->getOperand(0);
1671   SDValue Op1 = Node->getOperand(1);
1672   SDValue Op2 = Node->getOperand(2);
1673   SDValue EVL = Node->getOperand(3);
1674 
1675   EVT MaskVT = Mask.getValueType();
1676   bool IsFixedLen = MaskVT.isFixedLengthVector();
1677 
1678   EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1679                                   MaskVT.getVectorElementCount());
1680 
1681   // If we can't construct the EVL mask efficiently, it's better to unroll.
1682   if ((IsFixedLen &&
1683        !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
1684       (!IsFixedLen &&
1685        (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1686         !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
1687     return SDValue();
1688 
1689   // If using a SETCC would result in a different type than the mask type,
1690   // unroll.
1691   if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1692                              EVLVecVT) != MaskVT)
1693     return SDValue();
1694 
1695   SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1696   SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1697   SDValue EVLMask =
1698       DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1699 
1700   SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1701   return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1702 }
1703 
1704 SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1705   // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1706   EVT VT = Node->getValueType(0);
1707 
1708   unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1709 
1710   if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1711       !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1712       !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1713     return SDValue();
1714 
1715   SDLoc DL(Node);
1716 
1717   SDValue Dividend = Node->getOperand(0);
1718   SDValue Divisor = Node->getOperand(1);
1719   SDValue Mask = Node->getOperand(2);
1720   SDValue EVL = Node->getOperand(3);
1721 
1722   // X % Y -> X-X/Y*Y
1723   SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1724   SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1725   return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1726 }
1727 
1728 SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1729   EVT VT = Node->getValueType(0);
1730   EVT IntVT = VT.changeVectorElementTypeToInteger();
1731 
1732   if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1733     return SDValue();
1734 
1735   SDValue Mask = Node->getOperand(1);
1736   SDValue EVL = Node->getOperand(2);
1737 
1738   SDLoc DL(Node);
1739   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1740   SDValue SignMask = DAG.getConstant(
1741       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1742   SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
1743   return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1744 }
1745 
1746 SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1747   EVT VT = Node->getValueType(0);
1748   EVT IntVT = VT.changeVectorElementTypeToInteger();
1749 
1750   if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
1751     return SDValue();
1752 
1753   SDValue Mask = Node->getOperand(1);
1754   SDValue EVL = Node->getOperand(2);
1755 
1756   SDLoc DL(Node);
1757   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1758   SDValue ClearSignMask = DAG.getConstant(
1759       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1760   SDValue ClearSign =
1761       DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
1762   return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
1763 }
1764 
1765 SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1766   EVT VT = Node->getValueType(0);
1767 
1768   if (VT != Node->getOperand(1).getValueType())
1769     return SDValue();
1770 
1771   EVT IntVT = VT.changeVectorElementTypeToInteger();
1772   if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
1773       !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1774     return SDValue();
1775 
1776   SDValue Mask = Node->getOperand(2);
1777   SDValue EVL = Node->getOperand(3);
1778 
1779   SDLoc DL(Node);
1780   SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1781   SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1782 
1783   SDValue SignMask = DAG.getConstant(
1784       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1785   SDValue SignBit =
1786       DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
1787 
1788   SDValue ClearSignMask = DAG.getConstant(
1789       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1790   SDValue ClearedSign =
1791       DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
1792 
1793   SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
1794                                    Mask, EVL, SDNodeFlags::Disjoint);
1795 
1796   return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1797 }
1798 
1799 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1800                                        SmallVectorImpl<SDValue> &Results) {
1801   // Attempt to expand using TargetLowering.
1802   SDValue Result, Chain;
1803   if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1804     Results.push_back(Result);
1805     if (Node->isStrictFPOpcode())
1806       Results.push_back(Chain);
1807     return;
1808   }
1809 
1810   // Otherwise go ahead and unroll.
1811   if (Node->isStrictFPOpcode()) {
1812     UnrollStrictFPOp(Node, Results);
1813     return;
1814   }
1815 
1816   Results.push_back(DAG.UnrollVectorOp(Node));
1817 }
1818 
1819 void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1820                                           SmallVectorImpl<SDValue> &Results) {
1821   bool IsStrict = Node->isStrictFPOpcode();
1822   unsigned OpNo = IsStrict ? 1 : 0;
1823   SDValue Src = Node->getOperand(OpNo);
1824   EVT SrcVT = Src.getValueType();
1825   EVT DstVT = Node->getValueType(0);
1826   SDLoc DL(Node);
1827 
1828   // Attempt to expand using TargetLowering.
1829   SDValue Result;
1830   SDValue Chain;
1831   if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1832     Results.push_back(Result);
1833     if (IsStrict)
1834       Results.push_back(Chain);
1835     return;
1836   }
1837 
1838   // Make sure that the SINT_TO_FP and SRL instructions are available.
1839   if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
1840                          TargetLowering::Expand) ||
1841        (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
1842                         TargetLowering::Expand)) ||
1843       TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
1844     if (IsStrict) {
1845       UnrollStrictFPOp(Node, Results);
1846       return;
1847     }
1848 
1849     Results.push_back(DAG.UnrollVectorOp(Node));
1850     return;
1851   }
1852 
1853   unsigned BW = SrcVT.getScalarSizeInBits();
1854   assert((BW == 64 || BW == 32) &&
1855          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1856 
1857   // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
1858   // UINT_TO_FP with a larger float and round to the smaller type
1859   if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
1860       (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
1861     EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
1862     SDValue UIToFP;
1863     SDValue Result;
1864     SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
1865     EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT);
1866     if (IsStrict) {
1867       UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
1868                            {Node->getOperand(0), Src});
1869       Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
1870                            {Node->getOperand(0), UIToFP, TargetZero});
1871       Results.push_back(Result);
1872       Results.push_back(Result.getValue(1));
1873     } else {
1874       UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
1875       Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
1876       Results.push_back(Result);
1877     }
1878 
1879     return;
1880   }
1881 
1882   SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
1883 
1884   // Constants to clear the upper part of the word.
1885   // Notice that we can also use SHL+SHR, but using a constant is slightly
1886   // faster on x86.
1887   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1888   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
1889 
1890   // Two to the power of half-word-size.
1891   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
1892 
1893   // Clear upper part of LO, lower HI
1894   SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
1895   SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
1896 
1897   if (IsStrict) {
1898     // Convert hi and lo to floats
1899     // Convert the hi part back to the upper values
1900     // TODO: Can any fast-math-flags be set on these nodes?
1901     SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
1902                               {Node->getOperand(0), HI});
1903     fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
1904                       {fHI.getValue(1), fHI, TWOHW});
1905     SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
1906                               {Node->getOperand(0), LO});
1907 
1908     SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
1909                              fLO.getValue(1));
1910 
1911     // Add the two halves
1912     SDValue Result =
1913         DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
1914 
1915     Results.push_back(Result);
1916     Results.push_back(Result.getValue(1));
1917     return;
1918   }
1919 
1920   // Convert hi and lo to floats
1921   // Convert the hi part back to the upper values
1922   // TODO: Can any fast-math-flags be set on these nodes?
1923   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
1924   fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
1925   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
1926 
1927   // Add the two halves
1928   Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
1929 }
1930 
1931 SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1932   EVT VT = Node->getValueType(0);
1933   EVT IntVT = VT.changeVectorElementTypeToInteger();
1934 
1935   if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
1936     return SDValue();
1937 
1938   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1939   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1940       !VT.isScalableVector())
1941     return SDValue();
1942 
1943   SDLoc DL(Node);
1944   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1945   SDValue SignMask = DAG.getConstant(
1946       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1947   SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
1948   return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1949 }
1950 
1951 SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
1952   EVT VT = Node->getValueType(0);
1953   EVT IntVT = VT.changeVectorElementTypeToInteger();
1954 
1955   if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
1956     return SDValue();
1957 
1958   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1959   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1960       !VT.isScalableVector())
1961     return SDValue();
1962 
1963   SDLoc DL(Node);
1964   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1965   SDValue ClearSignMask = DAG.getConstant(
1966       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1967   SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
1968   return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
1969 }
1970 
1971 SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
1972   EVT VT = Node->getValueType(0);
1973   EVT IntVT = VT.changeVectorElementTypeToInteger();
1974 
1975   if (VT != Node->getOperand(1).getValueType() ||
1976       !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
1977       !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
1978     return SDValue();
1979 
1980   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1981   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1982       !VT.isScalableVector())
1983     return SDValue();
1984 
1985   SDLoc DL(Node);
1986   SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1987   SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1988 
1989   SDValue SignMask = DAG.getConstant(
1990       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1991   SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
1992 
1993   SDValue ClearSignMask = DAG.getConstant(
1994       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1995   SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
1996 
1997   SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
1998                                    SDNodeFlags::Disjoint);
1999 
2000   return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
2001 }
2002 
2003 void VectorLegalizer::ExpandFSUB(SDNode *Node,
2004                                  SmallVectorImpl<SDValue> &Results) {
2005   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2006   // we can defer this to operation legalization where it will be lowered as
2007   // a+(-b).
2008   EVT VT = Node->getValueType(0);
2009   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
2010       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
2011     return; // Defer to LegalizeDAG
2012 
2013   if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2014     Results.push_back(Expanded);
2015     return;
2016   }
2017 
2018   SDValue Tmp = DAG.UnrollVectorOp(Node);
2019   Results.push_back(Tmp);
2020 }
2021 
2022 void VectorLegalizer::ExpandSETCC(SDNode *Node,
2023                                   SmallVectorImpl<SDValue> &Results) {
2024   bool NeedInvert = false;
2025   bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
2026   bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
2027                   Node->getOpcode() == ISD::STRICT_FSETCCS;
2028   bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
2029   unsigned Offset = IsStrict ? 1 : 0;
2030 
2031   SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
2032   SDValue LHS = Node->getOperand(0 + Offset);
2033   SDValue RHS = Node->getOperand(1 + Offset);
2034   SDValue CC = Node->getOperand(2 + Offset);
2035 
2036   MVT OpVT = LHS.getSimpleValueType();
2037   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
2038 
2039   if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
2040     if (IsStrict) {
2041       UnrollStrictFPOp(Node, Results);
2042       return;
2043     }
2044     Results.push_back(UnrollVSETCC(Node));
2045     return;
2046   }
2047 
2048   SDValue Mask, EVL;
2049   if (IsVP) {
2050     Mask = Node->getOperand(3 + Offset);
2051     EVL = Node->getOperand(4 + Offset);
2052   }
2053 
2054   SDLoc dl(Node);
2055   bool Legalized =
2056       TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
2057                                 EVL, NeedInvert, dl, Chain, IsSignaling);
2058 
2059   if (Legalized) {
2060     // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
2061     // condition code, create a new SETCC node.
2062     if (CC.getNode()) {
2063       if (IsStrict) {
2064         LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
2065                           {Chain, LHS, RHS, CC}, Node->getFlags());
2066         Chain = LHS.getValue(1);
2067       } else if (IsVP) {
2068         LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
2069                           {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
2070       } else {
2071         LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
2072                           Node->getFlags());
2073       }
2074     }
2075 
2076     // If we expanded the SETCC by inverting the condition code, then wrap
2077     // the existing SETCC in a NOT to restore the intended condition.
2078     if (NeedInvert) {
2079       if (!IsVP)
2080         LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
2081       else
2082         LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
2083     }
2084   } else {
2085     assert(!IsStrict && "Don't know how to expand for strict nodes.");
2086 
2087     // Otherwise, SETCC for the given comparison type must be completely
2088     // illegal; expand it into a SELECT_CC.
2089     EVT VT = Node->getValueType(0);
2090     LHS =
2091         DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
2092                     DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
2093                     DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
2094     LHS->setFlags(Node->getFlags());
2095   }
2096 
2097   Results.push_back(LHS);
2098   if (IsStrict)
2099     Results.push_back(Chain);
2100 }
2101 
2102 void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2103                                      SmallVectorImpl<SDValue> &Results) {
2104   SDValue Result, Overflow;
2105   TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2106   Results.push_back(Result);
2107   Results.push_back(Overflow);
2108 }
2109 
2110 void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2111                                      SmallVectorImpl<SDValue> &Results) {
2112   SDValue Result, Overflow;
2113   TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2114   Results.push_back(Result);
2115   Results.push_back(Overflow);
2116 }
2117 
2118 void VectorLegalizer::ExpandMULO(SDNode *Node,
2119                                  SmallVectorImpl<SDValue> &Results) {
2120   SDValue Result, Overflow;
2121   if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2122     std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
2123 
2124   Results.push_back(Result);
2125   Results.push_back(Overflow);
2126 }
2127 
2128 void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2129                                           SmallVectorImpl<SDValue> &Results) {
2130   SDNode *N = Node;
2131   if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
2132           N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
2133     Results.push_back(Expanded);
2134 }
2135 
2136 void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2137                                        SmallVectorImpl<SDValue> &Results) {
2138   if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2139     ExpandUINT_TO_FLOAT(Node, Results);
2140     return;
2141   }
2142   if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2143     ExpandFP_TO_UINT(Node, Results);
2144     return;
2145   }
2146 
2147   if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2148       Node->getOpcode() == ISD::STRICT_FSETCCS) {
2149     ExpandSETCC(Node, Results);
2150     return;
2151   }
2152 
2153   UnrollStrictFPOp(Node, Results);
2154 }
2155 
2156 void VectorLegalizer::ExpandREM(SDNode *Node,
2157                                 SmallVectorImpl<SDValue> &Results) {
2158   assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2159          "Expected REM node");
2160 
2161   SDValue Result;
2162   if (!TLI.expandREM(Node, Result, DAG))
2163     Result = DAG.UnrollVectorOp(Node);
2164   Results.push_back(Result);
2165 }
2166 
2167 // Try to expand libm nodes into vector math routine calls. Callers provide the
2168 // LibFunc equivalent of the passed in Node, which is used to lookup mappings
2169 // within TargetLibraryInfo. The only mappings considered are those where the
2170 // result and all operands are the same vector type. While predicated nodes are
2171 // not supported, we will emit calls to masked routines by passing in an all
2172 // true mask.
2173 bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
2174                                            SmallVectorImpl<SDValue> &Results) {
2175   // Chain must be propagated but currently strict fp operations are down
2176   // converted to their none strict counterpart.
2177   assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
2178 
2179   const char *LCName = TLI.getLibcallName(LC);
2180   if (!LCName)
2181     return false;
2182   LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");
2183 
2184   EVT VT = Node->getValueType(0);
2185   ElementCount VL = VT.getVectorElementCount();
2186 
2187   // Lookup a vector function equivalent to the specified libcall. Prefer
2188   // unmasked variants but we will generate a mask if need be.
2189   const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
2190   const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false);
2191   if (!VD)
2192     VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
2193   if (!VD)
2194     return false;
2195 
2196   LLVMContext *Ctx = DAG.getContext();
2197   Type *Ty = VT.getTypeForEVT(*Ctx);
2198   Type *ScalarTy = Ty->getScalarType();
2199 
2200   // Construct a scalar function type based on Node's operands.
2201   SmallVector<Type *, 8> ArgTys;
2202   for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
2203     assert(Node->getOperand(i).getValueType() == VT &&
2204            "Expected matching vector types!");
2205     ArgTys.push_back(ScalarTy);
2206   }
2207   FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false);
2208 
2209   // Generate call information for the vector function.
2210   const std::string MangledName = VD->getVectorFunctionABIVariantString();
2211   auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
2212   if (!OptVFInfo)
2213     return false;
2214 
2215   LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
2216                     << "\n");
2217 
2218   // Sanity check just in case OptVFInfo has unexpected parameters.
2219   if (OptVFInfo->Shape.Parameters.size() !=
2220       Node->getNumOperands() + VD->isMasked())
2221     return false;
2222 
2223   // Collect vector call operands.
2224 
2225   SDLoc DL(Node);
2226   TargetLowering::ArgListTy Args;
2227   TargetLowering::ArgListEntry Entry;
2228   Entry.IsSExt = false;
2229   Entry.IsZExt = false;
2230 
2231   unsigned OpNum = 0;
2232   for (auto &VFParam : OptVFInfo->Shape.Parameters) {
2233     if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
2234       EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
2235       Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
2236       Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
2237       Args.push_back(Entry);
2238       continue;
2239     }
2240 
2241     // Only vector operands are supported.
2242     if (VFParam.ParamKind != VFParamKind::Vector)
2243       return false;
2244 
2245     Entry.Node = Node->getOperand(OpNum++);
2246     Entry.Ty = Ty;
2247     Args.push_back(Entry);
2248   }
2249 
2250   // Emit a call to the vector function.
2251   SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(),
2252                                          TLI.getPointerTy(DAG.getDataLayout()));
2253   TargetLowering::CallLoweringInfo CLI(DAG);
2254   CLI.setDebugLoc(DL)
2255       .setChain(DAG.getEntryNode())
2256       .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args));
2257 
2258   std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
2259   Results.push_back(CallResult.first);
2260   return true;
2261 }
2262 
2263 /// Try to expand the node to a vector libcall based on the result type.
2264 bool VectorLegalizer::tryExpandVecMathCall(
2265     SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
2266     RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
2267     RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
2268   RTLIB::Libcall LC = RTLIB::getFPLibCall(
2269       Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64,
2270       Call_F80, Call_F128, Call_PPCF128);
2271 
2272   if (LC == RTLIB::UNKNOWN_LIBCALL)
2273     return false;
2274 
2275   return tryExpandVecMathCall(Node, LC, Results);
2276 }
2277 
2278 void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
2279                                        SmallVectorImpl<SDValue> &Results) {
2280   EVT VT = Node->getValueType(0);
2281   EVT EltVT = VT.getVectorElementType();
2282   unsigned NumElems = VT.getVectorNumElements();
2283   unsigned NumOpers = Node->getNumOperands();
2284   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2285 
2286   EVT TmpEltVT = EltVT;
2287   if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2288       Node->getOpcode() == ISD::STRICT_FSETCCS)
2289     TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
2290                                       *DAG.getContext(), TmpEltVT);
2291 
2292   EVT ValueVTs[] = {TmpEltVT, MVT::Other};
2293   SDValue Chain = Node->getOperand(0);
2294   SDLoc dl(Node);
2295 
2296   SmallVector<SDValue, 32> OpValues;
2297   SmallVector<SDValue, 32> OpChains;
2298   for (unsigned i = 0; i < NumElems; ++i) {
2299     SmallVector<SDValue, 4> Opers;
2300     SDValue Idx = DAG.getVectorIdxConstant(i, dl);
2301 
2302     // The Chain is the first operand.
2303     Opers.push_back(Chain);
2304 
2305     // Now process the remaining operands.
2306     for (unsigned j = 1; j < NumOpers; ++j) {
2307       SDValue Oper = Node->getOperand(j);
2308       EVT OperVT = Oper.getValueType();
2309 
2310       if (OperVT.isVector())
2311         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2312                            OperVT.getVectorElementType(), Oper, Idx);
2313 
2314       Opers.push_back(Oper);
2315     }
2316 
2317     SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
2318     SDValue ScalarResult = ScalarOp.getValue(0);
2319     SDValue ScalarChain = ScalarOp.getValue(1);
2320 
2321     if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2322         Node->getOpcode() == ISD::STRICT_FSETCCS)
2323       ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
2324                                    DAG.getAllOnesConstant(dl, EltVT),
2325                                    DAG.getConstant(0, dl, EltVT));
2326 
2327     OpValues.push_back(ScalarResult);
2328     OpChains.push_back(ScalarChain);
2329   }
2330 
2331   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
2332   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
2333 
2334   Results.push_back(Result);
2335   Results.push_back(NewChain);
2336 }
2337 
2338 SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2339   EVT VT = Node->getValueType(0);
2340   unsigned NumElems = VT.getVectorNumElements();
2341   EVT EltVT = VT.getVectorElementType();
2342   SDValue LHS = Node->getOperand(0);
2343   SDValue RHS = Node->getOperand(1);
2344   SDValue CC = Node->getOperand(2);
2345   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2346   SDLoc dl(Node);
2347   SmallVector<SDValue, 8> Ops(NumElems);
2348   for (unsigned i = 0; i < NumElems; ++i) {
2349     SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
2350                                   DAG.getVectorIdxConstant(i, dl));
2351     SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
2352                                   DAG.getVectorIdxConstant(i, dl));
2353     // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2354     Ops[i] = DAG.getNode(ISD::SETCC, dl,
2355                          TLI.getSetCCResultType(DAG.getDataLayout(),
2356                                                 *DAG.getContext(), TmpEltVT),
2357                          LHSElem, RHSElem, CC);
2358     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
2359                            DAG.getBoolConstant(true, dl, EltVT, VT),
2360                            DAG.getConstant(0, dl, EltVT));
2361   }
2362   return DAG.getBuildVector(VT, dl, Ops);
2363 }
2364 
2365 bool SelectionDAG::LegalizeVectors() {
2366   return VectorLegalizer(*this).Run();
2367 }
2368