xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 2434137f690dabc35586ab45fc4c4ecc5b71184f)
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));
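
// Usage sketch (assuming a standard LLVM build): these are ordinary cl::opt
// flags, so they can be toggled on the llc command line, e.g.
//   llc -combiner-store-merging=false ...
// or passed through clang/opt with -mllvm, e.g.
//   clang -mllvm -combiner-stress-load-slicing ...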

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes that we have attempted to add to the worklist
    /// since we last considered a new worklist entry. Because we do not add
    /// duplicate nodes to the worklist, this can differ from the tail of the
    /// worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count tracks how many times we have seen the StoreNode with the
    /// same RootNode bail out of the dependence check. If the same pair has
    /// bailed out more times than a set limit, we no longer consider the
    /// StoreNode with that RootNode as a store merging candidate.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }
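
    // A minimal usage sketch (not the actual Run() implementation, which
    // lives further down in this file): the combiner drains the worklist by
    // repeatedly calling the accessor above until it returns null, e.g.
    //
    //   while (SDNode *N = getNextWorklistEntry()) {
    //     // ... try to combine N, possibly queueing new nodes ...
    //   }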

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }
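
    // Usage sketch (illustrative, not taken verbatim from a specific visit
    // routine below): a fold that rewrites node N into a single new value V
    // typically finishes with `return CombineTo(N, V);`, which replaces all
    // uses of N, queues V and its users for revisiting, and deletes N if it
    // became dead.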

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }
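
    // Worked example (illustrative): if Op is (and X, 0xFFFF) and a caller
    // only demands the low 16 bits, TLI.SimplifyDemandedBits can prove the
    // mask is redundant for those bits and replace Op with X, returning true
    // so the simplified node is re-queued via the TLO commit above.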

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
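    // For example (an illustrative reading of the convention above): a visit
    // routine that finds nothing to do returns SDValue(); one that already
    // rewrote N via CombineTo returns SDValue(N, 0); and one that built a
    // replacement value V simply returns V so the caller performs the
    // substitution.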
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL, SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walks up the store chain and adds
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }
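
    // For example (illustrative): for (store (extract_vector_elt V, i), Ptr)
    // the stored value classifies as StoreSource::Extract, while
    // (store (load P2), Ptr) classifies as StoreSource::Load; the class
    // determines which tryStoreMergeOf* helper below is used.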

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }
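
    // Usage sketch (hedged): folds that synthesize a new opcode typically
    // guard on this first, e.g. only forming an ISD::USUBSAT node when
    // hasOperation(ISD::USUBSAT, VT) holds for the type being combined.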

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper
// function zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}
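
// Worked example (illustrative): with an 8-bit LHS, a 16-bit RHS and
// Offset = 1, Bits = 1 + max(8, 16) = 17, so both operands are
// zero-extended to 17 bits, leaving headroom for a carry-out.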

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
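
// For example (illustrative): on a target whose boolean "true" is all-ones,
// (select_cc a, b, -1, 0, setlt) is recognized as equivalent to
// (setcc a, b, setlt), and LHS/RHS/CC are set to a, b and setlt.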

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if this is an indexed load with an opaque target constant index.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
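
// Worked example (illustrative, not tied to a specific target): suppose
// CodeGenPrepare split a large offset so an address is computed as
// (add (add x, 4064), 32). If a 32-byte offset is a legal addressing-mode
// immediate but the combined 4096 is not, folding the adds back into
// (add x, 4096) would force a separate address computation, so the helper
// above returns true to block the reassociation.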

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
        return DAG.getNode(Opc, DL, VT, N00, OpNode);
      return SDValue();
    }
    if (TLI.isReassocProfitable(DAG, N0, N1)) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
        return DAG.getNode(Opc, DL, VT, OpNode, N01);
      return SDValue();
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}
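
// For example (illustrative): with Opc = ISD::ADD, N0 = (add x, 3) and
// N1 = 5, the constant-folding branch rewrites the expression to
// (add x, 8); with a non-constant y in place of 5 it may instead produce
// (add (add x, y), 3) when the target deems that profitable.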

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts,
                                       bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
                                AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
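
// For example (illustrative): promoting an i16 operand to i32 via
// SExtPromoteOperand produces (sign_extend_inreg (promoted x), i16), while
// ZExtPromoteOperand masks the promoted value back down with
// getZeroExtendInReg, so only the original 16 bits are trusted.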
1274 
1275 /// Promote the specified integer binary operation if the target indicates it is
1276 /// beneficial. E.g., on x86, it's usually better to promote i16 operations to
1277 /// i32 since i16 instructions are longer.
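///
/// The rewrite has the following shape (illustrative, assuming the target
/// promotes i16 to i32):
///   (i16 add x, y) --> (i16 truncate (i32 add (anyext x), (anyext y)))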
1278 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1279   if (!LegalOperations)
1280     return SDValue();
1281 
1282   EVT VT = Op.getValueType();
1283   if (VT.isVector() || !VT.isInteger())
1284     return SDValue();
1285 
1286   // If the operation type is 'undesirable', e.g. i16 on x86, consider
1287   // promoting it.
1288   unsigned Opc = Op.getOpcode();
1289   if (TLI.isTypeDesirableForOp(Opc, VT))
1290     return SDValue();
1291 
1292   EVT PVT = VT;
1293   // Consult the target about whether it is a good idea to promote this
1294   // operation and what type to promote it to.
1295   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1296     assert(PVT != VT && "Don't know what type to promote to!");
1297 
1298     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1299 
1300     bool Replace0 = false;
1301     SDValue N0 = Op.getOperand(0);
1302     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1303 
1304     bool Replace1 = false;
1305     SDValue N1 = Op.getOperand(1);
1306     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1307     SDLoc DL(Op);
1308 
1309     SDValue RV =
1310         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1311 
1312     // We are always replacing N0/N1's use in N and only need additional
1313     // replacements if there are additional uses.
1314     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1315     //       (SDValue) here because the node may reference multiple values
1316     //       (for example, the chain value of a load node).
1317     Replace0 &= !N0->hasOneUse();
1318     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1319 
1320     // Combine Op here so it is preserved past replacements.
1321     CombineTo(Op.getNode(), RV);
1322 
1323     // If the operands have a use ordering, make sure we deal with the
1324     // predecessor first.
1325     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1326       std::swap(N0, N1);
1327       std::swap(NN0, NN1);
1328     }
1329 
1330     if (Replace0) {
1331       AddToWorklist(NN0.getNode());
1332       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1333     }
1334     if (Replace1) {
1335       AddToWorklist(NN1.getNode());
1336       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1337     }
1338     return Op;
1339   }
1340   return SDValue();
1341 }
1342 
1343 /// Promote the specified integer shift operation if the target indicates it is
1344 /// beneficial. E.g., on x86, it's usually better to promote i16 operations to
1345 /// i32 since i16 instructions are longer.
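///
/// The rewrite has the following shape (illustrative, assuming the target
/// promotes i16 to i32):
///   (i16 srl x, y) --> (i16 truncate (i32 srl (zext-in-reg x), y))
/// with a sign-extend-in-reg of the source for SRA, and a plain promote for
/// SHL.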
1346 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1347   if (!LegalOperations)
1348     return SDValue();
1349 
1350   EVT VT = Op.getValueType();
1351   if (VT.isVector() || !VT.isInteger())
1352     return SDValue();
1353 
1354   // If the operation type is 'undesirable', e.g. i16 on x86, consider
1355   // promoting it.
1356   unsigned Opc = Op.getOpcode();
1357   if (TLI.isTypeDesirableForOp(Opc, VT))
1358     return SDValue();
1359 
1360   EVT PVT = VT;
1361   // Consult the target about whether it is a good idea to promote this
1362   // operation and what type to promote it to.
1363   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1364     assert(PVT != VT && "Don't know what type to promote to!");
1365 
1366     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1367 
1368     bool Replace = false;
1369     SDValue N0 = Op.getOperand(0);
1370     SDValue N1 = Op.getOperand(1);
1371     if (Opc == ISD::SRA)
1372       N0 = SExtPromoteOperand(N0, PVT);
1373     else if (Opc == ISD::SRL)
1374       N0 = ZExtPromoteOperand(N0, PVT);
1375     else
1376       N0 = PromoteOperand(N0, PVT, Replace);
1377 
1378     if (!N0.getNode())
1379       return SDValue();
1380 
1381     SDLoc DL(Op);
1382     SDValue RV =
1383         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1384 
1385     if (Replace)
1386       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1387 
1388     // Deal with Op being deleted.
1389     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1390       return RV;
1391   }
1392   return SDValue();
1393 }
1394 
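/// Promote the specified integer extend if the target indicates it is
/// beneficial, by recreating the extend directly from its source value (see
/// the extend-of-extend folds noted in the body).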
1395 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1396   if (!LegalOperations)
1397     return SDValue();
1398 
1399   EVT VT = Op.getValueType();
1400   if (VT.isVector() || !VT.isInteger())
1401     return SDValue();
1402 
1403   // If the operation type is 'undesirable', e.g. i16 on x86, consider
1404   // promoting it.
1405   unsigned Opc = Op.getOpcode();
1406   if (TLI.isTypeDesirableForOp(Opc, VT))
1407     return SDValue();
1408 
1409   EVT PVT = VT;
1410   // Consult the target about whether it is a good idea to promote this
1411   // operation and what type to promote it to.
1412   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1413     assert(PVT != VT && "Don't know what type to promote to!");
1414     // fold (aext (aext x)) -> (aext x)
1415     // fold (aext (zext x)) -> (zext x)
1416     // fold (aext (sext x)) -> (sext x)
1417     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1418     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1419   }
1420   return SDValue();
1421 }
1422 
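/// Promote the specified integer load if the target indicates it is
/// beneficial: reload at the wider type PVT via an extending load, truncate
/// the value back to the original type, and replace all uses of the old load.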
1423 bool DAGCombiner::PromoteLoad(SDValue Op) {
1424   if (!LegalOperations)
1425     return false;
1426 
1427   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1428     return false;
1429 
1430   EVT VT = Op.getValueType();
1431   if (VT.isVector() || !VT.isInteger())
1432     return false;
1433 
1434   // If the operation type is 'undesirable', e.g. i16 on x86, consider
1435   // promoting it.
1436   unsigned Opc = Op.getOpcode();
1437   if (TLI.isTypeDesirableForOp(Opc, VT))
1438     return false;
1439 
1440   EVT PVT = VT;
1441   // Consult the target about whether it is a good idea to promote this
1442   // operation and what type to promote it to.
1443   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1444     assert(PVT != VT && "Don't know what type to promote to!");
1445 
1446     SDLoc DL(Op);
1447     SDNode *N = Op.getNode();
1448     LoadSDNode *LD = cast<LoadSDNode>(N);
1449     EVT MemVT = LD->getMemoryVT();
1450     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1451                                                       : LD->getExtensionType();
1452     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1453                                    LD->getChain(), LD->getBasePtr(),
1454                                    MemVT, LD->getMemOperand());
1455     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1456 
1457     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1458                Result.getNode()->dump(&DAG); dbgs() << '\n');
1459     WorklistRemover DeadNodes(*this);
1460     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1461     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1462     deleteAndRecombine(N);
1463     AddToWorklist(Result.getNode());
1464     return true;
1465   }
1466   return false;
1467 }
1468 
1469 /// Recursively delete a node which has no uses and any operands for
1470 /// which it is the only use.
1471 ///
1472 /// Note that this both deletes the nodes and removes them from the worklist.
1473 /// It also adds any nodes that have had a user deleted to the worklist, as they
1474 /// may now have only one use and be subject to other combines.
1475 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1476   if (!N->use_empty())
1477     return false;
1478 
1479   SmallSetVector<SDNode *, 16> Nodes;
1480   Nodes.insert(N);
1481   do {
1482     N = Nodes.pop_back_val();
1483     if (!N)
1484       continue;
1485 
1486     if (N->use_empty()) {
1487       for (const SDValue &ChildN : N->op_values())
1488         Nodes.insert(ChildN.getNode());
1489 
1490       removeFromWorklist(N);
1491       DAG.DeleteNode(N);
1492     } else {
1493       AddToWorklist(N);
1494     }
1495   } while (!Nodes.empty());
1496   return true;
1497 }
1498 
1499 //===----------------------------------------------------------------------===//
1500 //  Main DAG Combiner implementation
1501 //===----------------------------------------------------------------------===//
1502 
1503 void DAGCombiner::Run(CombineLevel AtLevel) {
1504   // Set the instance variables so that the various visit routines may use them.
1505   Level = AtLevel;
1506   LegalDAG = Level >= AfterLegalizeDAG;
1507   LegalOperations = Level >= AfterLegalizeVectorOps;
1508   LegalTypes = Level >= AfterLegalizeTypes;
1509 
1510   WorklistInserter AddNodes(*this);
1511 
1512   // Add all the dag nodes to the worklist.
1513   for (SDNode &Node : DAG.allnodes())
1514     AddToWorklist(&Node);
1515 
1516   // Create a dummy node (which is not added to allnodes) that adds a reference
1517   // to the root node, preventing it from being deleted and tracking any
1518   // changes of the root.
1519   HandleSDNode Dummy(DAG.getRoot());
1520 
1521   // While we have a valid worklist entry node, try to combine it.
1522   while (SDNode *N = getNextWorklistEntry()) {
1523     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1524     // N is deleted from the DAG, since they too may now be dead or may have a
1525     // reduced number of uses, allowing other xforms.
1526     if (recursivelyDeleteUnusedNodes(N))
1527       continue;
1528 
1529     WorklistRemover DeadNodes(*this);
1530 
1531     // If this combine is running after legalizing the DAG, re-legalize any
1532     // nodes pulled off the worklist.
1533     if (LegalDAG) {
1534       SmallSetVector<SDNode *, 16> UpdatedNodes;
1535       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1536 
1537       for (SDNode *LN : UpdatedNodes)
1538         AddToWorklistWithUsers(LN);
1539 
1540       if (!NIsValid)
1541         continue;
1542     }
1543 
1544     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1545 
1546     // Add any operands of the new node which have not yet been combined to the
1547     // worklist as well. Because the worklist uniques things already, this
1548     // won't repeatedly process the same operand.
1549     CombinedNodes.insert(N);
1550     for (const SDValue &ChildN : N->op_values())
1551       if (!CombinedNodes.count(ChildN.getNode()))
1552         AddToWorklist(ChildN.getNode());
1553 
1554     SDValue RV = combine(N);
1555 
1556     if (!RV.getNode())
1557       continue;
1558 
1559     ++NodesCombined;
1560 
1561     // If we get back the same node we passed in, rather than a new node or
1562     // zero, we know that the node must have defined multiple values and
1563     // CombineTo was used.  Since CombineTo takes care of the worklist
1564     // mechanics for us, we have no work to do in this case.
1565     if (RV.getNode() == N)
1566       continue;
1567 
1568     assert(N->getOpcode() != ISD::DELETED_NODE &&
1569            RV.getOpcode() != ISD::DELETED_NODE &&
1570            "Node was deleted but visit returned new node!");
1571 
1572     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1573 
1574     if (N->getNumValues() == RV.getNode()->getNumValues())
1575       DAG.ReplaceAllUsesWith(N, RV.getNode());
1576     else {
1577       assert(N->getValueType(0) == RV.getValueType() &&
1578              N->getNumValues() == 1 && "Type mismatch");
1579       DAG.ReplaceAllUsesWith(N, &RV);
1580     }
1581 
1582     // Push the new node and any users onto the worklist.  Omit this if the
1583     // new node is the EntryToken (e.g. if a store managed to get optimized
1584     // out), because re-visiting the EntryToken and its users will not uncover
1585     // any additional opportunities, but there may be a large number of such
1586     // users, potentially causing compile time explosion.
1587     if (RV.getOpcode() != ISD::EntryToken) {
1588       AddToWorklist(RV.getNode());
1589       AddUsersToWorklist(RV.getNode());
1590     }
1591 
1592     // Finally, if the node is now dead, remove it from the graph.  The node
1593     // may not be dead if the replacement process recursively simplified to
1594     // something else needing this node. This will also take care of adding any
1595     // operands which have lost a user to the worklist.
1596     recursivelyDeleteUnusedNodes(N);
1597   }
1598 
1599   // If the root changed (e.g. it was a dead load), update the root.
1600   DAG.setRoot(Dummy.getValue());
1601   DAG.RemoveDeadNodes();
1602 }
1603 
1604 SDValue DAGCombiner::visit(SDNode *N) {
1605   switch (N->getOpcode()) {
1606   default: break;
1607   case ISD::TokenFactor:        return visitTokenFactor(N);
1608   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1609   case ISD::ADD:                return visitADD(N);
1610   case ISD::SUB:                return visitSUB(N);
1611   case ISD::SADDSAT:
1612   case ISD::UADDSAT:            return visitADDSAT(N);
1613   case ISD::SSUBSAT:
1614   case ISD::USUBSAT:            return visitSUBSAT(N);
1615   case ISD::ADDC:               return visitADDC(N);
1616   case ISD::SADDO:
1617   case ISD::UADDO:              return visitADDO(N);
1618   case ISD::SUBC:               return visitSUBC(N);
1619   case ISD::SSUBO:
1620   case ISD::USUBO:              return visitSUBO(N);
1621   case ISD::ADDE:               return visitADDE(N);
1622   case ISD::ADDCARRY:           return visitADDCARRY(N);
1623   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1624   case ISD::SUBE:               return visitSUBE(N);
1625   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1626   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1627   case ISD::SMULFIX:
1628   case ISD::SMULFIXSAT:
1629   case ISD::UMULFIX:
1630   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1631   case ISD::MUL:                return visitMUL(N);
1632   case ISD::SDIV:               return visitSDIV(N);
1633   case ISD::UDIV:               return visitUDIV(N);
1634   case ISD::SREM:
1635   case ISD::UREM:               return visitREM(N);
1636   case ISD::MULHU:              return visitMULHU(N);
1637   case ISD::MULHS:              return visitMULHS(N);
1638   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1639   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1640   case ISD::SMULO:
1641   case ISD::UMULO:              return visitMULO(N);
1642   case ISD::SMIN:
1643   case ISD::SMAX:
1644   case ISD::UMIN:
1645   case ISD::UMAX:               return visitIMINMAX(N);
1646   case ISD::AND:                return visitAND(N);
1647   case ISD::OR:                 return visitOR(N);
1648   case ISD::XOR:                return visitXOR(N);
1649   case ISD::SHL:                return visitSHL(N);
1650   case ISD::SRA:                return visitSRA(N);
1651   case ISD::SRL:                return visitSRL(N);
1652   case ISD::ROTR:
1653   case ISD::ROTL:               return visitRotate(N);
1654   case ISD::FSHL:
1655   case ISD::FSHR:               return visitFunnelShift(N);
1656   case ISD::SSHLSAT:
1657   case ISD::USHLSAT:            return visitSHLSAT(N);
1658   case ISD::ABS:                return visitABS(N);
1659   case ISD::BSWAP:              return visitBSWAP(N);
1660   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1661   case ISD::CTLZ:               return visitCTLZ(N);
1662   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1663   case ISD::CTTZ:               return visitCTTZ(N);
1664   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1665   case ISD::CTPOP:              return visitCTPOP(N);
1666   case ISD::SELECT:             return visitSELECT(N);
1667   case ISD::VSELECT:            return visitVSELECT(N);
1668   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1669   case ISD::SETCC:              return visitSETCC(N);
1670   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1671   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1672   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1673   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1674   case ISD::AssertSext:
1675   case ISD::AssertZext:         return visitAssertExt(N);
1676   case ISD::AssertAlign:        return visitAssertAlign(N);
1677   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1678   case ISD::SIGN_EXTEND_VECTOR_INREG:
1679   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1681   case ISD::BITCAST:            return visitBITCAST(N);
1682   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1683   case ISD::FADD:               return visitFADD(N);
1684   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1685   case ISD::FSUB:               return visitFSUB(N);
1686   case ISD::FMUL:               return visitFMUL(N);
1687   case ISD::FMA:                return visitFMA(N);
1688   case ISD::FDIV:               return visitFDIV(N);
1689   case ISD::FREM:               return visitFREM(N);
1690   case ISD::FSQRT:              return visitFSQRT(N);
1691   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1692   case ISD::FPOW:               return visitFPOW(N);
1693   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1694   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1695   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1696   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1697   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1698   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1699   case ISD::FNEG:               return visitFNEG(N);
1700   case ISD::FABS:               return visitFABS(N);
1701   case ISD::FFLOOR:             return visitFFLOOR(N);
1702   case ISD::FMINNUM:
1703   case ISD::FMAXNUM:
1704   case ISD::FMINIMUM:
1705   case ISD::FMAXIMUM:           return visitFMinMax(N);
1706   case ISD::FCEIL:              return visitFCEIL(N);
1707   case ISD::FTRUNC:             return visitFTRUNC(N);
1708   case ISD::BRCOND:             return visitBRCOND(N);
1709   case ISD::BR_CC:              return visitBR_CC(N);
1710   case ISD::LOAD:               return visitLOAD(N);
1711   case ISD::STORE:              return visitSTORE(N);
1712   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1713   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1715   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1716   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1717   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1718   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1719   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1720   case ISD::MGATHER:            return visitMGATHER(N);
1721   case ISD::MLOAD:              return visitMLOAD(N);
1722   case ISD::MSCATTER:           return visitMSCATTER(N);
1723   case ISD::MSTORE:             return visitMSTORE(N);
1724   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1725   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1726   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1727   case ISD::FREEZE:             return visitFREEZE(N);
1728   case ISD::VECREDUCE_FADD:
1729   case ISD::VECREDUCE_FMUL:
1730   case ISD::VECREDUCE_ADD:
1731   case ISD::VECREDUCE_MUL:
1732   case ISD::VECREDUCE_AND:
1733   case ISD::VECREDUCE_OR:
1734   case ISD::VECREDUCE_XOR:
1735   case ISD::VECREDUCE_SMAX:
1736   case ISD::VECREDUCE_SMIN:
1737   case ISD::VECREDUCE_UMAX:
1738   case ISD::VECREDUCE_UMIN:
1739   case ISD::VECREDUCE_FMAX:
1740   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1741 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1742 #include "llvm/IR/VPIntrinsics.def"
1743     return visitVPOp(N);
1744   }
1745   return SDValue();
1746 }
1747 
1748 SDValue DAGCombiner::combine(SDNode *N) {
1749   SDValue RV;
1750   if (!DisableGenericCombines)
1751     RV = visit(N);
1752 
1753   // If nothing happened, try a target-specific DAG combine.
1754   if (!RV.getNode()) {
1755     assert(N->getOpcode() != ISD::DELETED_NODE &&
1756            "Node was deleted but visit returned NULL!");
1757 
1758     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1759         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1760 
1761       // Expose the DAG combiner to the target combiner impls.
1762       TargetLowering::DAGCombinerInfo
1763         DagCombineInfo(DAG, Level, false, this);
1764 
1765       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1766     }
1767   }
1768 
1769   // If nothing happened still, try promoting the operation.
1770   if (!RV.getNode()) {
1771     switch (N->getOpcode()) {
1772     default: break;
1773     case ISD::ADD:
1774     case ISD::SUB:
1775     case ISD::MUL:
1776     case ISD::AND:
1777     case ISD::OR:
1778     case ISD::XOR:
1779       RV = PromoteIntBinOp(SDValue(N, 0));
1780       break;
1781     case ISD::SHL:
1782     case ISD::SRA:
1783     case ISD::SRL:
1784       RV = PromoteIntShiftOp(SDValue(N, 0));
1785       break;
1786     case ISD::SIGN_EXTEND:
1787     case ISD::ZERO_EXTEND:
1788     case ISD::ANY_EXTEND:
1789       RV = PromoteExtend(SDValue(N, 0));
1790       break;
1791     case ISD::LOAD:
1792       if (PromoteLoad(SDValue(N, 0)))
1793         RV = SDValue(N, 0);
1794       break;
1795     }
1796   }
1797 
1798   // If N is a commutative binary node, try to eliminate it if the commuted
1799   // version is already present in the DAG.
1800   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1801       N->getNumValues() == 1) {
1802     SDValue N0 = N->getOperand(0);
1803     SDValue N1 = N->getOperand(1);
1804 
1805     // Constant operands are canonicalized to RHS.
1806     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1807       SDValue Ops[] = {N1, N0};
1808       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1809                                             N->getFlags());
1810       if (CSENode)
1811         return SDValue(CSENode, 0);
1812     }
1813   }
1814 
1815   return RV;
1816 }
1817 
1818 /// Given a node, return its input chain if it has one; otherwise return a null
1819 /// SDValue.
1820 static SDValue getInputChainForNode(SDNode *N) {
1821   if (unsigned NumOps = N->getNumOperands()) {
1822     if (N->getOperand(0).getValueType() == MVT::Other)
1823       return N->getOperand(0);
1824     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1825       return N->getOperand(NumOps-1);
1826     for (unsigned i = 1; i < NumOps-1; ++i)
1827       if (N->getOperand(i).getValueType() == MVT::Other)
1828         return N->getOperand(i);
1829   }
1830   return SDValue();
1831 }
1832 
1833 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1834   // If N has two operands, where one has an input chain equal to the other,
1835   // the 'other' chain is redundant.
1836   if (N->getNumOperands() == 2) {
1837     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1838       return N->getOperand(0);
1839     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1840       return N->getOperand(1);
1841   }
1842 
1843   // Don't simplify token factors if optnone.
1844   if (OptLevel == CodeGenOpt::None)
1845     return SDValue();
1846 
1847   // Don't simplify the token factor if the node itself has too many operands.
1848   if (N->getNumOperands() > TokenFactorInlineLimit)
1849     return SDValue();
1850 
1851   // If the sole user is a token factor, we should make sure we have a
1852   // chance to merge them together. This prevents TF chains from inhibiting
1853   // optimizations.
1854   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1855     AddToWorklist(*(N->use_begin()));
1856 
1857   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1858   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1859   SmallPtrSet<SDNode*, 16> SeenOps;
1860   bool Changed = false;             // If we should replace this token factor.
1861 
1862   // Start out with this token factor.
1863   TFs.push_back(N);
1864 
1865   // Iterate through the token factors. The TFs list grows as new token
1866   // factors are encountered.
1867   for (unsigned i = 0; i < TFs.size(); ++i) {
1868     // Limit number of nodes to inline, to avoid quadratic compile times.
1869     // We have to add the outstanding Token Factors to Ops, otherwise we might
1870     // drop Ops from the resulting Token Factors.
1871     if (Ops.size() > TokenFactorInlineLimit) {
1872       for (unsigned j = i; j < TFs.size(); j++)
1873         Ops.emplace_back(TFs[j], 0);
1874       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1875       // combiner worklist later.
1876       TFs.resize(i);
1877       break;
1878     }
1879 
1880     SDNode *TF = TFs[i];
1881     // Check each of the operands.
1882     for (const SDValue &Op : TF->op_values()) {
1883       switch (Op.getOpcode()) {
1884       case ISD::EntryToken:
1885         // Entry tokens don't need to be added to the list. They are
1886         // redundant.
1887         Changed = true;
1888         break;
1889 
1890       case ISD::TokenFactor:
1891         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1892           // Queue up for processing.
1893           TFs.push_back(Op.getNode());
1894           Changed = true;
1895           break;
1896         }
1897         LLVM_FALLTHROUGH;
1898 
1899       default:
1900         // Only add if it isn't already in the list.
1901         if (SeenOps.insert(Op.getNode()).second)
1902           Ops.push_back(Op);
1903         else
1904           Changed = true;
1905         break;
1906       }
1907     }
1908   }
1909 
1910   // Re-visit inlined Token Factors, to clean them up in case they have been
1911   // removed. Skip the first Token Factor, as this is the current node.
1912   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1913     AddToWorklist(TFs[i]);
1914 
1915   // Remove nodes that are chained to another node in the list. Do so by
1916   // walking up chains breadth-first, stopping when we've seen another operand.
1917   // In general we must climb to the EntryNode, but we can exit early if we
1918   // find that all remaining work is associated with just one operand, as no
1919   // further pruning is possible.
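  //
  // For example (illustrative): in TokenFactor(ld.ch, st.ch), if st's own
  // chain operand is ld, then st already orders after ld, and the direct
  // ld.ch operand of the token factor is redundant and can be pruned.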
1920 
1921   // List of nodes to search through and original Ops from which they originate.
1922   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1923   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1924   SmallPtrSet<SDNode *, 16> SeenChains;
1925   bool DidPruneOps = false;
1926 
1927   unsigned NumLeftToConsider = 0;
1928   for (const SDValue &Op : Ops) {
1929     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1930     OpWorkCount.push_back(1);
1931   }
1932 
1933   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1934     // If this is an Op, we can remove the op from the list. Re-mark any
1935     // search associated with it as coming from the current OpNumber.
1936     if (SeenOps.contains(Op)) {
1937       Changed = true;
1938       DidPruneOps = true;
1939       unsigned OrigOpNumber = 0;
1940       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1941         OrigOpNumber++;
1942       assert((OrigOpNumber != Ops.size()) &&
1943              "expected to find TokenFactor Operand");
1944       // Re-mark worklist from OrigOpNumber to OpNumber
1945       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1946         if (Worklist[i].second == OrigOpNumber) {
1947           Worklist[i].second = OpNumber;
1948         }
1949       }
1950       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1951       OpWorkCount[OrigOpNumber] = 0;
1952       NumLeftToConsider--;
1953     }
1954     // Add if it's a new chain
1955     if (SeenChains.insert(Op).second) {
1956       OpWorkCount[OpNumber]++;
1957       Worklist.push_back(std::make_pair(Op, OpNumber));
1958     }
1959   };
1960 
1961   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1962     // We need to consider at least 2 Ops to prune.
1963     if (NumLeftToConsider <= 1)
1964       break;
1965     auto CurNode = Worklist[i].first;
1966     auto CurOpNumber = Worklist[i].second;
1967     assert((OpWorkCount[CurOpNumber] > 0) &&
1968            "Node should not appear in worklist");
1969     switch (CurNode->getOpcode()) {
1970     case ISD::EntryToken:
1971       // Hitting EntryToken is the only way for the search to terminate
1972       // without hitting another operand's search. Prevent us from marking
1973       // this operand considered.
1975       NumLeftToConsider++;
1976       break;
1977     case ISD::TokenFactor:
1978       for (const SDValue &Op : CurNode->op_values())
1979         AddToWorklist(i, Op.getNode(), CurOpNumber);
1980       break;
1981     case ISD::LIFETIME_START:
1982     case ISD::LIFETIME_END:
1983     case ISD::CopyFromReg:
1984     case ISD::CopyToReg:
1985       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1986       break;
1987     default:
1988       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1989         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1990       break;
1991     }
1992     OpWorkCount[CurOpNumber]--;
1993     if (OpWorkCount[CurOpNumber] == 0)
1994       NumLeftToConsider--;
1995   }
1996 
1997   // If we've changed things around then replace token factor.
1998   if (Changed) {
1999     SDValue Result;
2000     if (Ops.empty()) {
2001       // The entry token is the only possible outcome.
2002       Result = DAG.getEntryNode();
2003     } else {
2004       if (DidPruneOps) {
2005         SmallVector<SDValue, 8> PrunedOps;
2007         for (const SDValue &Op : Ops) {
2008           if (SeenChains.count(Op.getNode()) == 0)
2009             PrunedOps.push_back(Op);
2010         }
2011         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2012       } else {
2013         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2014       }
2015     }
2016     return Result;
2017   }
2018   return SDValue();
2019 }
2020 
2021 /// MERGE_VALUES can always be eliminated.
2022 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2023   WorklistRemover DeadNodes(*this);
2024   // Replacing results may cause a different MERGE_VALUES to suddenly
2025   // be CSE'd with N, and carry its uses with it. Iterate until no
2026   // uses remain, to ensure that the node can be safely deleted.
2027   // First add the users of this node to the work list so that they
2028   // can be tried again once they have new operands.
2029   AddUsersToWorklist(N);
2030   do {
2031     // Do as a single replacement to avoid rewalking use lists.
2032     SmallVector<SDValue, 8> Ops;
2033     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2034       Ops.push_back(N->getOperand(i));
2035     DAG.ReplaceAllUsesWith(N, Ops.data());
2036   } while (!N->use_empty());
2037   deleteAndRecombine(N);
2038   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2039 }
2040 
2041 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2042 /// ConstantSDNode pointer; otherwise return nullptr.
2043 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2044   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2045   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2046 }
2047 
2048 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2049 /// and that N may be folded into the load / store addressing mode.
2050 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2051                                     const TargetLowering &TLI) {
2052   EVT VT;
2053   unsigned AS;
2054 
2055   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2056     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2057       return false;
2058     VT = LD->getMemoryVT();
2059     AS = LD->getAddressSpace();
2060   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2061     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2062       return false;
2063     VT = ST->getMemoryVT();
2064     AS = ST->getAddressSpace();
2065   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2066     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2067       return false;
2068     VT = LD->getMemoryVT();
2069     AS = LD->getAddressSpace();
2070   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2071     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2072       return false;
2073     VT = ST->getMemoryVT();
2074     AS = ST->getAddressSpace();
2075   } else
2076     return false;
2077 
2078   TargetLowering::AddrMode AM;
2079   if (N->getOpcode() == ISD::ADD) {
2080     AM.HasBaseReg = true;
2081     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2082     if (Offset)
2083       // [reg +/- imm]
2084       AM.BaseOffs = Offset->getSExtValue();
2085     else
2086       // [reg +/- reg]
2087       AM.Scale = 1;
2088   } else if (N->getOpcode() == ISD::SUB) {
2089     AM.HasBaseReg = true;
2090     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2091     if (Offset)
2092       // [reg +/- imm]
2093       AM.BaseOffs = -Offset->getSExtValue();
2094     else
2095       // [reg +/- reg]
2096       AM.Scale = 1;
2097   } else
2098     return false;
2099 
2100   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2101                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2102 }
2103 
2104 /// This inverts a canonicalization in IR that replaces a variable select arm
2105 /// with an identity constant. Codegen improves if we re-use the variable
2106 /// operand rather than load a constant. This can also be converted into a
2107 /// masked vector operation if the target supports it.
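///
/// For example (illustrative; FADD's identity is -0.0):
///   (fadd X, (vselect Cond, -0.0, Y)) --> (vselect Cond, X, (fadd X, Y))
/// where X is frozen first because its number of uses increases.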
2108 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2109                                               bool ShouldCommuteOperands) {
2110   // Match a select as operand 1. The identity constant that we are looking for
2111   // is only valid as operand 1 of a non-commutative binop.
2112   SDValue N0 = N->getOperand(0);
2113   SDValue N1 = N->getOperand(1);
2114   if (ShouldCommuteOperands)
2115     std::swap(N0, N1);
2116 
2117   // TODO: Should this apply to scalar select too?
2118   if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2119     return SDValue();
2120 
2121   unsigned Opcode = N->getOpcode();
2122   EVT VT = N->getValueType(0);
2123   SDValue Cond = N1.getOperand(0);
2124   SDValue TVal = N1.getOperand(1);
2125   SDValue FVal = N1.getOperand(2);
2126 
2127   // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2128   // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2129   // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
2130   auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2131     if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
2132       switch (Opcode) {
2133       case ISD::FADD: // X + -0.0 --> X
2134         return C->isZero() && C->isNegative();
2135       case ISD::FSUB: // X - 0.0 --> X
2136         return C->isZero() && !C->isNegative();
2137       case ISD::FMUL: // X * 1.0 --> X
2138       case ISD::FDIV: // X / 1.0 --> X
2139         return C->isExactlyValue(1.0);
2140       }
2141     }
2142     return false;
2143   };
2144 
2145   // This transform increases uses of N0, so freeze it to be safe.
2146   // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2147   if (isIdentityConstantForOpcode(Opcode, TVal)) {
2148     SDValue F0 = DAG.getFreeze(N0);
2149     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2150     return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2151   }
2152   // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2153   if (isIdentityConstantForOpcode(Opcode, FVal)) {
2154     SDValue F0 = DAG.getFreeze(N0);
2155     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2156     return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2157   }
2158 
2159   return SDValue();
2160 }
2161 
2162 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2163   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2164          "Unexpected binary operator");
2165 
2166   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2167   auto BinOpcode = BO->getOpcode();
2168   EVT VT = BO->getValueType(0);
2169   if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2170     if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2171       return Sel;
2172 
2173     if (TLI.isCommutativeBinOp(BO->getOpcode()))
2174       if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2175         return Sel;
2176   }
2177 
2178   // Don't do this unless the old select is going away. We want to eliminate the
2179   // binary operator, not replace a binop with a select.
2180   // TODO: Handle ISD::SELECT_CC.
2181   unsigned SelOpNo = 0;
2182   SDValue Sel = BO->getOperand(0);
2183   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2184     SelOpNo = 1;
2185     Sel = BO->getOperand(1);
2186   }
2187 
2188   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2189     return SDValue();
2190 
2191   SDValue CT = Sel.getOperand(1);
2192   if (!isConstantOrConstantVector(CT, true) &&
2193       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2194     return SDValue();
2195 
2196   SDValue CF = Sel.getOperand(2);
2197   if (!isConstantOrConstantVector(CF, true) &&
2198       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2199     return SDValue();
2200 
2201   // Bail out if any constants are opaque because we can't constant fold those.
2202   // The exception is "and" and "or" with either 0 or -1, in which case we can
2203   // propagate non-constant operands into the select. I.e.:
2204   // and (select Cond, 0, -1), X --> select Cond, 0, X
2205   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2206   bool CanFoldNonConst =
2207       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2208       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2209       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2210 
2211   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2212   if (!CanFoldNonConst &&
2213       !isConstantOrConstantVector(CBO, true) &&
2214       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2215     return SDValue();
2216 
2217   // We have a select-of-constants followed by a binary operator with a
2218   // constant. Eliminate the binop by pulling the constant math into the select.
2219   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2220   SDLoc DL(Sel);
2221   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2222                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2223   if (!CanFoldNonConst && !NewCT.isUndef() &&
2224       !isConstantOrConstantVector(NewCT, true) &&
2225       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2226     return SDValue();
2227 
2228   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2229                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2230   if (!CanFoldNonConst && !NewCF.isUndef() &&
2231       !isConstantOrConstantVector(NewCF, true) &&
2232       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2233     return SDValue();
2234 
2235   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2236   SelectOp->setFlags(BO->getFlags());
2237   return SelectOp;
2238 }
2239 
2240 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2241   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2242          "Expecting add or sub");
2243 
2244   // Match a constant operand and a zext operand for the math instruction:
2245   // add Z, C
2246   // sub C, Z
2247   bool IsAdd = N->getOpcode() == ISD::ADD;
2248   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2249   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2250   auto *CN = dyn_cast<ConstantSDNode>(C);
2251   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2252     return SDValue();
2253 
2254   // Match the zext operand as a setcc of a boolean.
2255   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2256       Z.getOperand(0).getValueType() != MVT::i1)
2257     return SDValue();
2258 
2259   // Match the compare as: setcc (X & 1), 0, eq.
2260   SDValue SetCC = Z.getOperand(0);
2261   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2262   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2263       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2264       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2265     return SDValue();
2266 
2267   // We are adding/subtracting a constant and an inverted low bit. Turn that
2268   // into a subtract/add of the low bit with incremented/decremented constant:
2269   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2270   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2271   EVT VT = C.getValueType();
2272   SDLoc DL(N);
2273   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2274   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2275                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2276   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2277 }
2278 
2279 /// Try to fold a 'not' of a shifted sign-bit with an add/sub of a constant
2280 /// operand into a shift and an add with a different constant.
2281 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2282   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2283          "Expecting add or sub");
2284 
2285   // We need a constant operand for the add/sub, and the other operand is a
2286   // logical shift right: add (srl), C or sub C, (srl).
2287   bool IsAdd = N->getOpcode() == ISD::ADD;
2288   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2289   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2290   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2291       ShiftOp.getOpcode() != ISD::SRL)
2292     return SDValue();
2293 
2294   // The shift must be of a 'not' value.
2295   SDValue Not = ShiftOp.getOperand(0);
2296   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2297     return SDValue();
2298 
2299   // The shift must be moving the sign bit to the least-significant-bit.
2300   EVT VT = ShiftOp.getValueType();
2301   SDValue ShAmt = ShiftOp.getOperand(1);
2302   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2303   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2304     return SDValue();
2305 
2306   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2307   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2308   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2309   SDLoc DL(N);
2310   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2311   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2312   if (SDValue NewC =
2313           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2314                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2315     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2316   return SDValue();
2317 }
2318 
2319 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2320 /// an ISD::ADD here; it could, for example, be an ISD::OR if we know that there
2321 /// are no common bits set in the operands).
2322 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2323   SDValue N0 = N->getOperand(0);
2324   SDValue N1 = N->getOperand(1);
2325   EVT VT = N0.getValueType();
2326   SDLoc DL(N);
2327 
2328   // fold (add x, undef) -> undef
2329   if (N0.isUndef())
2330     return N0;
2331   if (N1.isUndef())
2332     return N1;
2333 
2334   // fold (add c1, c2) -> c1+c2
2335   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2336     return C;
2337 
2338   // canonicalize constant to RHS
2339   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2340       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2341     return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2342 
2343   // fold vector ops
2344   if (VT.isVector()) {
2345     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2346       return FoldedVOp;
2347 
2348     // fold (add x, 0) -> x, vector edition
2349     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2350       return N0;
2351   }
2352 
2353   // fold (add x, 0) -> x
2354   if (isNullConstant(N1))
2355     return N0;
2356 
2357   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2358     // fold ((A-c1)+c2) -> (A+(c2-c1))
2359     if (N0.getOpcode() == ISD::SUB &&
2360         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2361       SDValue Sub =
2362           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2363       assert(Sub && "Constant folding failed");
2364       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2365     }
2366 
2367     // fold ((c1-A)+c2) -> (c1+c2)-A
2368     if (N0.getOpcode() == ISD::SUB &&
2369         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2370       SDValue Add =
2371           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2372       assert(Add && "Constant folding failed");
2373       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2374     }
2375 
2376     // add (sext i1 X), 1 -> zext (not i1 X)
2377     // We don't transform this pattern:
2378     //   add (zext i1 X), -1 -> sext (not i1 X)
2379     // because most (?) targets generate better code for the zext form.
2380     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2381         isOneOrOneSplat(N1)) {
2382       SDValue X = N0.getOperand(0);
2383       if ((!LegalOperations ||
2384            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2385             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2386           X.getScalarValueSizeInBits() == 1) {
2387         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2388         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2389       }
2390     }
2391 
2392     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2393     // equivalent to (add x, c0).
2394     if (N0.getOpcode() == ISD::OR &&
2395         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2396         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2397       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2398                                                     {N1, N0.getOperand(1)}))
2399         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2400     }
2401   }
2402 
2403   if (SDValue NewSel = foldBinOpIntoSelect(N))
2404     return NewSel;
2405 
2406   // reassociate add
2407   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2408     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2409       return RADD;
2410 
2411     // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2412     // equivalent to (add x, c).
2413     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2414       if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2415           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2416           DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2417         return DAG.getNode(ISD::ADD, DL, VT,
2418                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2419                            N0.getOperand(1));
2420       }
2421       return SDValue();
2422     };
2423     if (SDValue Add = ReassociateAddOr(N0, N1))
2424       return Add;
2425     if (SDValue Add = ReassociateAddOr(N1, N0))
2426       return Add;
2427   }
2428   // fold ((0-A) + B) -> B-A
2429   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2430     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2431 
2432   // fold (A + (0-B)) -> A-B
2433   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2434     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2435 
2436   // fold (A+(B-A)) -> B
2437   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2438     return N1.getOperand(0);
2439 
2440   // fold ((B-A)+A) -> B
2441   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2442     return N0.getOperand(0);
2443 
2444   // fold ((A-B)+(C-A)) -> (C-B)
2445   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2446       N0.getOperand(0) == N1.getOperand(1))
2447     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2448                        N0.getOperand(1));
2449 
2450   // fold ((A-B)+(B-C)) -> (A-C)
2451   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2452       N0.getOperand(1) == N1.getOperand(0))
2453     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2454                        N1.getOperand(1));
2455 
2456   // fold (A+(B-(A+C))) to (B-C)
2457   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2458       N0 == N1.getOperand(1).getOperand(0))
2459     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2460                        N1.getOperand(1).getOperand(1));
2461 
2462   // fold (A+(B-(C+A))) to (B-C)
2463   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2464       N0 == N1.getOperand(1).getOperand(1))
2465     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2466                        N1.getOperand(1).getOperand(0));
2467 
2468   // fold (A+((B-A)+or-C)) to (B+or-C)
2469   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2470       N1.getOperand(0).getOpcode() == ISD::SUB &&
2471       N0 == N1.getOperand(0).getOperand(1))
2472     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2473                        N1.getOperand(1));
2474 
2475   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2476   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2477     SDValue N00 = N0.getOperand(0);
2478     SDValue N01 = N0.getOperand(1);
2479     SDValue N10 = N1.getOperand(0);
2480     SDValue N11 = N1.getOperand(1);
2481 
2482     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2483       return DAG.getNode(ISD::SUB, DL, VT,
2484                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2485                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2486   }
2487 
2488   // fold (add (umax X, C), -C) --> (usubsat X, C)
2489   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2490     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2491       return (!Max && !Op) ||
2492              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2493     };
2494     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2495                                   /*AllowUndefs*/ true))
2496       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2497                          N0.getOperand(1));
2498   }
2499 
2500   if (SimplifyDemandedBits(SDValue(N, 0)))
2501     return SDValue(N, 0);
2502 
2503   if (isOneOrOneSplat(N1)) {
2504     // fold (add (xor a, -1), 1) -> (sub 0, a)
2505     if (isBitwiseNot(N0))
2506       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2507                          N0.getOperand(0));
2508 
2509     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2510     if (N0.getOpcode() == ISD::ADD) {
2511       SDValue A, Xor;
2512 
2513       if (isBitwiseNot(N0.getOperand(0))) {
2514         A = N0.getOperand(1);
2515         Xor = N0.getOperand(0);
2516       } else if (isBitwiseNot(N0.getOperand(1))) {
2517         A = N0.getOperand(0);
2518         Xor = N0.getOperand(1);
2519       }
2520 
2521       if (Xor)
2522         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2523     }
2524 
2525     // Look for:
2526     //   add (add x, y), 1
2527     // And if the target does not like this form, then turn it into:
2528     //   sub y, (xor x, -1)
2529     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2530         N0.getOpcode() == ISD::ADD) {
2531       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2532                                 DAG.getAllOnesConstant(DL, VT));
2533       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2534     }
2535   }
2536 
2537   // (x - y) + -1  ->  add (xor y, -1), x
2538   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2539       isAllOnesOrAllOnesSplat(N1)) {
2540     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2541     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2542   }
2543 
2544   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2545     return Combined;
2546 
2547   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2548     return Combined;
2549 
2550   return SDValue();
2551 }
2552 
2553 SDValue DAGCombiner::visitADD(SDNode *N) {
2554   SDValue N0 = N->getOperand(0);
2555   SDValue N1 = N->getOperand(1);
2556   EVT VT = N0.getValueType();
2557   SDLoc DL(N);
2558 
2559   if (SDValue Combined = visitADDLike(N))
2560     return Combined;
2561 
2562   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2563     return V;
2564 
2565   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2566     return V;
2567 
2568   // fold (a+b) -> (a|b) iff a and b share no bits.
2569   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2570       DAG.haveNoCommonBitsSet(N0, N1))
2571     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2572 
2573   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2574   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2575     const APInt &C0 = N0->getConstantOperandAPInt(0);
2576     const APInt &C1 = N1->getConstantOperandAPInt(0);
2577     return DAG.getVScale(DL, VT, C0 + C1);
2578   }
2579 
2580   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2581   if ((N0.getOpcode() == ISD::ADD) &&
2582       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2583       (N1.getOpcode() == ISD::VSCALE)) {
2584     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2585     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2586     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2587     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2588   }
2589 
2590   // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2).
2591   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2592       N1.getOpcode() == ISD::STEP_VECTOR) {
2593     const APInt &C0 = N0->getConstantOperandAPInt(0);
2594     const APInt &C1 = N1->getConstantOperandAPInt(0);
2595     APInt NewStep = C0 + C1;
2596     return DAG.getStepVector(DL, VT, NewStep);
2597   }
2598 
2599   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2600   if ((N0.getOpcode() == ISD::ADD) &&
2601       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2602       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2603     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2604     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2605     APInt NewStep = SV0 + SV1;
2606     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2607     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2608   }
2609 
2610   return SDValue();
2611 }
2612 
2613 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2614   unsigned Opcode = N->getOpcode();
2615   SDValue N0 = N->getOperand(0);
2616   SDValue N1 = N->getOperand(1);
2617   EVT VT = N0.getValueType();
2618   SDLoc DL(N);
2619 
2620   // fold (add_sat x, undef) -> -1
2621   if (N0.isUndef() || N1.isUndef())
2622     return DAG.getAllOnesConstant(DL, VT);
2623 
2624   // fold (add_sat c1, c2) -> c3
2625   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2626     return C;
2627 
2628   // canonicalize constant to RHS
2629   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2630       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2631     return DAG.getNode(Opcode, DL, VT, N1, N0);
2632 
2633   // fold vector ops
2634   if (VT.isVector()) {
2635     // TODO SimplifyVBinOp
2636 
2637     // fold (add_sat x, 0) -> x, vector edition
2638     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2639       return N0;
2640   }
2641 
2642   // fold (add_sat x, 0) -> x
2643   if (isNullConstant(N1))
2644     return N0;
2645 
2646   // If it cannot overflow, transform into an add.
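  // For example, in i8, if known bits prove N0 <= 0x0F and N1 <= 0x0F, the
  // sum is at most 0x1E and cannot wrap, so the saturation is a no-op.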
2647   if (Opcode == ISD::UADDSAT)
2648     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2649       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2650 
2651   return SDValue();
2652 }
2653 
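/// Peek through TRUNCATE, ZERO_EXTEND and AND-with-1 wrappers to find a value
/// that is the carry/borrow result (result #1) of an overflowing arithmetic
/// node, subject to legality and boolean-content checks. For example, given
/// (and (zext (uaddo X, Y):1), 1) this returns the uaddo's carry value.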
2654 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2655   bool Masked = false;
2656 
2657   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2658   while (true) {
2659     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2660       V = V.getOperand(0);
2661       continue;
2662     }
2663 
2664     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2665       Masked = true;
2666       V = V.getOperand(0);
2667       continue;
2668     }
2669 
2670     break;
2671   }
2672 
2673   // If this is not a carry, return.
2674   if (V.getResNo() != 1)
2675     return SDValue();
2676 
2677   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2678       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2679     return SDValue();
2680 
2681   EVT VT = V.getNode()->getValueType(0);
2682   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2683     return SDValue();
2684 
  // If the result is masked, then we can return it regardless of the bool
  // content. If it isn't, then we need to make sure the bool type contains
  // only the values 0 and 1.
2688   if (Masked ||
2689       TLI.getBooleanContents(V.getValueType()) ==
2690           TargetLoweringBase::ZeroOrOneBooleanContent)
2691     return V;
2692 
2693   return SDValue();
2694 }
2695 
2696 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2697 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2698 /// the opcode and bypass the mask operation.
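/// For example, if X is known to be 0 or -1, then (and X, 1) is 0 or 1 and
/// N0 + (and X, 1) == N0 - X, since subtracting -1 adds 1.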
2699 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2700                                  SelectionDAG &DAG, const SDLoc &DL) {
2701   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2702     return SDValue();
2703 
2704   EVT VT = N0.getValueType();
2705   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2706     return SDValue();
2707 
2708   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2709   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2710   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2711 }
2712 
2713 /// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
2716   EVT VT = N0.getValueType();
2717   SDLoc DL(LocReference);
2718 
2719   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2720   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2721       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2722     return DAG.getNode(ISD::SUB, DL, VT, N0,
2723                        DAG.getNode(ISD::SHL, DL, VT,
2724                                    N1.getOperand(0).getOperand(1),
2725                                    N1.getOperand(1)));
2726 
2727   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2728     return V;
2729 
2730   // Look for:
2731   //   add (add x, 1), y
2732   // And if the target does not like this form then turn into:
2733   //   sub y, (xor x, -1)
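  // This is sound because (xor x, -1) == -x - 1, so y - (x ^ -1) == x + y + 1.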
2734   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2735       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2736     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2737                               DAG.getAllOnesConstant(DL, VT));
2738     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2739   }
2740 
2741   // Hoist one-use subtraction by non-opaque constant:
2742   //   (x - C) + y  ->  (x + y) - C
2743   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2744   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2745       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2746     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2747     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2748   }
2749   // Hoist one-use subtraction from non-opaque constant:
2750   //   (C - x) + y  ->  (y - x) + C
2751   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2752       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2753     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2754     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2755   }
2756 
2757   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2758   // rather than 'add 0/-1' (the zext should get folded).
2759   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2760   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2761       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2762       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2763     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2764     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2765   }
2766 
2767   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
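  // (sextinreg Y i1) is 0 or -1 depending on bit 0 of Y, and adding 0 or -1
  // equals subtracting (and Y 1), which is 0 or 1.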
2768   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2769     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2770     if (TN->getVT() == MVT::i1) {
2771       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2772                                  DAG.getConstant(1, DL, VT));
2773       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2774     }
2775   }
2776 
2777   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2778   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2779       N1.getResNo() == 0)
2780     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2781                        N0, N1.getOperand(0), N1.getOperand(2));
2782 
2783   // (add X, Carry) -> (addcarry X, 0, Carry)
2784   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2785     if (SDValue Carry = getAsCarry(TLI, N1))
2786       return DAG.getNode(ISD::ADDCARRY, DL,
2787                          DAG.getVTList(VT, Carry.getValueType()), N0,
2788                          DAG.getConstant(0, DL, VT), Carry);
2789 
2790   return SDValue();
2791 }
2792 
2793 SDValue DAGCombiner::visitADDC(SDNode *N) {
2794   SDValue N0 = N->getOperand(0);
2795   SDValue N1 = N->getOperand(1);
2796   EVT VT = N0.getValueType();
2797   SDLoc DL(N);
2798 
2799   // If the flag result is dead, turn this into an ADD.
2800   if (!N->hasAnyUseOfValue(1))
2801     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2802                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2803 
2804   // canonicalize constant to RHS.
2805   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2806   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2807   if (N0C && !N1C)
2808     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2809 
2810   // fold (addc x, 0) -> x + no carry out
2811   if (isNullConstant(N1))
2812     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2813                                         DL, MVT::Glue));
2814 
2815   // If it cannot overflow, transform into an add.
2816   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2817     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2818                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2819 
2820   return SDValue();
2821 }
2822 
2823 /**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2825  * then the flip also occurs if computing the inverse is the same cost.
2826  * This function returns an empty SDValue in case it cannot flip the boolean
2827  * without increasing the cost of the computation. If you want to flip a boolean
2828  * no matter what, use DAG.getLogicalNOT.
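 * For example, with ZeroOrOneBooleanContent, if V == (xor X, 1) then X is
 * already the inverse of V, so this returns X without adding any nodes.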
2829  */
2830 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2831                                   const TargetLowering &TLI,
2832                                   bool Force) {
2833   if (Force && isa<ConstantSDNode>(V))
2834     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2835 
2836   if (V.getOpcode() != ISD::XOR)
2837     return SDValue();
2838 
2839   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2840   if (!Const)
2841     return SDValue();
2842 
2843   EVT VT = V.getValueType();
2844 
2845   bool IsFlip = false;
2846   switch(TLI.getBooleanContents(VT)) {
2847     case TargetLowering::ZeroOrOneBooleanContent:
2848       IsFlip = Const->isOne();
2849       break;
2850     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2851       IsFlip = Const->isAllOnes();
2852       break;
2853     case TargetLowering::UndefinedBooleanContent:
2854       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2855       break;
2856   }
2857 
2858   if (IsFlip)
2859     return V.getOperand(0);
2860   if (Force)
2861     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2862   return SDValue();
2863 }
2864 
2865 SDValue DAGCombiner::visitADDO(SDNode *N) {
2866   SDValue N0 = N->getOperand(0);
2867   SDValue N1 = N->getOperand(1);
2868   EVT VT = N0.getValueType();
2869   bool IsSigned = (ISD::SADDO == N->getOpcode());
2870 
2871   EVT CarryVT = N->getValueType(1);
2872   SDLoc DL(N);
2873 
2874   // If the flag result is dead, turn this into an ADD.
2875   if (!N->hasAnyUseOfValue(1))
2876     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2877                      DAG.getUNDEF(CarryVT));
2878 
2879   // canonicalize constant to RHS.
2880   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2881       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2882     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2883 
2884   // fold (addo x, 0) -> x + no carry out
2885   if (isNullOrNullSplat(N1))
2886     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2887 
2888   if (!IsSigned) {
2889     // If it cannot overflow, transform into an add.
2890     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2891       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2892                        DAG.getConstant(0, DL, CarryVT));
2893 
2894     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
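    // In i8, (~a) + 1 wraps exactly when a == 0, while 0 - a borrows exactly
    // when a != 0, so the carry result must be logically inverted.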
2895     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2896       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2897                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2898       return CombineTo(
2899           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2900     }
2901 
2902     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2903       return Combined;
2904 
2905     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2906       return Combined;
2907   }
2908 
2909   return SDValue();
2910 }
2911 
2912 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2913   EVT VT = N0.getValueType();
2914   if (VT.isVector())
2915     return SDValue();
2916 
2917   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2918   // If Y + 1 cannot overflow.
2919   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2920     SDValue Y = N1.getOperand(0);
2921     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2922     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2923       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2924                          N1.getOperand(2));
2925   }
2926 
2927   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2928   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2929     if (SDValue Carry = getAsCarry(TLI, N1))
2930       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2931                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2932 
2933   return SDValue();
2934 }
2935 
2936 SDValue DAGCombiner::visitADDE(SDNode *N) {
2937   SDValue N0 = N->getOperand(0);
2938   SDValue N1 = N->getOperand(1);
2939   SDValue CarryIn = N->getOperand(2);
2940 
2941   // canonicalize constant to RHS
2942   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2943   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2944   if (N0C && !N1C)
2945     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2946                        N1, N0, CarryIn);
2947 
2948   // fold (adde x, y, false) -> (addc x, y)
2949   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2950     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2951 
2952   return SDValue();
2953 }
2954 
2955 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2956   SDValue N0 = N->getOperand(0);
2957   SDValue N1 = N->getOperand(1);
2958   SDValue CarryIn = N->getOperand(2);
2959   SDLoc DL(N);
2960 
2961   // canonicalize constant to RHS
2962   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2963   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2964   if (N0C && !N1C)
2965     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2966 
2967   // fold (addcarry x, y, false) -> (uaddo x, y)
2968   if (isNullConstant(CarryIn)) {
2969     if (!LegalOperations ||
2970         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2971       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2972   }
2973 
2974   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2975   if (isNullConstant(N0) && isNullConstant(N1)) {
2976     EVT VT = N0.getValueType();
2977     EVT CarryVT = CarryIn.getValueType();
2978     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2979     AddToWorklist(CarryExt.getNode());
2980     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2981                                     DAG.getConstant(1, DL, VT)),
2982                      DAG.getConstant(0, DL, CarryVT));
2983   }
2984 
2985   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2986     return Combined;
2987 
2988   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2989     return Combined;
2990 
2991   return SDValue();
2992 }
2993 
2994 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2995   SDValue N0 = N->getOperand(0);
2996   SDValue N1 = N->getOperand(1);
2997   SDValue CarryIn = N->getOperand(2);
2998   SDLoc DL(N);
2999 
3000   // canonicalize constant to RHS
3001   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3002   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3003   if (N0C && !N1C)
3004     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3005 
3006   // fold (saddo_carry x, y, false) -> (saddo x, y)
3007   if (isNullConstant(CarryIn)) {
3008     if (!LegalOperations ||
3009         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3010       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3011   }
3012 
3013   return SDValue();
3014 }
3015 
/**
 * If we are facing some sort of diamond carry propagation pattern, try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
 *
 * Patterns typically look something like
 *            (uaddo A, B)
 *             /       \
 *          Carry      Sum
 *            |          \
 *            | (addcarry *, 0, Z)
 *            |       /
 *             \   Carry
 *              |   /
 * (addcarry X, *, *)
 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
 * produce a combine with a single path for carry propagation.
 */
3038 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3039                                       SDValue X, SDValue Carry0, SDValue Carry1,
3040                                       SDNode *N) {
3041   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3042     return SDValue();
3043   if (Carry1.getOpcode() != ISD::UADDO)
3044     return SDValue();
3045 
3046   SDValue Z;
3047 
3048   /**
3049    * First look for a suitable Z. It will present itself in the form of
3050    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3051    */
3052   if (Carry0.getOpcode() == ISD::ADDCARRY &&
3053       isNullConstant(Carry0.getOperand(1))) {
3054     Z = Carry0.getOperand(2);
3055   } else if (Carry0.getOpcode() == ISD::UADDO &&
3056              isOneConstant(Carry0.getOperand(1))) {
3057     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3058     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3059   } else {
3060     // We couldn't find a suitable Z.
3061     return SDValue();
3062   }
3063 
  auto cancelDiamond = [&](SDValue A, SDValue B) {
3066     SDLoc DL(N);
3067     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3068     Combiner.AddToWorklist(NewY.getNode());
3069     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3070                        DAG.getConstant(0, DL, X.getValueType()),
3071                        NewY.getValue(1));
3072   };
3073 
3074   /**
3075    *      (uaddo A, B)
3076    *           |
3077    *          Sum
3078    *           |
3079    * (addcarry *, 0, Z)
3080    */
3081   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3082     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3083   }
3084 
3085   /**
3086    * (addcarry A, 0, Z)
3087    *         |
3088    *        Sum
3089    *         |
3090    *  (uaddo *, B)
3091    */
3092   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3093     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3094   }
3095 
3096   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3097     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3098   }
3099 
3100   return SDValue();
3101 }
3102 
3103 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3104 // match patterns like:
3105 //
3106 //          (uaddo A, B)            CarryIn
3107 //            |  \                     |
3108 //            |   \                    |
3109 //    PartialSum   PartialCarryOutX   /
3110 //            |        |             /
3111 //            |    ____|____________/
3112 //            |   /    |
3113 //     (uaddo *, *)    \________
3114 //       |  \                   \
3115 //       |   \                   |
3116 //       |    PartialCarryOutY   |
3117 //       |        \              |
3118 //       |         \            /
3119 //   AddCarrySum    |    ______/
3120 //                  |   /
3121 //   CarryOut = (or *, *)
3122 //
3123 // And generate ADDCARRY (or SUBCARRY) with two result values:
3124 //
3125 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3126 //
3127 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3128 // a single path for carry/borrow out propagation:
3129 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3130                                    SDValue Carry0, SDValue Carry1, SDNode *N) {
3131   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3132     return SDValue();
3133   unsigned Opcode = Carry0.getOpcode();
3134   if (Opcode != Carry1.getOpcode())
3135     return SDValue();
3136   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3137     return SDValue();
3138 
3139   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3140   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3141   // the above ASCII art.)
3142   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3143       Carry1.getOperand(1) != Carry0.getValue(0))
3144     std::swap(Carry0, Carry1);
3145   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3146       Carry1.getOperand(1) != Carry0.getValue(0))
3147     return SDValue();
3148 
  // The carry in value must be on the right-hand side for subtraction.
3150   unsigned CarryInOperandNum =
3151       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3152   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3153     return SDValue();
3154   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3155 
3156   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3157   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3158     return SDValue();
3159 
3160   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3161   // TODO: make getAsCarry() aware of how partial carries are merged.
3162   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3163     return SDValue();
3164   CarryIn = CarryIn.getOperand(0);
3165   if (CarryIn.getValueType() != MVT::i1)
3166     return SDValue();
3167 
3168   SDLoc DL(N);
3169   SDValue Merged =
3170       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3171                   Carry0.getOperand(1), CarryIn);
3172 
  // Note that because we have proven that the result of the UADDO/USUBO of A
  // and B feeds into the UADDO/USUBO that consumes the carry/borrow in, if
  // the first UADDO/USUBO overflows, the second one cannot. For example,
  // consider 8-bit numbers where 0xFF is the maximum value.
3178   //
3179   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3180   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3181   //
3182   // This is important because it means that OR and XOR can be used to merge
3183   // carry flags; and that AND can return a constant zero.
3184   //
3185   // TODO: match other operations that can merge flags (ADD, etc)
3186   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3187   if (N->getOpcode() == ISD::AND)
3188     return DAG.getConstant(0, DL, MVT::i1);
3189   return Merged.getValue(1);
3190 }
3191 
3192 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3193                                        SDNode *N) {
3194   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
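  // This is sound because (~a) + b + c == b - a - (1 - c), i.e. the borrow
  // form (subcarry b, a, !c), and the resulting carry out is the logical
  // inverse of the borrow out.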
3195   if (isBitwiseNot(N0))
3196     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3197       SDLoc DL(N);
3198       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3199                                 N0.getOperand(0), NotC);
3200       return CombineTo(
3201           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3202     }
3203 
3204   // Iff the flag result is dead:
3205   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3206   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3207   // or the dependency between the instructions.
3208   if ((N0.getOpcode() == ISD::ADD ||
3209        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3210         N0.getValue(1) != CarryIn)) &&
3211       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3212     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3213                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3214 
3215   /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation, in which case we try to transform the DAG
3218    * to ensure linear carry propagation if that is possible.
3219    */
3220   if (auto Y = getAsCarry(TLI, N1)) {
3221     // Because both are carries, Y and Z can be swapped.
3222     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3223       return R;
3224     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3225       return R;
3226   }
3227 
3228   return SDValue();
3229 }
3230 
3231 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3232 // clamp/truncation if necessary.
3233 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3234                                    SDValue RHS, SelectionDAG &DAG,
3235                                    const SDLoc &DL) {
3236   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3237          "Illegal truncation");
3238 
3239   if (DstVT == SrcVT)
3240     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3241 
3242   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3243   // clamping RHS.
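  // Clamping is safe: if RHS exceeds the DstVT maximum, the subtraction
  // saturates to 0 either way, because LHS is known to fit in DstVT.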
3244   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3245                                           DstVT.getScalarSizeInBits());
3246   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3247     return SDValue();
3248 
3249   SDValue SatLimit =
3250       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3251                                            DstVT.getScalarSizeInBits()),
3252                       DL, SrcVT);
3253   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3254   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3255   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3256   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3257 }
3258 
3259 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3260 // usubsat(a,b), optionally as a truncated type.
3261 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3262   if (N->getOpcode() != ISD::SUB ||
3263       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3264     return SDValue();
3265 
3266   EVT SubVT = N->getValueType(0);
3267   SDValue Op0 = N->getOperand(0);
3268   SDValue Op1 = N->getOperand(1);
3269 
  // Try to find umax(a,b) - b or a - umin(a,b) patterns
  // that may be converted to usubsat(a,b).
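  // For example, umax(a,b) - b equals a - b when a >= b and 0 otherwise,
  // which is exactly usubsat(a,b); a - umin(a,b) reduces the same way.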
3272   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3273     SDValue MaxLHS = Op0.getOperand(0);
3274     SDValue MaxRHS = Op0.getOperand(1);
3275     if (MaxLHS == Op1)
3276       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3277     if (MaxRHS == Op1)
3278       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3279   }
3280 
3281   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3282     SDValue MinLHS = Op1.getOperand(0);
3283     SDValue MinRHS = Op1.getOperand(1);
3284     if (MinLHS == Op0)
3285       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3286     if (MinRHS == Op0)
3287       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3288   }
3289 
3290   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3291   if (Op1.getOpcode() == ISD::TRUNCATE &&
3292       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3293       Op1.getOperand(0).hasOneUse()) {
3294     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3295     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3296     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3297       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3298                                  DAG, SDLoc(N));
3299     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3300       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3301                                  DAG, SDLoc(N));
3302   }
3303 
3304   return SDValue();
3305 }
3306 
// Since it may not be valid to emit a fold to zero for vector initializers,
3308 // check if we can before folding.
3309 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3310                              SelectionDAG &DAG, bool LegalOperations) {
3311   if (!VT.isVector())
3312     return DAG.getConstant(0, DL, VT);
3313   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3314     return DAG.getConstant(0, DL, VT);
3315   return SDValue();
3316 }
3317 
3318 SDValue DAGCombiner::visitSUB(SDNode *N) {
3319   SDValue N0 = N->getOperand(0);
3320   SDValue N1 = N->getOperand(1);
3321   EVT VT = N0.getValueType();
3322   SDLoc DL(N);
3323 
3324   // fold (sub x, x) -> 0
3325   // FIXME: Refactor this and xor and other similar operations together.
3326   if (N0 == N1)
3327     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3328 
3329   // fold (sub c1, c2) -> c3
3330   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3331     return C;
3332 
3333   // fold vector ops
3334   if (VT.isVector()) {
3335     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3336       return FoldedVOp;
3337 
3338     // fold (sub x, 0) -> x, vector edition
3339     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3340       return N0;
3341   }
3342 
3343   if (SDValue NewSel = foldBinOpIntoSelect(N))
3344     return NewSel;
3345 
3346   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3347 
3348   // fold (sub x, c) -> (add x, -c)
3349   if (N1C) {
3350     return DAG.getNode(ISD::ADD, DL, VT, N0,
3351                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3352   }
3353 
3354   if (isNullOrNullSplat(N0)) {
3355     unsigned BitWidth = VT.getScalarSizeInBits();
3356     // Right-shifting everything out but the sign bit followed by negation is
3357     // the same as flipping arithmetic/logical shift type without the negation:
3358     // -(X >>u 31) -> (X >>s 31)
3359     // -(X >>s 31) -> (X >>u 31)
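    // For example, in i32, (X >>u 31) is 0 or 1, so its negation is 0 or -1,
    // which is exactly (X >>s 31); the reverse direction is symmetric.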
3360     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3361       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3362       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3363         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3364         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3365           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3366       }
3367     }
3368 
3369     // 0 - X --> 0 if the sub is NUW.
3370     if (N->getFlags().hasNoUnsignedWrap())
3371       return N0;
3372 
3373     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3374       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3375       // N1 must be 0 because negating the minimum signed value is undefined.
3376       if (N->getFlags().hasNoSignedWrap())
3377         return N0;
3378 
3379       // 0 - X --> X if X is 0 or the minimum signed value.
3380       return N1;
3381     }
3382 
3383     // Convert 0 - abs(x).
3384     if (N1->getOpcode() == ISD::ABS &&
3385         !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3386       if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3387         return Result;
3388 
    // Fold neg(splat(neg(x))) -> splat(x)
3390     if (VT.isVector()) {
3391       SDValue N1S = DAG.getSplatValue(N1, true);
3392       if (N1S && N1S.getOpcode() == ISD::SUB &&
3393           isNullConstant(N1S.getOperand(0))) {
3394         if (VT.isScalableVector())
3395           return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3396         return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3397       }
3398     }
3399   }
3400 
3401   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3402   if (isAllOnesOrAllOnesSplat(N0))
3403     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3404 
3405   // fold (A - (0-B)) -> A+B
3406   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3407     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3408 
3409   // fold A-(A-B) -> B
3410   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3411     return N1.getOperand(1);
3412 
3413   // fold (A+B)-A -> B
3414   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3415     return N0.getOperand(1);
3416 
3417   // fold (A+B)-B -> A
3418   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3419     return N0.getOperand(0);
3420 
3421   // fold (A+C1)-C2 -> A+(C1-C2)
3422   if (N0.getOpcode() == ISD::ADD &&
3423       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3424       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3425     SDValue NewC =
3426         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3427     assert(NewC && "Constant folding failed");
3428     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3429   }
3430 
3431   // fold C2-(A+C1) -> (C2-C1)-A
3432   if (N1.getOpcode() == ISD::ADD) {
3433     SDValue N11 = N1.getOperand(1);
3434     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3435         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3436       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3437       assert(NewC && "Constant folding failed");
3438       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3439     }
3440   }
3441 
3442   // fold (A-C1)-C2 -> A-(C1+C2)
3443   if (N0.getOpcode() == ISD::SUB &&
3444       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3445       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3446     SDValue NewC =
3447         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3448     assert(NewC && "Constant folding failed");
3449     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3450   }
3451 
3452   // fold (c1-A)-c2 -> (c1-c2)-A
3453   if (N0.getOpcode() == ISD::SUB &&
3454       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3455       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3456     SDValue NewC =
3457         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3458     assert(NewC && "Constant folding failed");
3459     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3460   }
3461 
3462   // fold ((A+(B+or-C))-B) -> A+or-C
3463   if (N0.getOpcode() == ISD::ADD &&
3464       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3465        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3466       N0.getOperand(1).getOperand(0) == N1)
3467     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3468                        N0.getOperand(1).getOperand(1));
3469 
3470   // fold ((A+(C+B))-B) -> A+C
3471   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3472       N0.getOperand(1).getOperand(1) == N1)
3473     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3474                        N0.getOperand(1).getOperand(0));
3475 
3476   // fold ((A-(B-C))-C) -> A-B
3477   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3478       N0.getOperand(1).getOperand(1) == N1)
3479     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3480                        N0.getOperand(1).getOperand(0));
3481 
3482   // fold (A-(B-C)) -> A+(C-B)
3483   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3484     return DAG.getNode(ISD::ADD, DL, VT, N0,
3485                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3486                                    N1.getOperand(0)));
3487 
3488   // A - (A & B)  ->  A & (~B)
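  // Every bit set in (A & B) is also set in A, so the subtraction never
  // borrows and simply clears those bits, leaving A & ~B.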
3489   if (N1.getOpcode() == ISD::AND) {
3490     SDValue A = N1.getOperand(0);
3491     SDValue B = N1.getOperand(1);
3492     if (A != N0)
3493       std::swap(A, B);
3494     if (A == N0 &&
3495         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3496       SDValue InvB =
3497           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3498       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3499     }
3500   }
3501 
3502   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3503   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3504     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3505         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3506       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3507                                 N1.getOperand(0).getOperand(1),
3508                                 N1.getOperand(1));
3509       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3510     }
3511     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3512         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3513       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3514                                 N1.getOperand(0),
3515                                 N1.getOperand(1).getOperand(1));
3516       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3517     }
3518   }
3519 
3520   // If either operand of a sub is undef, the result is undef
3521   if (N0.isUndef())
3522     return N0;
3523   if (N1.isUndef())
3524     return N1;
3525 
3526   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3527     return V;
3528 
3529   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3530     return V;
3531 
3532   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3533     return V;
3534 
3535   if (SDValue V = foldSubToUSubSat(VT, N))
3536     return V;
3537 
3538   // (x - y) - 1  ->  add (xor y, -1), x
3539   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3540     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3541                               DAG.getAllOnesConstant(DL, VT));
3542     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3543   }
3544 
3545   // Look for:
3546   //   sub y, (xor x, -1)
3547   // And if the target does not like this form then turn into:
3548   //   add (add x, y), 1
3549   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3550     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3551     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3552   }
3553 
3554   // Hoist one-use addition by non-opaque constant:
3555   //   (x + C) - y  ->  (x - y) + C
3556   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3557       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3558     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3559     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3560   }
3561   // y - (x + C)  ->  (y - x) - C
3562   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3563       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3564     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3565     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3566   }
3567   // (x - C) - y  ->  (x - y) - C
3568   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3569   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3570       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3571     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3572     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3573   }
3574   // (C - x) - y  ->  C - (x + y)
3575   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3576       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3577     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3578     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3579   }
3580 
3581   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3582   // rather than 'sub 0/1' (the sext should get folded).
3583   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
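  // For Y in {0,1}, both forms compute X or X - 1: the zext subtracts 0 or 1,
  // the sext adds 0 or -1.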
3584   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3585       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3586       TLI.getBooleanContents(VT) ==
3587           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3588     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3589     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3590   }
3591 
3592   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
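  // With Y all sign bits of X: X >= 0 gives (X ^ 0) - 0 == X, while X < 0
  // gives (X ^ -1) - (-1) == ~X + 1 == -X, so the pair computes abs(X).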
3593   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3594     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3595       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3596       SDValue S0 = N1.getOperand(0);
3597       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3598         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3599           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3600             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3601     }
3602   }
3603 
3604   // If the relocation model supports it, consider symbol offsets.
3605   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3606     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3607       // fold (sub Sym, c) -> Sym-c
3608       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3609         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3610                                     GA->getOffset() -
3611                                         (uint64_t)N1C->getSExtValue());
3612       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3613       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3614         if (GA->getGlobal() == GB->getGlobal())
3615           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3616                                  DL, VT);
3617     }
3618 
3619   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3620   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3621     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3622     if (TN->getVT() == MVT::i1) {
3623       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3624                                  DAG.getConstant(1, DL, VT));
3625       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3626     }
3627   }
3628 
3629   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3630   if (N1.getOpcode() == ISD::VSCALE) {
3631     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3632     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3633   }
3634 
3635   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3636   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3637     APInt NewStep = -N1.getConstantOperandAPInt(0);
3638     return DAG.getNode(ISD::ADD, DL, VT, N0,
3639                        DAG.getStepVector(DL, VT, NewStep));
3640   }
3641 
3642   // Prefer an add for more folding potential and possibly better codegen:
3643   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
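  // Both shifts isolate the sign bit: the lshr form yields 0 or 1 to
  // subtract, the ashr form yields 0 or -1 to add, giving equal results.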
3644   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3645     SDValue ShAmt = N1.getOperand(1);
3646     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3647     if (ShAmtC &&
3648         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3649       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3650       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3651     }
3652   }
3653 
3654   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3655     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3656     if (SDValue Carry = getAsCarry(TLI, N0)) {
3657       SDValue X = N1;
3658       SDValue Zero = DAG.getConstant(0, DL, VT);
3659       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3660       return DAG.getNode(ISD::ADDCARRY, DL,
3661                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3662                          Carry);
3663     }
3664   }
3665 
3666   return SDValue();
3667 }
3668 
3669 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3670   SDValue N0 = N->getOperand(0);
3671   SDValue N1 = N->getOperand(1);
3672   EVT VT = N0.getValueType();
3673   SDLoc DL(N);
3674 
3675   // fold (sub_sat x, undef) -> 0
3676   if (N0.isUndef() || N1.isUndef())
3677     return DAG.getConstant(0, DL, VT);
3678 
3679   // fold (sub_sat x, x) -> 0
3680   if (N0 == N1)
3681     return DAG.getConstant(0, DL, VT);
3682 
3683   // fold (sub_sat c1, c2) -> c3
3684   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3685     return C;
3686 
3687   // fold vector ops
3688   if (VT.isVector()) {
3689     // TODO SimplifyVBinOp
3690 
3691     // fold (sub_sat x, 0) -> x, vector edition
3692     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3693       return N0;
3694   }
3695 
3696   // fold (sub_sat x, 0) -> x
3697   if (isNullConstant(N1))
3698     return N0;
3699 
3700   return SDValue();
3701 }
3702 
3703 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3704   SDValue N0 = N->getOperand(0);
3705   SDValue N1 = N->getOperand(1);
3706   EVT VT = N0.getValueType();
3707   SDLoc DL(N);
3708 
  // If the flag result is dead, turn this into a SUB.
3710   if (!N->hasAnyUseOfValue(1))
3711     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3712                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3713 
3714   // fold (subc x, x) -> 0 + no borrow
3715   if (N0 == N1)
3716     return CombineTo(N, DAG.getConstant(0, DL, VT),
3717                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3718 
3719   // fold (subc x, 0) -> x + no borrow
3720   if (isNullConstant(N1))
3721     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3722 
3723   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3724   if (isAllOnesConstant(N0))
3725     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3726                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3727 
3728   return SDValue();
3729 }
3730 
3731 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3732   SDValue N0 = N->getOperand(0);
3733   SDValue N1 = N->getOperand(1);
3734   EVT VT = N0.getValueType();
3735   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3736 
3737   EVT CarryVT = N->getValueType(1);
3738   SDLoc DL(N);
3739 
  // If the flag result is dead, turn this into a SUB.
3741   if (!N->hasAnyUseOfValue(1))
3742     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3743                      DAG.getUNDEF(CarryVT));
3744 
3745   // fold (subo x, x) -> 0 + no borrow
3746   if (N0 == N1)
3747     return CombineTo(N, DAG.getConstant(0, DL, VT),
3748                      DAG.getConstant(0, DL, CarryVT));
3749 
3750   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3751 
  // fold (subo x, c) -> (addo x, -c)
3753   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3754     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3755                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756   }
3757 
3758   // fold (subo x, 0) -> x + no borrow
3759   if (isNullOrNullSplat(N1))
3760     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3761 
3762   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3763   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3764     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3765                      DAG.getConstant(0, DL, CarryVT));
3766 
3767   return SDValue();
3768 }
3769 
3770 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3771   SDValue N0 = N->getOperand(0);
3772   SDValue N1 = N->getOperand(1);
3773   SDValue CarryIn = N->getOperand(2);
3774 
3775   // fold (sube x, y, false) -> (subc x, y)
3776   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3777     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3778 
3779   return SDValue();
3780 }
3781 
3782 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3783   SDValue N0 = N->getOperand(0);
3784   SDValue N1 = N->getOperand(1);
3785   SDValue CarryIn = N->getOperand(2);
3786 
3787   // fold (subcarry x, y, false) -> (usubo x, y)
3788   if (isNullConstant(CarryIn)) {
3789     if (!LegalOperations ||
3790         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3791       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3792   }
3793 
3794   return SDValue();
3795 }
3796 
3797 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3798   SDValue N0 = N->getOperand(0);
3799   SDValue N1 = N->getOperand(1);
3800   SDValue CarryIn = N->getOperand(2);
3801 
3802   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3803   if (isNullConstant(CarryIn)) {
3804     if (!LegalOperations ||
3805         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3806       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3807   }
3808 
3809   return SDValue();
3810 }
3811 
3812 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3813 // UMULFIXSAT here.
3814 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3815   SDValue N0 = N->getOperand(0);
3816   SDValue N1 = N->getOperand(1);
3817   SDValue Scale = N->getOperand(2);
3818   EVT VT = N0.getValueType();
3819 
3820   // fold (mulfix x, undef, scale) -> 0
3821   if (N0.isUndef() || N1.isUndef())
3822     return DAG.getConstant(0, SDLoc(N), VT);
3823 
3824   // Canonicalize constant to RHS (vector doesn't have to splat)
3825   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3826      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3827     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3828 
3829   // fold (mulfix x, 0, scale) -> 0
3830   if (isNullConstant(N1))
3831     return DAG.getConstant(0, SDLoc(N), VT);
3832 
3833   return SDValue();
3834 }
3835 
3836 SDValue DAGCombiner::visitMUL(SDNode *N) {
3837   SDValue N0 = N->getOperand(0);
3838   SDValue N1 = N->getOperand(1);
3839   EVT VT = N0.getValueType();
3840 
3841   // fold (mul x, undef) -> 0
3842   if (N0.isUndef() || N1.isUndef())
3843     return DAG.getConstant(0, SDLoc(N), VT);
3844 
3845   // fold (mul c1, c2) -> c1*c2
3846   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3847     return C;
3848 
3849   // canonicalize constant to RHS (vector doesn't have to splat)
3850   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3851       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3852     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3853 
3854   bool N1IsConst = false;
3855   bool N1IsOpaqueConst = false;
3856   APInt ConstValue1;
3857 
3858   // fold vector ops
3859   if (VT.isVector()) {
3860     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3861       return FoldedVOp;
3862 
3863     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3864     assert((!N1IsConst ||
3865             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3866            "Splat APInt should be element width");
3867   } else {
3868     N1IsConst = isa<ConstantSDNode>(N1);
3869     if (N1IsConst) {
3870       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3871       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3872     }
3873   }
3874 
3875   // fold (mul x, 0) -> 0
3876   if (N1IsConst && ConstValue1.isZero())
3877     return N1;
3878 
3879   // fold (mul x, 1) -> x
3880   if (N1IsConst && ConstValue1.isOne())
3881     return N0;
3882 
3883   if (SDValue NewSel = foldBinOpIntoSelect(N))
3884     return NewSel;
3885 
3886   // fold (mul x, -1) -> 0-x
3887   if (N1IsConst && ConstValue1.isAllOnes()) {
3888     SDLoc DL(N);
3889     return DAG.getNode(ISD::SUB, DL, VT,
3890                        DAG.getConstant(0, DL, VT), N0);
3891   }
3892 
3893   // fold (mul x, (1 << c)) -> x << c
3894   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3895       DAG.isKnownToBeAPowerOfTwo(N1) &&
3896       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3897     SDLoc DL(N);
3898     SDValue LogBase2 = BuildLogBase2(N1, DL);
3899     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3900     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3901     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3902   }
3903 
3904   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3905   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3906     unsigned Log2Val = (-ConstValue1).logBase2();
3907     SDLoc DL(N);
3908     // FIXME: If the input is something that is easily negated (e.g. a
3909     // single-use add), we should put the negate there.
3910     return DAG.getNode(ISD::SUB, DL, VT,
3911                        DAG.getConstant(0, DL, VT),
3912                        DAG.getNode(ISD::SHL, DL, VT, N0,
3913                             DAG.getConstant(Log2Val, DL,
3914                                       getShiftAmountTy(N0.getValueType()))));
3915   }
3916 
3917   // Try to transform:
3918   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3919   // mul x, (2^N + 1) --> add (shl x, N), x
3920   // mul x, (2^N - 1) --> sub (shl x, N), x
3921   // Examples: x * 33 --> (x << 5) + x
3922   //           x * 15 --> (x << 4) - x
3923   //           x * -33 --> -((x << 5) + x)
3924   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3925   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3926   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3927   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3928   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3929   //           x * 0xf800 --> (x << 16) - (x << 11)
3930   //           x * -0x8800 --> -((x << 15) + (x << 11))
3931   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3932   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3933     // TODO: We could handle more general decomposition of any constant by
3934     //       having the target set a limit on number of ops and making a
3935     //       callback to determine that sequence (similar to sqrt expansion).
3936     unsigned MathOp = ISD::DELETED_NODE;
3937     APInt MulC = ConstValue1.abs();
3938     // The constant `2` should be treated as (2^0 + 1).
3939     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3940     MulC.lshrInPlace(TZeros);
3941     if ((MulC - 1).isPowerOf2())
3942       MathOp = ISD::ADD;
3943     else if ((MulC + 1).isPowerOf2())
3944       MathOp = ISD::SUB;
3945 
3946     if (MathOp != ISD::DELETED_NODE) {
3947       unsigned ShAmt =
3948           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3949       ShAmt += TZeros;
3950       assert(ShAmt < VT.getScalarSizeInBits() &&
3951              "multiply-by-constant generated out of bounds shift");
3952       SDLoc DL(N);
3953       SDValue Shl =
3954           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3955       SDValue R =
3956           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3957                                DAG.getNode(ISD::SHL, DL, VT, N0,
3958                                            DAG.getConstant(TZeros, DL, VT)))
3959                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3960       if (ConstValue1.isNegative())
3961         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3962       return R;
3963     }
3964   }
3965 
3966   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
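  // For example, ((X << 2) * 5) becomes (X * 20), exposing the combined
  // constant to the shift/add decompositions above.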
3967   if (N0.getOpcode() == ISD::SHL &&
3968       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3969       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3970     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3971     if (isConstantOrConstantVector(C3))
3972       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3973   }
3974 
3975   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3976   // use.
3977   {
3978     SDValue Sh, Y;
3979 
3980     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3981     if (N0.getOpcode() == ISD::SHL &&
3982         isConstantOrConstantVector(N0.getOperand(1)) &&
3983         N0.getNode()->hasOneUse()) {
3984       Sh = N0; Y = N1;
3985     } else if (N1.getOpcode() == ISD::SHL &&
3986                isConstantOrConstantVector(N1.getOperand(1)) &&
3987                N1.getNode()->hasOneUse()) {
3988       Sh = N1; Y = N0;
3989     }
3990 
3991     if (Sh.getNode()) {
3992       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3993       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3994     }
3995   }
3996 
3997   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
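       // e.g. (mul (add x, 4), 3) --> (add (mul x, 3), 12)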
3998   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3999       N0.getOpcode() == ISD::ADD &&
4000       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4001       isMulAddWithConstProfitable(N, N0, N1))
4002       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
4003                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
4004                                      N0.getOperand(0), N1),
4005                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
4006                                      N0.getOperand(1), N1));
4007 
4008   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4009   if (N0.getOpcode() == ISD::VSCALE)
4010     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4011       const APInt &C0 = N0.getConstantOperandAPInt(0);
4012       const APInt &C1 = NC1->getAPIntValue();
4013       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
4014     }
4015 
4016   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4017   APInt MulVal;
4018   if (N0.getOpcode() == ISD::STEP_VECTOR)
4019     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4020       const APInt &C0 = N0.getConstantOperandAPInt(0);
4021       APInt NewStep = C0 * MulVal;
4022       return DAG.getStepVector(SDLoc(N), VT, NewStep);
4023     }
4024 
4025   // Fold the elementwise identities (mul x, 0/undef) -> 0 and
4026   // (mul x, 1) -> x for a whole vector at once:
4027   // -> and(x, mask)
4028   // We can replace vectors with '0' and '1' factors with a clearing mask.
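       // e.g. (mul x, <0, 1, undef, 1>) --> (and x, <0, -1, 0, -1>), treating
       // undef factors as 0.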
4029   if (VT.isFixedLengthVector()) {
4030     unsigned NumElts = VT.getVectorNumElements();
4031     SmallBitVector ClearMask;
4032     ClearMask.reserve(NumElts);
4033     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4034       if (!V || V->isZero()) {
4035         ClearMask.push_back(true);
4036         return true;
4037       }
4038       ClearMask.push_back(false);
4039       return V->isOne();
4040     };
4041     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4042         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4043       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4044       SDLoc DL(N);
4045       EVT LegalSVT = N1.getOperand(0).getValueType();
4046       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4047       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4048       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4049       for (unsigned I = 0; I != NumElts; ++I)
4050         if (ClearMask[I])
4051           Mask[I] = Zero;
4052       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4053     }
4054   }
4055 
4056   // reassociate mul
4057   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
4058     return RMUL;
4059 
4060   return SDValue();
4061 }
4062 
4063 /// Return true if divmod libcall is available.
4064 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4065                                      const TargetLowering &TLI) {
4066   RTLIB::Libcall LC;
4067   EVT NodeType = Node->getValueType(0);
4068   if (!NodeType.isSimple())
4069     return false;
4070   switch (NodeType.getSimpleVT().SimpleTy) {
4071   default: return false; // No libcall for vector types.
4072   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4073   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4074   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4075   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4076   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4077   }
4078 
4079   return TLI.getLibcallName(LC) != nullptr;
4080 }
4081 
4082 /// Issue divrem if both quotient and remainder are needed.
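     /// For example, if both (sdiv x, y) and (srem x, y) are live, a single
     /// (sdivrem x, y) can feed its quotient to the div users and its
     /// remainder to the rem users, so only one division is computed.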
4083 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4084   if (Node->use_empty())
4085     return SDValue(); // This is a dead node, leave it alone.
4086 
4087   unsigned Opcode = Node->getOpcode();
4088   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4089   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4090 
4091   // A divrem libcall can still handle types that are not legal in registers.
4092   EVT VT = Node->getValueType(0);
4093   if (VT.isVector() || !VT.isInteger())
4094     return SDValue();
4095 
4096   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4097     return SDValue();
4098 
4099   // If DIVREM is going to get expanded into a libcall,
4100   // but there is no libcall available, then don't combine.
4101   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4102       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4103     return SDValue();
4104 
4105   // If div is legal, it's better to do the normal expansion.
4106   unsigned OtherOpcode = 0;
4107   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4108     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4109     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4110       return SDValue();
4111   } else {
4112     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4113     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4114       return SDValue();
4115   }
4116 
4117   SDValue Op0 = Node->getOperand(0);
4118   SDValue Op1 = Node->getOperand(1);
4119   SDValue combined;
4120   for (SDNode *User : Op0.getNode()->uses()) {
4121     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4122         User->use_empty())
4123       continue;
4124     // Convert the other matching node(s), too;
4125     // otherwise, the DIVREM may get target-legalized into something
4126     // target-specific that we won't be able to recognize.
4127     unsigned UserOpc = User->getOpcode();
4128     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4129         User->getOperand(0) == Op0 &&
4130         User->getOperand(1) == Op1) {
4131       if (!combined) {
4132         if (UserOpc == OtherOpcode) {
4133           SDVTList VTs = DAG.getVTList(VT, VT);
4134           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4135         } else if (UserOpc == DivRemOpc) {
4136           combined = SDValue(User, 0);
4137         } else {
4138           assert(UserOpc == Opcode);
4139           continue;
4140         }
4141       }
4142       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4143         CombineTo(User, combined);
4144       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4145         CombineTo(User, combined.getValue(1));
4146     }
4147   }
4148   return combined;
4149 }
4150 
4151 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4152   SDValue N0 = N->getOperand(0);
4153   SDValue N1 = N->getOperand(1);
4154   EVT VT = N->getValueType(0);
4155   SDLoc DL(N);
4156 
4157   unsigned Opc = N->getOpcode();
4158   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4159   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4160 
4161   // X / undef -> undef
4162   // X % undef -> undef
4163   // X / 0 -> undef
4164   // X % 0 -> undef
4165   // NOTE: This includes vectors where any divisor element is zero/undef.
4166   if (DAG.isUndef(Opc, {N0, N1}))
4167     return DAG.getUNDEF(VT);
4168 
4169   // undef / X -> 0
4170   // undef % X -> 0
4171   if (N0.isUndef())
4172     return DAG.getConstant(0, DL, VT);
4173 
4174   // 0 / X -> 0
4175   // 0 % X -> 0
4176   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4177   if (N0C && N0C->isZero())
4178     return N0;
4179 
4180   // X / X -> 1
4181   // X % X -> 0
4182   if (N0 == N1)
4183     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4184 
4185   // X / 1 -> X
4186   // X % 1 -> 0
4187   // If this is a boolean op (single-bit element type), we can't have
4188   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4189   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4190   // it's a 1.
4191   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4192     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4193 
4194   return SDValue();
4195 }
4196 
4197 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4198   SDValue N0 = N->getOperand(0);
4199   SDValue N1 = N->getOperand(1);
4200   EVT VT = N->getValueType(0);
4201   EVT CCVT = getSetCCResultType(VT);
4202   SDLoc DL(N);
4203 
4204   // fold (sdiv c1, c2) -> c1/c2
4205   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4206     return C;
4207 
4208   // fold vector ops
4209   if (VT.isVector())
4210     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4211       return FoldedVOp;
4212 
4213   // fold (sdiv X, -1) -> 0-X
4214   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4215   if (N1C && N1C->isAllOnes())
4216     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4217 
4218   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
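       // (Every X other than MIN_SIGNED has strictly smaller magnitude than
       //  the divisor, so the quotient truncates to 0; X == MIN_SIGNED gives 1.)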
4219   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4220     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4221                          DAG.getConstant(1, DL, VT),
4222                          DAG.getConstant(0, DL, VT));
4223 
4224   if (SDValue V = simplifyDivRem(N, DAG))
4225     return V;
4226 
4227   if (SDValue NewSel = foldBinOpIntoSelect(N))
4228     return NewSel;
4229 
4230   // If we know the sign bits of both operands are zero, strength reduce to a
4231   // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4232   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4233     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4234 
4235   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4236     // If the corresponding remainder node exists, update its users with
4237     // (Dividend - (Quotient * Divisor)).
4238     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4239                                               { N0, N1 })) {
4240       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4241       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4242       AddToWorklist(Mul.getNode());
4243       AddToWorklist(Sub.getNode());
4244       CombineTo(RemNode, Sub);
4245     }
4246     return V;
4247   }
4248 
4249   // sdiv, srem -> sdivrem
4250   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4251   // true.  Otherwise, we break the simplification logic in visitREM().
4252   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4253   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4254     if (SDValue DivRem = useDivRem(N))
4255         return DivRem;
4256 
4257   return SDValue();
4258 }
4259 
4260 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4261   SDLoc DL(N);
4262   EVT VT = N->getValueType(0);
4263   EVT CCVT = getSetCCResultType(VT);
4264   unsigned BitWidth = VT.getScalarSizeInBits();
4265 
4266   // Helper for determining whether a value is a power-of-2 constant (or a
4267   // negated power-of-2 constant) scalar, or a vector of such elements.
4268   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4269     if (C->isZero() || C->isOpaque())
4270       return false;
4271     if (C->getAPIntValue().isPowerOf2())
4272       return true;
4273     if (C->getAPIntValue().isNegatedPowerOf2())
4274       return true;
4275     return false;
4276   };
4277 
4278   // fold (sdiv X, pow2) -> simple ops after legalize
4279   // FIXME: We check for the exact bit here because the generic lowering gives
4280   // better results in that case. The target-specific lowering should learn how
4281   // to handle exact sdivs efficiently.
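       // Illustrative i32 trace for (sdiv x, 8): Sign = x >> 31, Inexact = 29,
       // Srl = Sign >>u 29 = (x < 0 ? 7 : 0), Add = x + Srl, Sra = Add >> 3.
       // Adding 7 before the arithmetic shift rounds negative dividends toward
       // zero, as sdiv semantics require.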
4282   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4283     // Target-specific implementation of sdiv x, pow2.
4284     if (SDValue Res = BuildSDIVPow2(N))
4285       return Res;
4286 
4287     // Create constants that are functions of the shift amount value.
4288     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4289     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4290     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4291     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4292     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4293     if (!isConstantOrConstantVector(Inexact))
4294       return SDValue();
4295 
4296     // Splat the sign bit into the register
4297     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4298                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4299     AddToWorklist(Sign.getNode());
4300 
4301     // Add (N0 < 0) ? abs(N1) - 1 : 0;
4302     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4303     AddToWorklist(Srl.getNode());
4304     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4305     AddToWorklist(Add.getNode());
4306     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4307     AddToWorklist(Sra.getNode());
4308 
4309     // Special case: (sdiv X, 1) -> X
4310     // Special case: (sdiv X, -1) -> 0-X
4311     SDValue One = DAG.getConstant(1, DL, VT);
4312     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4313     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4314     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4315     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4316     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4317 
4318     // If dividing by a positive value, we're done. Otherwise, the result must
4319     // be negated.
4320     SDValue Zero = DAG.getConstant(0, DL, VT);
4321     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4322 
4323     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4324     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4325     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4326     return Res;
4327   }
4328 
4329   // If integer divide is expensive and we satisfy the requirements, emit an
4330   // alternate sequence.  Targets may check function attributes for size/speed
4331   // trade-offs.
4332   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4333   if (isConstantOrConstantVector(N1) &&
4334       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4335     if (SDValue Op = BuildSDIV(N))
4336       return Op;
4337 
4338   return SDValue();
4339 }
4340 
4341 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4342   SDValue N0 = N->getOperand(0);
4343   SDValue N1 = N->getOperand(1);
4344   EVT VT = N->getValueType(0);
4345   EVT CCVT = getSetCCResultType(VT);
4346   SDLoc DL(N);
4347 
4348   // fold (udiv c1, c2) -> c1/c2
4349   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4350     return C;
4351 
4352   // fold vector ops
4353   if (VT.isVector())
4354     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4355       return FoldedVOp;
4356 
4357   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4358   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4359   if (N1C && N1C->isAllOnes())
4360     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4361                          DAG.getConstant(1, DL, VT),
4362                          DAG.getConstant(0, DL, VT));
4363 
4364   if (SDValue V = simplifyDivRem(N, DAG))
4365     return V;
4366 
4367   if (SDValue NewSel = foldBinOpIntoSelect(N))
4368     return NewSel;
4369 
4370   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4371     // If the corresponding remainder node exists, update its users with
4372     // (Dividend - (Quotient * Divisor)).
4373     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4374                                               { N0, N1 })) {
4375       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4376       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4377       AddToWorklist(Mul.getNode());
4378       AddToWorklist(Sub.getNode());
4379       CombineTo(RemNode, Sub);
4380     }
4381     return V;
4382   }
4383 
4384   // udiv, urem -> udivrem
4385   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4386   // true.  Otherwise, we break the simplification logic in visitREM().
4387   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4388   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4389     if (SDValue DivRem = useDivRem(N))
4390         return DivRem;
4391 
4392   return SDValue();
4393 }
4394 
4395 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4396   SDLoc DL(N);
4397   EVT VT = N->getValueType(0);
4398 
4399   // fold (udiv x, (1 << c)) -> x >>u c
4400   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4401       DAG.isKnownToBeAPowerOfTwo(N1)) {
4402     SDValue LogBase2 = BuildLogBase2(N1, DL);
4403     AddToWorklist(LogBase2.getNode());
4404 
4405     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4406     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4407     AddToWorklist(Trunc.getNode());
4408     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4409   }
4410 
4411   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
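       // e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))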
4412   if (N1.getOpcode() == ISD::SHL) {
4413     SDValue N10 = N1.getOperand(0);
4414     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4415         DAG.isKnownToBeAPowerOfTwo(N10)) {
4416       SDValue LogBase2 = BuildLogBase2(N10, DL);
4417       AddToWorklist(LogBase2.getNode());
4418 
4419       EVT ADDVT = N1.getOperand(1).getValueType();
4420       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4421       AddToWorklist(Trunc.getNode());
4422       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4423       AddToWorklist(Add.getNode());
4424       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4425     }
4426   }
4427 
4428   // fold (udiv x, c) -> alternate
4429   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4430   if (isConstantOrConstantVector(N1) &&
4431       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4432     if (SDValue Op = BuildUDIV(N))
4433       return Op;
4434 
4435   return SDValue();
4436 }
4437 
4438 // Handles ISD::SREM and ISD::UREM.
4439 SDValue DAGCombiner::visitREM(SDNode *N) {
4440   unsigned Opcode = N->getOpcode();
4441   SDValue N0 = N->getOperand(0);
4442   SDValue N1 = N->getOperand(1);
4443   EVT VT = N->getValueType(0);
4444   EVT CCVT = getSetCCResultType(VT);
4445 
4446   bool isSigned = (Opcode == ISD::SREM);
4447   SDLoc DL(N);
4448 
4449   // fold (rem c1, c2) -> c1%c2
4450   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4451   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4452     return C;
4453 
4454   // fold (urem X, -1) -> select(X == -1, 0, X)
4455   if (!isSigned && N1C && N1C->isAllOnes())
4456     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4457                          DAG.getConstant(0, DL, VT), N0);
4458 
4459   if (SDValue V = simplifyDivRem(N, DAG))
4460     return V;
4461 
4462   if (SDValue NewSel = foldBinOpIntoSelect(N))
4463     return NewSel;
4464 
4465   if (isSigned) {
4466     // If we know the sign bits of both operands are zero, strength reduce to a
4467     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4468     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4469       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4470   } else {
4471     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4472       // fold (urem x, pow2) -> (and x, pow2-1)
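           // e.g. (urem x, 8) --> (and x, 7)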
4473       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4474       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4475       AddToWorklist(Add.getNode());
4476       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4477     }
4478     if (N1.getOpcode() == ISD::SHL &&
4479         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4480       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4481       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4482       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4483       AddToWorklist(Add.getNode());
4484       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4485     }
4486   }
4487 
4488   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4489 
4490   // If X/C can be simplified by the division-by-constant logic, lower
4491   // X%C to the equivalent of X-X/C*C.
4492   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4493   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4494   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4495   // combine will not return a DIVREM.  Regardless, checking cheapness here
4496   // makes sense since the simplification results in fatter code.
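       // e.g. an i32 (urem x, 10) becomes x - (x / 10) * 10, where the
       // division is itself typically expanded via the multiply-by-magic-
       // constant sequence rather than a hardware divide.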
4497   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4498     SDValue OptimizedDiv =
4499         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4500     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4501       // If the equivalent Div node also exists, update its users.
4502       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4503       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4504                                                 { N0, N1 }))
4505         CombineTo(DivNode, OptimizedDiv);
4506       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4507       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4508       AddToWorklist(OptimizedDiv.getNode());
4509       AddToWorklist(Mul.getNode());
4510       return Sub;
4511     }
4512   }
4513 
4514   // sdiv, srem -> sdivrem
4515   if (SDValue DivRem = useDivRem(N))
4516     return DivRem.getValue(1);
4517 
4518   return SDValue();
4519 }
4520 
4521 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4522   SDValue N0 = N->getOperand(0);
4523   SDValue N1 = N->getOperand(1);
4524   EVT VT = N->getValueType(0);
4525   SDLoc DL(N);
4526 
4527   // fold (mulhs c1, c2)
4528   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4529     return C;
4530 
4531   // canonicalize constant to RHS.
4532   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4533       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4534     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4535 
4536   if (VT.isVector()) {
4537     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4538       return FoldedVOp;
4539 
4540     // fold (mulhs x, 0) -> 0
4541     // do not return N1, because it may contain undef elements.
4542     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4543       return DAG.getConstant(0, DL, VT);
4544   }
4545 
4546   // fold (mulhs x, 0) -> 0
4547   if (isNullConstant(N1))
4548     return N1;
4549 
4550   // fold (mulhs x, 1) -> (sra x, size(x)-1)
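       // (The high half of the sign-extended product x * 1 is just x's sign
       //  bit replicated across the width.)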
4551   if (isOneConstant(N1))
4552     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4553                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4554                                        getShiftAmountTy(N0.getValueType())));
4555 
4556   // fold (mulhs x, undef) -> 0
4557   if (N0.isUndef() || N1.isUndef())
4558     return DAG.getConstant(0, DL, VT);
4559 
4560   // If the type that is twice as wide is legal, transform the mulhs to a
4561   // wider multiply plus a shift.
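       // e.g. (mulhs i16:x, i16:y) with a legal i32 MUL becomes
       // (trunc (srl (mul (sext x), (sext y)), 16)).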
4562   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4563       !VT.isVector()) {
4564     MVT Simple = VT.getSimpleVT();
4565     unsigned SimpleSize = Simple.getSizeInBits();
4566     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4567     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4568       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4569       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4570       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4571       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4572             DAG.getConstant(SimpleSize, DL,
4573                             getShiftAmountTy(N1.getValueType())));
4574       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4575     }
4576   }
4577 
4578   return SDValue();
4579 }
4580 
4581 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4582   SDValue N0 = N->getOperand(0);
4583   SDValue N1 = N->getOperand(1);
4584   EVT VT = N->getValueType(0);
4585   SDLoc DL(N);
4586 
4587   // fold (mulhu c1, c2)
4588   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4589     return C;
4590 
4591   // canonicalize constant to RHS.
4592   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4593       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4594     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4595 
4596   if (VT.isVector()) {
4597     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4598       return FoldedVOp;
4599 
4600     // fold (mulhu x, 0) -> 0
4601     // do not return N1, because it may contain undef elements.
4602     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4603       return DAG.getConstant(0, DL, VT);
4604   }
4605 
4606   // fold (mulhu x, 0) -> 0
4607   if (isNullConstant(N1))
4608     return N1;
4609 
4610   // fold (mulhu x, 1) -> 0
4611   if (isOneConstant(N1))
4612     return DAG.getConstant(0, DL, N0.getValueType());
4613 
4614   // fold (mulhu x, undef) -> 0
4615   if (N0.isUndef() || N1.isUndef())
4616     return DAG.getConstant(0, DL, VT);
4617 
4618   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
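       // e.g. for i32: (mulhu x, 16) --> (srl x, 28)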
4619   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4620       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4621     unsigned NumEltBits = VT.getScalarSizeInBits();
4622     SDValue LogBase2 = BuildLogBase2(N1, DL);
4623     SDValue SRLAmt = DAG.getNode(
4624         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4625     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4626     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4627     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4628   }
4629 
4630   // If the type that is twice as wide is legal, transform the mulhu to a
4631   // wider multiply plus a shift.
4632   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4633       !VT.isVector()) {
4634     MVT Simple = VT.getSimpleVT();
4635     unsigned SimpleSize = Simple.getSizeInBits();
4636     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4637     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4638       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4639       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4640       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4641       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4642             DAG.getConstant(SimpleSize, DL,
4643                             getShiftAmountTy(N1.getValueType())));
4644       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4645     }
4646   }
4647 
4648   // Simplify the operands using demanded-bits information.
4649   // We don't have demanded bits support for MULHU so this just enables constant
4650   // folding based on known bits.
4651   if (SimplifyDemandedBits(SDValue(N, 0)))
4652     return SDValue(N, 0);
4653 
4654   return SDValue();
4655 }
4656 
4657 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4658 /// give the opcodes for the two computations that are being performed. Returns
4659 /// the simplified value if a combine succeeded, or an empty SDValue otherwise.
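     /// For example, if only the low result of a UMUL_LOHI is used, the node
     /// can be replaced by a plain MUL of the same operands.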
4660 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4661                                                 unsigned HiOp) {
4662   // If the high half is not needed, just compute the low half.
4663   bool HiExists = N->hasAnyUseOfValue(1);
4664   if (!HiExists && (!LegalOperations ||
4665                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4666     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4667     return CombineTo(N, Res, Res);
4668   }
4669 
4670   // If the low half is not needed, just compute the high half.
4671   bool LoExists = N->hasAnyUseOfValue(0);
4672   if (!LoExists && (!LegalOperations ||
4673                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4674     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4675     return CombineTo(N, Res, Res);
4676   }
4677 
4678   // If both halves are used, return as it is.
4679   if (LoExists && HiExists)
4680     return SDValue();
4681 
4682   // If the two computed results can be simplified separately, separate them.
4683   if (LoExists) {
4684     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4685     AddToWorklist(Lo.getNode());
4686     SDValue LoOpt = combine(Lo.getNode());
4687     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4688         (!LegalOperations ||
4689          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4690       return CombineTo(N, LoOpt, LoOpt);
4691   }
4692 
4693   if (HiExists) {
4694     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4695     AddToWorklist(Hi.getNode());
4696     SDValue HiOpt = combine(Hi.getNode());
4697     if (HiOpt.getNode() && HiOpt != Hi &&
4698         (!LegalOperations ||
4699          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4700       return CombineTo(N, HiOpt, HiOpt);
4701   }
4702 
4703   return SDValue();
4704 }
4705 
4706 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4707   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4708     return Res;
4709 
4710   EVT VT = N->getValueType(0);
4711   SDLoc DL(N);
4712 
4713   // If the type that is twice as wide is legal, transform the smul_lohi to a
4714   // wider multiply plus a shift.
4715   if (VT.isSimple() && !VT.isVector()) {
4716     MVT Simple = VT.getSimpleVT();
4717     unsigned SimpleSize = Simple.getSizeInBits();
4718     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4719     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4720       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4721       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4722       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4723       // Compute the high part (result value 1).
4724       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4725             DAG.getConstant(SimpleSize, DL,
4726                             getShiftAmountTy(Lo.getValueType())));
4727       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4728       // Compute the low part (result value 0).
4729       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4730       return CombineTo(N, Lo, Hi);
4731     }
4732   }
4733 
4734   return SDValue();
4735 }
4736 
4737 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4738   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4739     return Res;
4740 
4741   EVT VT = N->getValueType(0);
4742   SDLoc DL(N);
4743 
4744   // (umul_lohi N0, 0) -> (0, 0)
4745   if (isNullConstant(N->getOperand(1))) {
4746     SDValue Zero = DAG.getConstant(0, DL, VT);
4747     return CombineTo(N, Zero, Zero);
4748   }
4749 
4750   // (umul_lohi N0, 1) -> (N0, 0)
4751   if (isOneConstant(N->getOperand(1))) {
4752     SDValue Zero = DAG.getConstant(0, DL, VT);
4753     return CombineTo(N, N->getOperand(0), Zero);
4754   }
4755 
4756   // If the type that is twice as wide is legal, transform the umul_lohi to a
4757   // wider multiply plus a shift.
4758   if (VT.isSimple() && !VT.isVector()) {
4759     MVT Simple = VT.getSimpleVT();
4760     unsigned SimpleSize = Simple.getSizeInBits();
4761     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4762     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4763       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4764       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4765       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4766       // Compute the high part (result value 1).
4767       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4768             DAG.getConstant(SimpleSize, DL,
4769                             getShiftAmountTy(Lo.getValueType())));
4770       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4771       // Compute the low part (result value 0).
4772       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4773       return CombineTo(N, Lo, Hi);
4774     }
4775   }
4776 
4777   return SDValue();
4778 }
4779 
4780 SDValue DAGCombiner::visitMULO(SDNode *N) {
4781   SDValue N0 = N->getOperand(0);
4782   SDValue N1 = N->getOperand(1);
4783   EVT VT = N0.getValueType();
4784   bool IsSigned = (ISD::SMULO == N->getOpcode());
4785 
4786   EVT CarryVT = N->getValueType(1);
4787   SDLoc DL(N);
4788 
4789   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4790   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4791 
4792   // fold operation with constant operands.
4793   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4794   // multiple results.
4795   if (N0C && N1C) {
4796     bool Overflow;
4797     APInt Result =
4798         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4799                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4800     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4801                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4802   }
4803 
4804   // canonicalize constant to RHS.
4805   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4806       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4807     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4808 
4809   // fold (mulo x, 0) -> 0 + no carry out
4810   if (isNullOrNullSplat(N1))
4811     return CombineTo(N, DAG.getConstant(0, DL, VT),
4812                      DAG.getConstant(0, DL, CarryVT));
4813 
4814   // (mulo x, 2) -> (addo x, x)
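       // (x * 2 and x + x overflow under exactly the same conditions, for both
       //  the signed and the unsigned variant.)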
4815   if (N1C && N1C->getAPIntValue() == 2)
4816     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4817                        N->getVTList(), N0, N0);
4818 
4819   if (IsSigned) {
4820     // A 1 bit SMULO overflows if both inputs are 1.
4821     if (VT.getScalarSizeInBits() == 1) {
4822       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4823       return CombineTo(N, And,
4824                        DAG.getSetCC(DL, CarryVT, And,
4825                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4826     }
4827 
4828     // Multiplying n * m significant bits yields a result of n + m significant
4829     // bits. If the total number of significant bits does not exceed the
4830     // result bit width (minus 1), there is no overflow.
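         // e.g. for i32: 20 sign bits in N0 plus 14 in N1 gives 34 > 32 + 1,
         // so the product is known not to overflow and the carry is constant 0.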
4831     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4832     if (SignBits > 1)
4833       SignBits += DAG.ComputeNumSignBits(N1);
4834     if (SignBits > VT.getScalarSizeInBits() + 1)
4835       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4836                        DAG.getConstant(0, DL, CarryVT));
4837   } else {
4838     KnownBits N1Known = DAG.computeKnownBits(N1);
4839     KnownBits N0Known = DAG.computeKnownBits(N0);
4840     bool Overflow;
4841     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4842     if (!Overflow)
4843       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4844                        DAG.getConstant(0, DL, CarryVT));
4845   }
4846 
4847   return SDValue();
4848 }
4849 
4850 // Function to calculate whether the Min/Max pair of SDNodes (potentially
4851 // swapped around) make a signed saturate pattern, clamping to between a signed
4852 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and
4853 // 2^BW-1. Returns the node being clamped and the bitwidth of the clamp in BW.
4854 // Should work with both SMIN/SMAX nodes and a setcc/select combo. The operands
4855 // are the same as for SimplifySelectCC: N0 < N1 ? N2 : N3.
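     // For example, smin(smax(X, -128), 127) clamps X to the signed 8-bit
     // range, so it returns X with BW = 8 and Unsigned = false.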
4856 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
4857                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
4858                                   bool &Unsigned) {
4859   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4860                             ISD::CondCode CC) {
4861     // The compare and select operand should be the same or the select operands
4862     // should be truncated versions of the comparison.
4863     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4864       return 0;
4865     // The constants need to be the same or a truncated version of each other.
4866     ConstantSDNode *N1C = isConstOrConstSplat(N1);
4867     ConstantSDNode *N3C = isConstOrConstSplat(N3);
4868     if (!N1C || !N3C)
4869       return 0;
4870     const APInt &C1 = N1C->getAPIntValue();
4871     const APInt &C2 = N3C->getAPIntValue();
4872     if (C1.getBitWidth() < C2.getBitWidth() ||
4873         C1 != C2.sextOrSelf(C1.getBitWidth()))
4874       return 0;
4875     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4876   };
4877 
4878   // Check the initial value is a SMIN/SMAX equivalent.
4879   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4880   if (!Opcode0)
4881     return SDValue();
4882 
4883   SDValue N00, N01, N02, N03;
4884   ISD::CondCode N0CC;
4885   switch (N0.getOpcode()) {
4886   case ISD::SMIN:
4887   case ISD::SMAX:
4888     N00 = N02 = N0.getOperand(0);
4889     N01 = N03 = N0.getOperand(1);
4890     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
4891     break;
4892   case ISD::SELECT_CC:
4893     N00 = N0.getOperand(0);
4894     N01 = N0.getOperand(1);
4895     N02 = N0.getOperand(2);
4896     N03 = N0.getOperand(3);
4897     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4898     break;
4899   case ISD::SELECT:
4900   case ISD::VSELECT:
4901     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4902       return SDValue();
4903     N00 = N0.getOperand(0).getOperand(0);
4904     N01 = N0.getOperand(0).getOperand(1);
4905     N02 = N0.getOperand(1);
4906     N03 = N0.getOperand(2);
4907     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
4908     break;
4909   default:
4910     return SDValue();
4911   }
4912 
4913   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4914   if (!Opcode1 || Opcode0 == Opcode1)
4915     return SDValue();
4916 
4917   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
4918   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
4919   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
4920     return SDValue();
4921 
4922   const APInt &MinC = MinCOp->getAPIntValue();
4923   const APInt &MaxC = MaxCOp->getAPIntValue();
4924   APInt MinCPlus1 = MinC + 1;
4925   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
4926     BW = MinCPlus1.exactLogBase2() + 1;
4927     Unsigned = false;
4928     return N02;
4929   }
4930 
4931   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
4932     BW = MinCPlus1.exactLogBase2();
4933     Unsigned = true;
4934     return N02;
4935   }
4936 
4937   return SDValue();
4938 }
4939 
4940 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
4941                                            SDValue N3, ISD::CondCode CC,
4942                                            SelectionDAG &DAG) {
4943   unsigned BW;
4944   bool Unsigned;
4945   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
4946   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
4947     return SDValue();
4948   EVT FPVT = Fp.getOperand(0).getValueType();
4949   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4950   if (FPVT.isVector())
4951     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4952                              FPVT.getVectorElementCount());
4953   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
4954   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
4955     return SDValue();
4956   SDLoc DL(Fp);
4957   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
4958                             DAG.getValueType(NewVT.getScalarType()));
4959   return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
4960                   : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
4961 }
4962 
4963 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
4964                                          SDValue N3, ISD::CondCode CC,
4965                                          SelectionDAG &DAG) {
4966   // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
4967   // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
4968   // be truncated versions of the setcc (N0/N1).
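       // e.g. umin (fp_to_uint X), 255 can become (fp_to_uint_sat X)
       // saturating to 8 bits (BW = 8), when the target opts in via
       // shouldConvertFpToSat below.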
4969   if ((N0 != N2 &&
4970        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
4971       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
4972     return SDValue();
4973   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4974   ConstantSDNode *N3C = isConstOrConstSplat(N3);
4975   if (!N1C || !N3C)
4976     return SDValue();
4977   const APInt &C1 = N1C->getAPIntValue();
4978   const APInt &C3 = N3C->getAPIntValue();
4979   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
4980       C1 != C3.zextOrSelf(C1.getBitWidth()))
4981     return SDValue();
4982 
4983   unsigned BW = (C1 + 1).exactLogBase2();
4984   EVT FPVT = N0.getOperand(0).getValueType();
4985   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4986   if (FPVT.isVector())
4987     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4988                              FPVT.getVectorElementCount());
4989   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
4990                                                         FPVT, NewVT))
4991     return SDValue();
4992 
4993   SDValue Sat =
4994       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
4995                   DAG.getValueType(NewVT.getScalarType()));
4996   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
4997 }
4998 
4999 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5000   SDValue N0 = N->getOperand(0);
5001   SDValue N1 = N->getOperand(1);
5002   EVT VT = N0.getValueType();
5003   unsigned Opcode = N->getOpcode();
5004   SDLoc DL(N);
5005 
5006   // fold operation with constant operands.
5007   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5008     return C;
5009 
5010   // canonicalize constant to RHS
5011   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5012       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5013     return DAG.getNode(Opcode, DL, VT, N1, N0);
5014 
5015   // fold vector ops
5016   if (VT.isVector())
5017     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5018       return FoldedVOp;
5019 
5020   // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5021   // Only do this if the current op isn't legal and the flipped is.
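       // (When neither operand can be negative, signed and unsigned orderings
       //  agree, so e.g. smin(x, y) == umin(x, y).)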
5022   if (!TLI.isOperationLegal(Opcode, VT) &&
5023       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5024       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5025     unsigned AltOpcode;
5026     switch (Opcode) {
5027     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5028     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5029     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5030     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5031     default: llvm_unreachable("Unknown MINMAX opcode");
5032     }
5033     if (TLI.isOperationLegal(AltOpcode, VT))
5034       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5035   }
5036 
5037   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5038     if (SDValue S = PerformMinMaxFpToSatCombine(
5039             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5040       return S;
5041   if (Opcode == ISD::UMIN)
5042     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5043       return S;
5044 
5045   // Simplify the operands using demanded-bits information.
5046   if (SimplifyDemandedBits(SDValue(N, 0)))
5047     return SDValue(N, 0);
5048 
5049   return SDValue();
5050 }
5051 
5052 /// If this is a bitwise logic instruction and both operands have the same
5053 /// opcode, try to sink the other opcode after the logic instruction.
5054 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5055   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5056   EVT VT = N0.getValueType();
5057   unsigned LogicOpcode = N->getOpcode();
5058   unsigned HandOpcode = N0.getOpcode();
5059   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5060           LogicOpcode == ISD::XOR) && "Expected logic opcode");
5061   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5062 
5063   // Bail early if none of these transforms apply.
5064   if (N0.getNumOperands() == 0)
5065     return SDValue();
5066 
5067   // FIXME: We should check number of uses of the operands to not increase
5068   //        the instruction count for all transforms.
5069 
5070   // Handle size-changing casts.
5071   SDValue X = N0.getOperand(0);
5072   SDValue Y = N1.getOperand(0);
5073   EVT XVT = X.getValueType();
5074   SDLoc DL(N);
5075   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5076       HandOpcode == ISD::SIGN_EXTEND) {
5077     // If both operands have other uses, this transform would create extra
5078     // instructions without eliminating anything.
5079     if (!N0.hasOneUse() && !N1.hasOneUse())
5080       return SDValue();
5081     // We need matching integer source types.
5082     if (XVT != Y.getValueType())
5083       return SDValue();
5084     // Don't create an illegal op during or after legalization. Don't ever
5085     // create an unsupported vector op.
5086     if ((VT.isVector() || LegalOperations) &&
5087         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5088       return SDValue();
5089     // Avoid infinite looping with PromoteIntBinOp.
5090     // TODO: Should we apply desirable/legal constraints to all opcodes?
5091     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5092         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5093       return SDValue();
5094     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
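         // e.g. (and (zext i8 x), (zext i8 y)) --> (zext (and i8 x, y))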
5095     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5096     return DAG.getNode(HandOpcode, DL, VT, Logic);
5097   }
5098 
5099   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5100   if (HandOpcode == ISD::TRUNCATE) {
5101     // If both operands have other uses, this transform would create extra
5102     // instructions without eliminating anything.
5103     if (!N0.hasOneUse() && !N1.hasOneUse())
5104       return SDValue();
5105     // We need matching source types.
5106     if (XVT != Y.getValueType())
5107       return SDValue();
5108     // Don't create an illegal op during or after legalization.
5109     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5110       return SDValue();
5111     // Be extra careful sinking truncate. If it's free, there's no benefit in
5112     // widening a binop. Also, don't create a logic op on an illegal type.
5113     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5114       return SDValue();
5115     if (!TLI.isTypeLegal(XVT))
5116       return SDValue();
5117     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5118     return DAG.getNode(HandOpcode, DL, VT, Logic);
5119   }
5120 
5121   // For binops SHL/SRL/SRA/AND:
5122   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5123   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5124        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5125       N0.getOperand(1) == N1.getOperand(1)) {
5126     // If either operand has other uses, this transform is not an improvement.
5127     if (!N0.hasOneUse() || !N1.hasOneUse())
5128       return SDValue();
5129     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5130     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5131   }
5132 
5133   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5134   if (HandOpcode == ISD::BSWAP) {
5135     // If either operand has other uses, this transform is not an improvement.
5136     if (!N0.hasOneUse() || !N1.hasOneUse())
5137       return SDValue();
5138     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5139     return DAG.getNode(HandOpcode, DL, VT, Logic);
5140   }
5141 
5142   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5143   // Only perform this optimization up until type legalization, before
5144   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5145   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5146   // we don't want to undo this promotion.
5147   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5148   // on scalars.
5149   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5150        Level <= AfterLegalizeTypes) {
5151     // Input types must be integer and the same.
5152     if (XVT.isInteger() && XVT == Y.getValueType() &&
5153         !(VT.isVector() && TLI.isTypeLegal(VT) &&
5154           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5155       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5156       return DAG.getNode(HandOpcode, DL, VT, Logic);
5157     }
5158   }
5159 
5160   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5161   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5162   // If both shuffles use the same mask, and both shuffle within a single
5163   // vector, then it is worthwhile to move the swizzle after the operation.
5164   // The type-legalizer generates this pattern when loading illegal
5165   // vector types from memory. In many cases this allows additional shuffle
5166   // optimizations.
5167   // There are other cases where moving the shuffle after the xor/and/or
5168   // is profitable even if shuffles don't perform a swizzle.
5169   // If both shuffles use the same mask, and both shuffles have the same first
5170   // or second operand, then it might still be profitable to move the shuffle
5171   // after the xor/and/or operation.
5172   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5173     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5174     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5175     assert(X.getValueType() == Y.getValueType() &&
5176            "Inputs to shuffles are not the same type");
5177 
5178     // Check that both shuffles use the same mask. The masks are known to be of
5179     // the same length because the result vector type is the same.
5180     // Check also that shuffles have only one use to avoid introducing extra
5181     // instructions.
5182     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5183         !SVN0->getMask().equals(SVN1->getMask()))
5184       return SDValue();
5185 
5186     // Don't try to fold this node if it requires introducing a
5187     // build vector of all zeros that might be illegal at this stage.
5188     SDValue ShOp = N0.getOperand(1);
5189     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5190       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5191 
5192     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5193     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5194       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5195                                   N0.getOperand(0), N1.getOperand(0));
5196       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5197     }
5198 
5199     // Don't try to fold this node if it requires introducing a
5200     // build vector of all zeros that might be illegal at this stage.
5201     ShOp = N0.getOperand(0);
5202     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5203       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5204 
5205     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5206     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5207       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5208                                   N1.getOperand(1));
5209       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5210     }
5211   }
5212 
5213   return SDValue();
5214 }
5215 
5216 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5217 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5218                                        const SDLoc &DL) {
5219   SDValue LL, LR, RL, RR, N0CC, N1CC;
5220   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5221       !isSetCCEquivalent(N1, RL, RR, N1CC))
5222     return SDValue();
5223 
5224   assert(N0.getValueType() == N1.getValueType() &&
5225          "Unexpected operand types for bitwise logic op");
5226   assert(LL.getValueType() == LR.getValueType() &&
5227          RL.getValueType() == RR.getValueType() &&
5228          "Unexpected operand types for setcc");
5229 
5230   // If we're here post-legalization or the logic op type is not i1, the logic
5231   // op type must match a setcc result type. Also, all folds require new
5232   // operations on the left and right operands, so those types must match.
5233   EVT VT = N0.getValueType();
5234   EVT OpVT = LL.getValueType();
5235   if (LegalOperations || VT.getScalarType() != MVT::i1)
5236     if (VT != getSetCCResultType(OpVT))
5237       return SDValue();
5238   if (OpVT != RL.getValueType())
5239     return SDValue();
5240 
5241   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5242   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5243   bool IsInteger = OpVT.isInteger();
5244   if (LR == RR && CC0 == CC1 && IsInteger) {
5245     bool IsZero = isNullOrNullSplat(LR);
5246     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5247 
5248     // All bits clear?
5249     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5250     // All sign bits clear?
5251     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5252     // Any bits set?
5253     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5254     // Any sign bits set?
5255     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5256 
5257     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5258     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5259     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5260     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5261     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5262       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5263       AddToWorklist(Or.getNode());
5264       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5265     }
5266 
5267     // All bits set?
5268     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5269     // All sign bits set?
5270     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5271     // Any bits clear?
5272     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5273     // Any sign bits clear?
5274     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5275 
5276     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5277     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5278     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5279     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5280     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5281       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5282       AddToWorklist(And.getNode());
5283       return DAG.getSetCC(DL, VT, And, LR, CC1);
5284     }
5285   }
5286 
5287   // TODO: What is the 'or' equivalent of this fold?
5288   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
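       // (Adding 1 maps 0 and -1 to 1 and 0, the only two unsigned values
       //  below 2, so the setuge test excludes exactly those inputs.)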
5289   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5290       IsInteger && CC0 == ISD::SETNE &&
5291       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5292        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5293     SDValue One = DAG.getConstant(1, DL, OpVT);
5294     SDValue Two = DAG.getConstant(2, DL, OpVT);
5295     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5296     AddToWorklist(Add.getNode());
5297     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5298   }
5299 
5300   // Try more general transforms if the predicates match and the only user of
5301   // the compares is the 'and' or 'or'.
5302   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5303       N0.hasOneUse() && N1.hasOneUse()) {
5304     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5305     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
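         // Both folds rely on (xor A, B) being zero exactly when A == B, so
         // the or of the xors is zero exactly when both pairs are equal.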
5306     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5307       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5308       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5309       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5310       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5311       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5312     }
5313 
5314     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5315     // TODO - support non-uniform vector amounts.
5316     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5317       // Match a shared variable operand and 2 non-opaque constant operands.
5318       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5319       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5320       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5321         const APInt &CMax =
5322             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5323         const APInt &CMin =
5324             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5325         // The difference of the constants must be a single bit.
5326         if ((CMax - CMin).isPowerOf2()) {
5327           // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5328           // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
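                    // For example, with CMin == 12 and CMax == 13:
                    // X != 13 && X != 12 becomes ((X - 12) & ~1) != 0, since
                    // X - 12 is 0 or 1 exactly when X is 12 or 13.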
5329           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5330           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5331           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5332           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5333           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5334           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5335           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5336           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5337         }
5338       }
5339     }
5340   }
5341 
5342   // Canonicalize equivalent operands to LL == RL.
5343   if (LL == RR && LR == RL) {
5344     CC1 = ISD::getSetCCSwappedOperands(CC1);
5345     std::swap(RL, RR);
5346   }
5347 
5348   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5349   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5350   if (LL == RL && LR == RR) {
5351     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5352                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5353     if (NewCC != ISD::SETCC_INVALID &&
5354         (!LegalOperations ||
5355          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5356           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5357       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5358   }
5359 
5360   return SDValue();
5361 }
5362 
5363 /// This contains all DAGCombine rules which reduce two values combined by
5364 /// an And operation to a single value. This makes them reusable in the context
5365 /// of visitSELECT(). Rules involving constants are not included as
5366 /// visitSELECT() already handles those cases.
5367 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5368   EVT VT = N1.getValueType();
5369   SDLoc DL(N);
5370 
5371   // fold (and x, undef) -> 0
5372   if (N0.isUndef() || N1.isUndef())
5373     return DAG.getConstant(0, DL, VT);
5374 
5375   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5376     return V;
5377 
5378   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5379   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5380       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5381     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5382       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5383         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5384         // immediate for an add, but it is legal if its top c2 bits are set,
5385         // transform the ADD so the immediate doesn't need to be materialized
5386         // in a register.
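                  // The srl guarantees the top c2 bits of the other AND
                  // operand are zero, so whatever the add produces in those
                  // bits is masked away; the constant's high bits are free to
                  // change without affecting the low bits of the sum.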
5387         APInt ADDC = ADDI->getAPIntValue();
5388         APInt SRLC = SRLI->getAPIntValue();
5389         if (ADDC.getMinSignedBits() <= 64 &&
5390             SRLC.ult(VT.getSizeInBits()) &&
5391             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5392           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5393                                              SRLC.getZExtValue());
5394           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5395             ADDC |= Mask;
5396             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5397               SDLoc DL0(N0);
5398               SDValue NewAdd =
5399                 DAG.getNode(ISD::ADD, DL0, VT,
5400                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5401               CombineTo(N0.getNode(), NewAdd);
5402               // Return N so it doesn't get rechecked!
5403               return SDValue(N, 0);
5404             }
5405           }
5406         }
5407       }
5408     }
5409   }
5410 
5411   // Reduce bit extract of low half of an integer to the narrower type.
5412   // (and (srl i64:x, K), KMask) ->
5413   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
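       // For example, K = 8 with KMask = 0xff extracts a byte that lies
       // entirely in the low half of x, so the shift and mask can be done in
       // i32 and the result zero-extended back to i64.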
5414   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5415     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5416       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5417         unsigned Size = VT.getSizeInBits();
5418         const APInt &AndMask = CAnd->getAPIntValue();
5419         unsigned ShiftBits = CShift->getZExtValue();
5420 
5421         // Bail out, this node will probably disappear anyway.
5422         if (ShiftBits == 0)
5423           return SDValue();
5424 
5425         unsigned MaskBits = AndMask.countTrailingOnes();
5426         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5427 
5428         if (AndMask.isMask() &&
5429             // Required bits must not span the two halves of the integer and
5430             // must fit in the half size type.
5431             (ShiftBits + MaskBits <= Size / 2) &&
5432             TLI.isNarrowingProfitable(VT, HalfVT) &&
5433             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5434             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5435             TLI.isTruncateFree(VT, HalfVT) &&
5436             TLI.isZExtFree(HalfVT, VT)) {
5437           // The isNarrowingProfitable check is to avoid regressions on PPC and
5438           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5439           // on downstream users of this. Those patterns could probably be
5440           // extended to handle extensions mixed in.
5441 
5442           SDValue SL(N0);
5443           assert(MaskBits <= Size);
5444 
5445           // Extracting the highest bit of the low half.
5446           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5447           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5448                                       N0.getOperand(0));
5449 
5450           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5451           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5452           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5453           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5454           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5455         }
5456       }
5457     }
5458   }
5459 
5460   return SDValue();
5461 }
5462 
5463 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5464                                    EVT LoadResultTy, EVT &ExtVT) {
5465   if (!AndC->getAPIntValue().isMask())
5466     return false;
5467 
5468   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5469 
5470   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5471   EVT LoadedVT = LoadN->getMemoryVT();
5472 
5473   if (ExtVT == LoadedVT &&
5474       (!LegalOperations ||
5475        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5476     // ZEXTLOAD will match without needing to change the size of the value being
5477     // loaded.
5478     return true;
5479   }
5480 
5481   // Do not change the width of volatile or atomic loads.
5482   if (!LoadN->isSimple())
5483     return false;
5484 
5485   // Do not generate loads of non-round integer types since these can
5486   // be expensive (and would be wrong if the type is not byte sized).
5487   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5488     return false;
5489 
5490   if (LegalOperations &&
5491       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5492     return false;
5493 
5494   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5495     return false;
5496 
5497   return true;
5498 }
5499 
5500 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5501                                     ISD::LoadExtType ExtType, EVT &MemVT,
5502                                     unsigned ShAmt) {
5503   if (!LDST)
5504     return false;
5505   // Only allow byte offsets.
5506   if (ShAmt % 8)
5507     return false;
5508 
5509   // Do not generate loads of non-round integer types since these can
5510   // be expensive (and would be wrong if the type is not byte sized).
5511   if (!MemVT.isRound())
5512     return false;
5513 
5514   // Don't change the width of volatile or atomic loads.
5515   if (!LDST->isSimple())
5516     return false;
5517 
5518   EVT LdStMemVT = LDST->getMemoryVT();
5519 
5520   // Bail out when changing the scalable property, since we can't be sure that
5521   // we're actually narrowing here.
5522   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5523     return false;
5524 
5525   // Verify that we are actually reducing a load width here.
5526   if (LdStMemVT.bitsLT(MemVT))
5527     return false;
5528 
5529   // Ensure that this isn't going to produce an unsupported memory access.
5530   if (ShAmt) {
5531     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5532     const unsigned ByteShAmt = ShAmt / 8;
5533     const Align LDSTAlign = LDST->getAlign();
5534     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5535     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5536                                 LDST->getAddressSpace(), NarrowAlign,
5537                                 LDST->getMemOperand()->getFlags()))
5538       return false;
5539   }
5540 
5541   // It's not possible to generate a constant of extended or untyped type.
5542   EVT PtrType = LDST->getBasePtr().getValueType();
5543   if (PtrType == MVT::Untyped || PtrType.isExtended())
5544     return false;
5545 
5546   if (isa<LoadSDNode>(LDST)) {
5547     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5548     // Don't transform one with multiple uses, this would require adding a new
5549     // load.
5550     if (!SDValue(Load, 0).hasOneUse())
5551       return false;
5552 
5553     if (LegalOperations &&
5554         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5555       return false;
5556 
5557     // For the transform to be legal, the load must produce only two values
5558     // (the value loaded and the chain).  Don't transform a pre-increment
5559     // load, for example, which produces an extra value.  Otherwise the
5560     // transformation is not equivalent, and the downstream logic to replace
5561     // uses gets things wrong.
5562     if (Load->getNumValues() > 2)
5563       return false;
5564 
5565     // If the load that we're shrinking is an extload and we're not just
5566     // discarding the extension we can't simply shrink the load. Bail.
5567     // TODO: It would be possible to merge the extensions in some cases.
5568     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5569         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5570       return false;
5571 
5572     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5573       return false;
5574   } else {
5575     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5576     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5577     // Can't write outside the original store
5578     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5579       return false;
5580 
5581     if (LegalOperations &&
5582         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5583       return false;
5584   }
5585   return true;
5586 }
5587 
5588 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5589                                     SmallVectorImpl<LoadSDNode*> &Loads,
5590                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5591                                     ConstantSDNode *Mask,
5592                                     SDNode *&NodeToMask) {
5593   // Recursively search for the operands, looking for loads which can be
5594   // narrowed.
5595   for (SDValue Op : N->op_values()) {
5596     if (Op.getValueType().isVector())
5597       return false;
5598 
5599     // Some constants may need fixing up later if they are too large.
5600     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5601       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5602           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5603         NodesWithConsts.insert(N);
5604       continue;
5605     }
5606 
5607     if (!Op.hasOneUse())
5608       return false;
5609 
5610     switch(Op.getOpcode()) {
5611     case ISD::LOAD: {
5612       auto *Load = cast<LoadSDNode>(Op);
5613       EVT ExtVT;
5614       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5615           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5616 
5617         // ZEXTLOAD is already small enough.
5618         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5619             ExtVT.bitsGE(Load->getMemoryVT()))
5620           continue;
5621 
5622         // Use LE to convert equal sized loads to zext.
5623         if (ExtVT.bitsLE(Load->getMemoryVT()))
5624           Loads.push_back(Load);
5625 
5626         continue;
5627       }
5628       return false;
5629     }
5630     case ISD::ZERO_EXTEND:
5631     case ISD::AssertZext: {
5632       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5633       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5634       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5635         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5636         Op.getOperand(0).getValueType();
5637 
5638       // We can accept extending nodes if the mask is wider or an equal
5639       // width to the original type.
5640       if (ExtVT.bitsGE(VT))
5641         continue;
5642       break;
5643     }
5644     case ISD::OR:
5645     case ISD::XOR:
5646     case ISD::AND:
5647       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5648                              NodeToMask))
5649         return false;
5650       continue;
5651     }
5652 
5653     // Allow one node which will be masked along with any loads found.
5654     if (NodeToMask)
5655       return false;
5656 
5657     // Also ensure that the node to be masked only produces one data result.
5658     NodeToMask = Op.getNode();
5659     if (NodeToMask->getNumValues() > 1) {
5660       bool HasValue = false;
5661       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5662         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5663         if (VT != MVT::Glue && VT != MVT::Other) {
5664           if (HasValue) {
5665             NodeToMask = nullptr;
5666             return false;
5667           }
5668           HasValue = true;
5669         }
5670       }
5671       assert(HasValue && "Node to be masked has no data result?");
5672     }
5673   }
5674   return true;
5675 }
5676 
5677 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5678   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5679   if (!Mask)
5680     return false;
5681 
5682   if (!Mask->getAPIntValue().isMask())
5683     return false;
5684 
5685   // No need to do anything if the and directly uses a load.
5686   if (isa<LoadSDNode>(N->getOperand(0)))
5687     return false;
5688 
5689   SmallVector<LoadSDNode*, 8> Loads;
5690   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5691   SDNode *FixupNode = nullptr;
5692   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5693     if (Loads.size() == 0)
5694       return false;
5695 
5696     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5697     SDValue MaskOp = N->getOperand(1);
5698 
5699     // If it exists, fixup the single node we allow in the tree that needs
5700     // masking.
5701     if (FixupNode) {
5702       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5703       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5704                                 FixupNode->getValueType(0),
5705                                 SDValue(FixupNode, 0), MaskOp);
5706       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5707       if (And.getOpcode() == ISD::AND)
5708         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5709     }
5710 
5711     // Narrow any constants that need it.
5712     for (auto *LogicN : NodesWithConsts) {
5713       SDValue Op0 = LogicN->getOperand(0);
5714       SDValue Op1 = LogicN->getOperand(1);
5715 
5716       if (isa<ConstantSDNode>(Op0))
5717         std::swap(Op0, Op1);
5718 
5719       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5720                                 Op1, MaskOp);
5721 
5722       DAG.UpdateNodeOperands(LogicN, Op0, And);
5723     }
5724 
5725     // Create narrow loads.
5726     for (auto *Load : Loads) {
5727       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5728       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5729                                 SDValue(Load, 0), MaskOp);
5730       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5731       if (And.getOpcode() == ISD::AND)
5732         And = SDValue(
5733             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5734       SDValue NewLoad = reduceLoadWidth(And.getNode());
5735       assert(NewLoad &&
5736              "Shouldn't be masking the load if it can't be narrowed");
5737       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5738     }
5739     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5740     return true;
5741   }
5742   return false;
5743 }
5744 
5745 // Unfold
5746 //    x &  (-1 'logical shift' y)
5747 // To
5748 //    (x 'opposite logical shift' y) 'logical shift' y
5749 // if it is better for performance.
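     // For example, x & (-1 << y) clears the low y bits, and so does
     // (x >> y) << y; the shift pair avoids materializing the variable mask.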
5750 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5751   assert(N->getOpcode() == ISD::AND);
5752 
5753   SDValue N0 = N->getOperand(0);
5754   SDValue N1 = N->getOperand(1);
5755 
5756   // Do we actually prefer shifts over mask?
5757   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5758     return SDValue();
5759 
5760   // Try to match  (-1 '[outer] logical shift' y)
5761   unsigned OuterShift;
5762   unsigned InnerShift; // The opposite direction to the OuterShift.
5763   SDValue Y;           // Shift amount.
5764   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5765     if (!M.hasOneUse())
5766       return false;
5767     OuterShift = M->getOpcode();
5768     if (OuterShift == ISD::SHL)
5769       InnerShift = ISD::SRL;
5770     else if (OuterShift == ISD::SRL)
5771       InnerShift = ISD::SHL;
5772     else
5773       return false;
5774     if (!isAllOnesConstant(M->getOperand(0)))
5775       return false;
5776     Y = M->getOperand(1);
5777     return true;
5778   };
5779 
5780   SDValue X;
5781   if (matchMask(N1))
5782     X = N0;
5783   else if (matchMask(N0))
5784     X = N1;
5785   else
5786     return SDValue();
5787 
5788   SDLoc DL(N);
5789   EVT VT = N->getValueType(0);
5790 
5791   //     tmp = x   'opposite logical shift' y
5792   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5793   //     ret = tmp 'logical shift' y
5794   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5795 
5796   return T1;
5797 }
5798 
5799 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5800 /// For a target with a bit test, this is expected to become test + set and save
5801 /// at least 1 instruction.
5802 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5803   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5804 
5805   // This is probably not worthwhile without a supported type.
5806   EVT VT = And->getValueType(0);
5807   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5808   if (!TLI.isTypeLegal(VT))
5809     return SDValue();
5810 
5811   // Look through an optional extension and find a 'not'.
5812   // TODO: Should we favor test+set even without the 'not' op?
5813   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5814   if (Not.getOpcode() == ISD::ANY_EXTEND)
5815     Not = Not.getOperand(0);
5816   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5817     return SDValue();
5818 
5819   // Look through an optional truncation. The source operand may not be the same
5820   // type as the original 'and', but that is ok because we are masking off
5821   // everything but the low bit.
5822   SDValue Srl = Not.getOperand(0);
5823   if (Srl.getOpcode() == ISD::TRUNCATE)
5824     Srl = Srl.getOperand(0);
5825 
5826   // Match a shift-right by constant.
5827   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5828       !isa<ConstantSDNode>(Srl.getOperand(1)))
5829     return SDValue();
5830 
5831   // We might have looked through casts that make this transform invalid.
5832   // TODO: If the source type is wider than the result type, do the mask and
5833   //       compare in the source type.
5834   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5835   unsigned VTBitWidth = VT.getSizeInBits();
5836   if (ShiftAmt.uge(VTBitWidth))
5837     return SDValue();
5838 
5839   // Turn this into a bit-test pattern using mask op + setcc:
5840   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
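       // For example, with C = 3 this tests whether bit 3 of X is clear:
       // and (not (srl X, 3)), 1 --> (and X, 8) == 0.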
5841   SDLoc DL(And);
5842   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5843   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5844   SDValue Mask = DAG.getConstant(
5845       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5846   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5847   SDValue Zero = DAG.getConstant(0, DL, VT);
5848   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5849   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5850 }
5851 
5852 /// For targets that support usubsat, match a bit-hack form of that operation
5853 /// that ends in 'and' and convert it.
5854 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
5855   SDValue N0 = N->getOperand(0);
5856   SDValue N1 = N->getOperand(1);
5857   EVT VT = N1.getValueType();
5858 
5859   // Canonicalize SRA as operand 1.
5860   if (N0.getOpcode() == ISD::SRA)
5861     std::swap(N0, N1);
5862 
5863   // xor/add with SMIN (signmask) are logically equivalent.
5864   if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5865     return SDValue();
5866 
5867   if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5868       N0.getOperand(0) != N1.getOperand(0))
5869     return SDValue();
5870 
5871   unsigned BitWidth = VT.getScalarSizeInBits();
5872   ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
5873   ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
5874   if (!XorC || !XorC->getAPIntValue().isSignMask() ||
5875       !SraC || SraC->getAPIntValue() != BitWidth - 1)
5876     return SDValue();
5877 
5878   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
5879   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
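       // This works because (X s>> 7) is all-ones exactly when the sign bit of
       // X is set, in which case the xor/add computes X - 128; otherwise the
       // mask is zero and the result is 0, matching usubsat's clamp at zero.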
5880   SDLoc DL(N);
5881   SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
5882   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
5883 }
5884 
5885 SDValue DAGCombiner::visitAND(SDNode *N) {
5886   SDValue N0 = N->getOperand(0);
5887   SDValue N1 = N->getOperand(1);
5888   EVT VT = N1.getValueType();
5889 
5890   // x & x --> x
5891   if (N0 == N1)
5892     return N0;
5893 
5894   // fold (and c1, c2) -> c1&c2
5895   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5896     return C;
5897 
5898   // canonicalize constant to RHS
5899   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5900       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5901     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5902 
5903   // fold vector ops
5904   if (VT.isVector()) {
5905     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
5906       return FoldedVOp;
5907 
5908     // fold (and x, 0) -> 0, vector edition
5909     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5910       // do not return N1, because undef node may exist in N1
5911       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
5912                              SDLoc(N), N1.getValueType());
5913 
5914     // fold (and x, -1) -> x, vector edition
5915     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5916       return N0;
5917 
5918     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5919     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5920     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5921     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5922         N0.hasOneUse() && N1.hasOneUse()) {
5923       EVT LoadVT = MLoad->getMemoryVT();
5924       EVT ExtVT = VT;
5925       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5926         // For this AND to be a zero extension of the masked load the elements
5927         // of the BuildVec must mask the bottom bits of the extended element
5928         // type
5929         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5930           uint64_t ElementSize =
5931               LoadVT.getVectorElementType().getScalarSizeInBits();
5932           if (Splat->getAPIntValue().isMask(ElementSize)) {
5933             return DAG.getMaskedLoad(
5934                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5935                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5936                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5937                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5938           }
5939         }
5940       }
5941     }
5942   }
5943 
5944   // fold (and x, -1) -> x
5945   if (isAllOnesConstant(N1))
5946     return N0;
5947 
5948   // if (and x, c) is known to be zero, return 0
5949   unsigned BitWidth = VT.getScalarSizeInBits();
5950   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5951   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
5952     return DAG.getConstant(0, SDLoc(N), VT);
5953 
5954   if (SDValue NewSel = foldBinOpIntoSelect(N))
5955     return NewSel;
5956 
5957   // reassociate and
5958   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5959     return RAND;
5960 
5961   // Try to convert a constant mask AND into a shuffle clear mask.
5962   if (VT.isVector())
5963     if (SDValue Shuffle = XformToShuffleWithZero(N))
5964       return Shuffle;
5965 
5966   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
5967     return Combined;
5968 
5969   // fold (and (or x, C), D) -> D if (C & D) == D
5970   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5971     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5972   };
5973   if (N0.getOpcode() == ISD::OR &&
5974       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5975     return N1;
5976   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5977   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5978     SDValue N0Op0 = N0.getOperand(0);
5979     APInt Mask = ~N1C->getAPIntValue();
5980     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5981     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5982       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5983                                  N0.getValueType(), N0Op0);
5984 
5985       // Replace uses of the AND with uses of the Zero extend node.
5986       CombineTo(N, Zext);
5987 
5988       // We actually want to replace all uses of the any_extend with the
5989       // zero_extend, to avoid duplicating things.  This will later cause this
5990       // AND to be folded.
5991       CombineTo(N0.getNode(), Zext);
5992       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5993     }
5994   }
5995 
5996   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5997   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5998   // already be zero by virtue of the width of the base type of the load.
5999   //
6000   // the 'X' node here can either be nothing or an extract_vector_elt to catch
6001   // more cases.
6002   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6003        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6004        N0.getOperand(0).getOpcode() == ISD::LOAD &&
6005        N0.getOperand(0).getResNo() == 0) ||
6006       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6007     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6008                                          N0 : N0.getOperand(0) );
6009 
6010     // Get the constant (if applicable) the zero'th operand is being ANDed with.
6011     // This can be a pure constant or a vector splat, in which case we treat the
6012     // vector as a scalar and use the splat value.
6013     APInt Constant = APInt::getZero(1);
6014     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
6015       Constant = C->getAPIntValue();
6016     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6017       APInt SplatValue, SplatUndef;
6018       unsigned SplatBitSize;
6019       bool HasAnyUndefs;
6020       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6021                                              SplatBitSize, HasAnyUndefs);
6022       if (IsSplat) {
6023         // Undef bits can contribute to a possible optimisation if set, so
6024         // set them.
6025         SplatValue |= SplatUndef;
6026 
6027         // The splat value may be something like "0x00FFFFFF", which means 0 for
6028         // the first vector value and FF for the rest, repeating. We need a mask
6029         // that will apply equally to all members of the vector, so AND all the
6030         // lanes of the constant together.
6031         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6032 
6033         // If the splat value has been compressed to a bitlength lower
6034         // than the size of the vector lane, we need to re-expand it to
6035         // the lane size.
6036         if (EltBitWidth > SplatBitSize)
6037           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6038                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6039             SplatValue |= SplatValue.shl(SplatBitSize);
6040 
6041         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6042         // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
6043         if ((SplatBitSize % EltBitWidth) == 0) {
6044           Constant = APInt::getAllOnes(EltBitWidth);
6045           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6046             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6047         }
6048       }
6049     }
6050 
6051     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6052     // actually legal and isn't going to get expanded, else this is a false
6053     // optimisation.
6054     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6055                                                     Load->getValueType(0),
6056                                                     Load->getMemoryVT());
6057 
6058     // Resize the constant to the same size as the original memory access before
6059     // extension. If it is still the AllOnesValue then this AND is completely
6060     // unneeded.
6061     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6062 
6063     bool B;
6064     switch (Load->getExtensionType()) {
6065     default: B = false; break;
6066     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6067     case ISD::ZEXTLOAD:
6068     case ISD::NON_EXTLOAD: B = true; break;
6069     }
6070 
6071     if (B && Constant.isAllOnes()) {
6072       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6073       // preserve semantics once we get rid of the AND.
6074       SDValue NewLoad(Load, 0);
6075 
6076       // Fold the AND away. NewLoad may get replaced immediately.
6077       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6078 
6079       if (Load->getExtensionType() == ISD::EXTLOAD) {
6080         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6081                               Load->getValueType(0), SDLoc(Load),
6082                               Load->getChain(), Load->getBasePtr(),
6083                               Load->getOffset(), Load->getMemoryVT(),
6084                               Load->getMemOperand());
6085         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6086         if (Load->getNumValues() == 3) {
6087           // PRE/POST_INC loads have 3 values.
6088           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6089                            NewLoad.getValue(2) };
6090           CombineTo(Load, To, 3, true);
6091         } else {
6092           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6093         }
6094       }
6095 
6096       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6097     }
6098   }
6099 
6100   // fold (and (masked_gather x)) -> (zext_masked_gather x)
6101   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6102     EVT MemVT = GN0->getMemoryVT();
6103     EVT ScalarVT = MemVT.getScalarType();
6104 
6105     if (SDValue(GN0, 0).hasOneUse() &&
6106         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6107         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
6108       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
6109                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
6110 
6111       SDValue ZExtLoad = DAG.getMaskedGather(
6112           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6113           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6114 
6115       CombineTo(N, ZExtLoad);
6116       AddToWorklist(ZExtLoad.getNode());
6117       // Avoid recheck of N.
6118       return SDValue(N, 0);
6119     }
6120   }
6121 
6122   // fold (and (load x), 255) -> (zextload x, i8)
6123   // fold (and (extload x, i16), 255) -> (zextload x, i8)
6124   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6125   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6126                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
6127                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6128     if (SDValue Res = reduceLoadWidth(N)) {
6129       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6130         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6131       AddToWorklist(N);
6132       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6133       return SDValue(N, 0);
6134     }
6135   }
6136 
6137   if (LegalTypes) {
6138     // Attempt to propagate the AND back up to the leaves which, if they're
6139     // loads, can be combined to narrow loads and the AND node can be removed.
6140     // Perform after legalization so that extend nodes will already be
6141     // combined into the loads.
6142     if (BackwardsPropagateMask(N))
6143       return SDValue(N, 0);
6144   }
6145 
6146   if (SDValue Combined = visitANDLike(N0, N1, N))
6147     return Combined;
6148 
6149   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
6150   if (N0.getOpcode() == N1.getOpcode())
6151     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6152       return V;
6153 
6154   // Masking the negated extension of a boolean is just the zero-extended
6155   // boolean:
6156   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6157   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
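       // This holds because negating either extension of a bool yields a value
       // whose low bit equals the bool, so masking with 1 recovers zext(bool X).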
6158   //
6159   // Note: the SimplifyDemandedBits fold below can make an information-losing
6160   // transform, and then we have no way to find this better fold.
6161   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6162     if (isNullOrNullSplat(N0.getOperand(0))) {
6163       SDValue SubRHS = N0.getOperand(1);
6164       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6165           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6166         return SubRHS;
6167       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6168           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6169         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6170     }
6171   }
6172 
6173   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6174   // fold (and (sra)) -> (and (srl)) when possible.
6175   if (SimplifyDemandedBits(SDValue(N, 0)))
6176     return SDValue(N, 0);
6177 
6178   // fold (zext_inreg (extload x)) -> (zextload x)
6179   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6180   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6181       (ISD::isEXTLoad(N0.getNode()) ||
6182        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6183     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6184     EVT MemVT = LN0->getMemoryVT();
6185     // If we zero all the possible extended bits, then we can turn this into
6186     // a zextload if we are running before legalize or the operation is legal.
6187     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6188     unsigned MemBitSize = MemVT.getScalarSizeInBits();
6189     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6190     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6191         ((!LegalOperations && LN0->isSimple()) ||
6192          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6193       SDValue ExtLoad =
6194           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6195                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6196       AddToWorklist(N);
6197       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6198       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6199     }
6200   }
6201 
6202   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6203   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6204     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6205                                            N0.getOperand(1), false))
6206       return BSwap;
6207   }
6208 
6209   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6210     return Shifts;
6211 
6212   if (TLI.hasBitTest(N0, N1))
6213     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6214       return V;
6215 
6216   // Recognize the following pattern:
6217   //
6218   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6219   //
6220   // where bitmask is a mask that clears the upper bits of AndVT. The
6221   // number of bits in bitmask must be a power of two.
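       // For example, (and (sign_extend i8 X to i32), 0xff) keeps exactly the
       // low 8 bits of X and zeroes the rest, which is (zero_extend i8 X to
       // i32).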
6222   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6223     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6224       return false;
6225 
6226     auto *C = dyn_cast<ConstantSDNode>(RHS);
6227     if (!C)
6228       return false;
6229 
6230     if (!C->getAPIntValue().isMask(
6231             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6232       return false;
6233 
6234     return true;
6235   };
6236 
6237   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6238   if (IsAndZeroExtMask(N0, N1))
6239     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6240 
6241   if (hasOperation(ISD::USUBSAT, VT))
6242     if (SDValue V = foldAndToUsubsat(N, DAG))
6243       return V;
6244 
6245   return SDValue();
6246 }
6247 
6248 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
6249 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6250                                         bool DemandHighBits) {
6251   if (!LegalOperations)
6252     return SDValue();
6253 
6254   EVT VT = N->getValueType(0);
6255   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6256     return SDValue();
6257   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6258     return SDValue();
6259 
6260   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6261   bool LookPassAnd0 = false;
6262   bool LookPassAnd1 = false;
6263   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6264     std::swap(N0, N1);
6265   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6266     std::swap(N0, N1);
6267   if (N0.getOpcode() == ISD::AND) {
6268     if (!N0.getNode()->hasOneUse())
6269       return SDValue();
6270     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6271     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6272     // This is needed for X86.
6273     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6274                   N01C->getZExtValue() != 0xFFFF))
6275       return SDValue();
6276     N0 = N0.getOperand(0);
6277     LookPassAnd0 = true;
6278   }
6279 
6280   if (N1.getOpcode() == ISD::AND) {
6281     if (!N1.getNode()->hasOneUse())
6282       return SDValue();
6283     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6284     if (!N11C || N11C->getZExtValue() != 0xFF)
6285       return SDValue();
6286     N1 = N1.getOperand(0);
6287     LookPassAnd1 = true;
6288   }
6289 
6290   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6291     std::swap(N0, N1);
6292   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6293     return SDValue();
6294   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6295     return SDValue();
6296 
6297   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6298   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6299   if (!N01C || !N11C)
6300     return SDValue();
6301   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6302     return SDValue();
6303 
6304   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6305   SDValue N00 = N0->getOperand(0);
6306   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6307     if (!N00.getNode()->hasOneUse())
6308       return SDValue();
6309     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6310     if (!N001C || N001C->getZExtValue() != 0xFF)
6311       return SDValue();
6312     N00 = N00.getOperand(0);
6313     LookPassAnd0 = true;
6314   }
6315 
6316   SDValue N10 = N1->getOperand(0);
6317   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6318     if (!N10.getNode()->hasOneUse())
6319       return SDValue();
6320     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6321     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6322     // for X86.
6323     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6324                    N101C->getZExtValue() != 0xFFFF))
6325       return SDValue();
6326     N10 = N10.getOperand(0);
6327     LookPassAnd1 = true;
6328   }
6329 
6330   if (N00 != N10)
6331     return SDValue();
6332 
6333   // Make sure everything beyond the low halfword gets set to zero since the SRL
6334   // 16 will clear the top bits.
6335   unsigned OpSizeInBits = VT.getSizeInBits();
6336   if (DemandHighBits && OpSizeInBits > 16) {
6337     // If the left-shift isn't masked out then the only way this is a bswap is
6338     // if all bits beyond the low 8 are 0. In that case the entire pattern
6339     // reduces to a left shift anyway: leave it for other parts of the combiner.
6340     if (!LookPassAnd0)
6341       return SDValue();
6342 
6343     // However, if the right shift isn't masked out then it might be because
6344     // it's not needed. See if we can spot that too.
6345     if (!LookPassAnd1 &&
6346         !DAG.MaskedValueIsZero(
6347             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6348       return SDValue();
6349   }
6350 
6351   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6352   if (OpSizeInBits > 16) {
6353     SDLoc DL(N);
6354     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6355                       DAG.getConstant(OpSizeInBits - 16, DL,
6356                                       getShiftAmountTy(VT)));
6357   }
6358   return Res;
6359 }
6360 
6361 /// Return true if the specified node is an element that makes up a 32-bit
6362 /// packed halfword byteswap.
6363 /// ((x & 0x000000ff) << 8) |
6364 /// ((x & 0x0000ff00) >> 8) |
6365 /// ((x & 0x00ff0000) << 8) |
6366 /// ((x & 0xff000000) >> 8)
6367 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6368   if (!N.getNode()->hasOneUse())
6369     return false;
6370 
6371   unsigned Opc = N.getOpcode();
6372   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6373     return false;
6374 
6375   SDValue N0 = N.getOperand(0);
6376   unsigned Opc0 = N0.getOpcode();
6377   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6378     return false;
6379 
6380   ConstantSDNode *N1C = nullptr;
6381   // SHL or SRL: look upstream for AND mask operand
6382   if (Opc == ISD::AND)
6383     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6384   else if (Opc0 == ISD::AND)
6385     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6386   if (!N1C)
6387     return false;
6388 
6389   unsigned MaskByteOffset;
6390   switch (N1C->getZExtValue()) {
6391   default:
6392     return false;
6393   case 0xFF:       MaskByteOffset = 0; break;
6394   case 0xFF00:     MaskByteOffset = 1; break;
6395   case 0xFFFF:
6396     // In case demanded bits didn't clear the bits that will be shifted out.
6397     // This is needed for X86.
6398     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6399       MaskByteOffset = 1;
6400       break;
6401     }
6402     return false;
6403   case 0xFF0000:   MaskByteOffset = 2; break;
6404   case 0xFF000000: MaskByteOffset = 3; break;
6405   }
6406 
6407   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6408   if (Opc == ISD::AND) {
6409     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6410       // (x >> 8) & 0xff
6411       // (x >> 8) & 0xff0000
6412       if (Opc0 != ISD::SRL)
6413         return false;
6414       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6415       if (!C || C->getZExtValue() != 8)
6416         return false;
6417     } else {
6418       // (x << 8) & 0xff00
6419       // (x << 8) & 0xff000000
6420       if (Opc0 != ISD::SHL)
6421         return false;
6422       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6423       if (!C || C->getZExtValue() != 8)
6424         return false;
6425     }
6426   } else if (Opc == ISD::SHL) {
6427     // (x & 0xff) << 8
6428     // (x & 0xff0000) << 8
6429     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6430       return false;
6431     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6432     if (!C || C->getZExtValue() != 8)
6433       return false;
6434   } else { // Opc == ISD::SRL
6435     // (x & 0xff00) >> 8
6436     // (x & 0xff000000) >> 8
6437     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6438       return false;
6439     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6440     if (!C || C->getZExtValue() != 8)
6441       return false;
6442   }
6443 
6444   if (Parts[MaskByteOffset])
6445     return false;
6446 
6447   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6448   return true;
6449 }
6450 
6451 // Match 2 elements of a packed halfword bswap.
6452 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6453   if (N.getOpcode() == ISD::OR)
6454     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6455            isBSwapHWordElement(N.getOperand(1), Parts);
6456 
6457   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6458     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6459     if (!C || C->getAPIntValue() != 16)
6460       return false;
6461     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6462     return true;
6463   }
6464 
6465   return false;
6466 }
6467 
6468 // Match this pattern:
6469 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6470 // And rewrite this to:
6471 //   (rotr (bswap A), 16)
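     // For example, with A = 0xAABBCCDD the masked shifts produce 0xBB00DD00
     // and 0x00AA00CC, which or together to 0xBBAADDCC; (bswap A) = 0xDDCCBBAA
     // rotated right by 16 gives the same value.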
6472 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6473                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6474                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6475   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6476          "MatchBSwapHWordOrAndAnd: expecting i32");
6477   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6478     return SDValue();
6479   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6480     return SDValue();
6481   // TODO: this is too restrictive; lifting this restriction requires more tests
6482   if (!N0->hasOneUse() || !N1->hasOneUse())
6483     return SDValue();
6484   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6485   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6486   if (!Mask0 || !Mask1)
6487     return SDValue();
6488   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6489       Mask1->getAPIntValue() != 0x00ff00ff)
6490     return SDValue();
6491   SDValue Shift0 = N0.getOperand(0);
6492   SDValue Shift1 = N1.getOperand(0);
6493   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6494     return SDValue();
6495   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6496   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6497   if (!ShiftAmt0 || !ShiftAmt1)
6498     return SDValue();
6499   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6500     return SDValue();
6501   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6502     return SDValue();
6503 
6504   SDLoc DL(N);
6505   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6506   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6507   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6508 }
6509 
6510 /// Match a 32-bit packed halfword bswap. That is
6511 /// ((x & 0x000000ff) << 8) |
6512 /// ((x & 0x0000ff00) >> 8) |
6513 /// ((x & 0x00ff0000) << 8) |
6514 /// ((x & 0xff000000) >> 8)
6515 /// => (rotl (bswap x), 16)
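     /// For example, x = 0xAABBCCDD produces 0xBBAADDCC, which equals
     /// bswap(x) = 0xDDCCBBAA rotated left by 16.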
6516 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6517   if (!LegalOperations)
6518     return SDValue();
6519 
6520   EVT VT = N->getValueType(0);
6521   if (VT != MVT::i32)
6522     return SDValue();
6523   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6524     return SDValue();
6525 
6526   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6527                                               getShiftAmountTy(VT)))
6528     return BSwap;
6529 
6530   // Try again with commuted operands.
6531   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6532                                               getShiftAmountTy(VT)))
6533     return BSwap;
6534 
6535 
6536   // Look for either
6537   // (or (bswaphpair), (bswaphpair))
6538   // (or (or (bswaphpair), (and)), (and))
6539   // (or (or (and), (bswaphpair)), (and))
6540   SDNode *Parts[4] = {};
6541 
6542   if (isBSwapHWordPair(N0, Parts)) {
6543     // (or (or (and), (and)), (or (and), (and)))
6544     if (!isBSwapHWordPair(N1, Parts))
6545       return SDValue();
6546   } else if (N0.getOpcode() == ISD::OR) {
6547     // (or (or (or (and), (and)), (and)), (and))
6548     if (!isBSwapHWordElement(N1, Parts))
6549       return SDValue();
6550     SDValue N00 = N0.getOperand(0);
6551     SDValue N01 = N0.getOperand(1);
6552     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6553         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6554       return SDValue();
6555   } else
6556     return SDValue();
6557 
6558   // Make sure the parts are all coming from the same node.
6559   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6560     return SDValue();
6561 
6562   SDLoc DL(N);
6563   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6564                               SDValue(Parts[0], 0));
6565 
6566   // Result of the bswap should be rotated by 16. If it's not legal, then
6567   // do  (x << 16) | (x >> 16).
6568   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6569   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6570     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6571   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6572     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6573   return DAG.getNode(ISD::OR, DL, VT,
6574                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6575                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6576 }
6577 
6578 /// This contains all DAGCombine rules which reduce two values combined by
6579 /// an Or operation to a single value \see visitANDLike().
6580 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6581   EVT VT = N1.getValueType();
6582   SDLoc DL(N);
6583 
6584   // fold (or x, undef) -> -1
6585   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6586     return DAG.getAllOnesConstant(DL, VT);
6587 
6588   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6589     return V;
6590 
6591   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6592   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6593       // Don't increase # computations.
6594       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6595     // We can only do this xform if we know that bits from X that are set in C2
6596     // but not in C1 are already zero.  Likewise for Y.
6597     if (const ConstantSDNode *N0O1C =
6598         getAsNonOpaqueConstant(N0.getOperand(1))) {
6599       if (const ConstantSDNode *N1O1C =
6600           getAsNonOpaqueConstant(N1.getOperand(1))) {
6601         // We can only do this xform if we know that bits from X that are set in
6602         // C2 but not in C1 are already zero.  Likewise for Y.
6603         const APInt &LHSMask = N0O1C->getAPIntValue();
6604         const APInt &RHSMask = N1O1C->getAPIntValue();
6605 
6606         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6607             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6608           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6609                                   N0.getOperand(0), N1.getOperand(0));
6610           return DAG.getNode(ISD::AND, DL, VT, X,
6611                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6612         }
6613       }
6614     }
6615   }
6616 
6617   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6618   if (N0.getOpcode() == ISD::AND &&
6619       N1.getOpcode() == ISD::AND &&
6620       N0.getOperand(0) == N1.getOperand(0) &&
6621       // Don't increase # computations.
6622       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6623     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6624                             N0.getOperand(1), N1.getOperand(1));
6625     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6626   }
6627 
6628   return SDValue();
6629 }
6630 
6631 /// OR combines for which the commuted variant will be tried as well.
6632 static SDValue visitORCommutative(
6633     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6634   EVT VT = N0.getValueType();
6635   if (N0.getOpcode() == ISD::AND) {
6636     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
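    // Valid because (X & ~Y) | Y == (X | Y): Y supplies every bit that the
    // mask clears.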
6637     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6638       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6639 
6640     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6641     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6642       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6643   }
6644 
6645   return SDValue();
6646 }
6647 
6648 SDValue DAGCombiner::visitOR(SDNode *N) {
6649   SDValue N0 = N->getOperand(0);
6650   SDValue N1 = N->getOperand(1);
6651   EVT VT = N1.getValueType();
6652 
6653   // x | x --> x
6654   if (N0 == N1)
6655     return N0;
6656 
6657   // fold (or c1, c2) -> c1|c2
6658   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6659     return C;
6660 
6661   // canonicalize constant to RHS
6662   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6663       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6664     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6665 
6666   // fold vector ops
6667   if (VT.isVector()) {
6668     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6669       return FoldedVOp;
6670 
6671     // fold (or x, 0) -> x, vector edition
6672     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6673       return N0;
6674 
6675     // fold (or x, -1) -> -1, vector edition
6676     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6677       // do not return N1, because an undef node may exist in N1
6678       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6679 
6680     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6681     // Do this only if the resulting shuffle is legal.
6682     if (isa<ShuffleVectorSDNode>(N0) &&
6683         isa<ShuffleVectorSDNode>(N1) &&
6684         // Avoid folding a node with illegal type.
6685         TLI.isTypeLegal(VT)) {
6686       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6687       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6688       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6689       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6690       // Ensure both shuffles have a zero input.
6691       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6692         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6693         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6694         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6695         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6696         bool CanFold = true;
6697         int NumElts = VT.getVectorNumElements();
6698         SmallVector<int, 4> Mask(NumElts);
6699 
6700         for (int i = 0; i != NumElts; ++i) {
6701           int M0 = SV0->getMaskElt(i);
6702           int M1 = SV1->getMaskElt(i);
6703 
6704           // Determine if either index is pointing to a zero vector.
6705           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6706           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6707 
6708           // If one element is zero and the other side is undef, keep undef.
6709           // This also handles the case that both are undef.
6710           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6711             Mask[i] = -1;
6712             continue;
6713           }
6714 
6715           // Make sure only one of the elements is zero.
6716           if (M0Zero == M1Zero) {
6717             CanFold = false;
6718             break;
6719           }
6720 
6721           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6722 
6723           // We have a zero and non-zero element. If the non-zero came from
6724           // SV0 make the index a LHS index. If it came from SV1, make it
6725           // a RHS index. We need to mod by NumElts because we don't care
6726           // which operand it came from in the original shuffles.
6727           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6728         }
6729 
6730         if (CanFold) {
6731           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6732           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6733 
6734           SDValue LegalShuffle =
6735               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6736                                           Mask, DAG);
6737           if (LegalShuffle)
6738             return LegalShuffle;
6739         }
6740       }
6741     }
6742   }
6743 
6744   // fold (or x, 0) -> x
6745   if (isNullConstant(N1))
6746     return N0;
6747 
6748   // fold (or x, -1) -> -1
6749   if (isAllOnesConstant(N1))
6750     return N1;
6751 
6752   if (SDValue NewSel = foldBinOpIntoSelect(N))
6753     return NewSel;
6754 
6755   // fold (or x, c) -> c iff (x & ~c) == 0
6756   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6757   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6758     return N1;
6759 
6760   if (SDValue Combined = visitORLike(N0, N1, N))
6761     return Combined;
6762 
6763   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6764     return Combined;
6765 
6766   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6767   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6768     return BSwap;
6769   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6770     return BSwap;
6771 
6772   // reassociate or
6773   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6774     return ROR;
6775 
6776   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6777   // iff (c1 & c2) != 0 or c1/c2 are undef.
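  // For illustration (hypothetical constants): with c1 = 0x0F and c2 = 0x01,
  // (X & 0x0F) | 0x01 == (X | 0x01) & 0x0F, and c1 | c2 == 0x0F, giving the
  // canonical form (and (or X, 0x01), 0x0F).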
6778   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6779     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6780   };
6781   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6782       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6783     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6784                                                  {N1, N0.getOperand(1)})) {
6785       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6786       AddToWorklist(IOR.getNode());
6787       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6788     }
6789   }
6790 
6791   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6792     return Combined;
6793   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6794     return Combined;
6795 
6796   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6797   if (N0.getOpcode() == N1.getOpcode())
6798     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6799       return V;
6800 
6801   // See if this is some rotate idiom.
6802   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6803     return Rot;
6804 
6805   if (SDValue Load = MatchLoadCombine(N))
6806     return Load;
6807 
6808   // Simplify the operands using demanded-bits information.
6809   if (SimplifyDemandedBits(SDValue(N, 0)))
6810     return SDValue(N, 0);
6811 
6812   // If OR can be rewritten into ADD, try combines based on ADD.
6813   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6814       DAG.haveNoCommonBitsSet(N0, N1))
6815     if (SDValue Combined = visitADDLike(N))
6816       return Combined;
6817 
6818   return SDValue();
6819 }
6820 
6821 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6822   if (Op.getOpcode() == ISD::AND &&
6823       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6824     Mask = Op.getOperand(1);
6825     return Op.getOperand(0);
6826   }
6827   return Op;
6828 }
6829 
6830 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6831 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6832                             SDValue &Mask) {
6833   Op = stripConstantMask(DAG, Op, Mask);
6834   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6835     Shift = Op;
6836     return true;
6837   }
6838   return false;
6839 }
6840 
6841 /// Helper function for visitOR to extract the needed side of a rotate idiom
6842 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6843 /// InstCombine merged some outside op with one of the shifts from
6844 /// the rotate pattern.
6845 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6846 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6847 /// patterns:
6848 ///
6849 ///   (or (add v v) (shrl v bitwidth-1)):
6850 ///     expands (add v v) -> (shl v 1)
6851 ///
6852 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6853 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6854 ///
6855 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6856 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6857 ///
6858 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6859 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6860 ///
6861 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6862 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6863 ///
6864 /// Such that in all cases, c3+c2==bitwidth(op v c1).
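///
/// For example (i32, hypothetical constants): in
/// (or (mul v 4) (shrl (mul v 2) 31)), the mul side expands as
/// (mul v 4) -> (shl (mul v 2) 1), since c3 == 1 and c3 + c2 == 1 + 31 == 32.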
6865 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6866                                      SDValue ExtractFrom, SDValue &Mask,
6867                                      const SDLoc &DL) {
6868   assert(OppShift && ExtractFrom && "Empty SDValue");
6869   assert(
6870       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6871       "Existing shift must be valid as a rotate half");
6872 
6873   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6874 
6875   // Value and Type of the shift.
6876   SDValue OppShiftLHS = OppShift.getOperand(0);
6877   EVT ShiftedVT = OppShiftLHS.getValueType();
6878 
6879   // Amount of the existing shift.
6880   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6881 
6882   // (add v v) -> (shl v 1)
6883   // TODO: Should this be a general DAG canonicalization?
6884   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6885       ExtractFrom.getOpcode() == ISD::ADD &&
6886       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6887       ExtractFrom.getOperand(0) == OppShiftLHS &&
6888       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6889     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6890                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6891 
6892   // Preconditions:
6893   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6894   //
6895   // Find opcode of the needed shift to be extracted from (op0 v c0).
6896   unsigned Opcode = ISD::DELETED_NODE;
6897   bool IsMulOrDiv = false;
6898   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6899   // opcode or its arithmetic (mul or udiv) variant.
6900   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6901     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6902     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6903       return false;
6904     Opcode = NeededShift;
6905     return true;
6906   };
6907   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6908   // that the needed shift can be extracted from.
6909   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6910       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6911     return SDValue();
6912 
6913   // op0 must be the same opcode on both sides, have the same LHS argument,
6914   // and produce the same value type.
6915   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6916       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6917       ShiftedVT != ExtractFrom.getValueType())
6918     return SDValue();
6919 
6920   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6921   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6922   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6923   ConstantSDNode *ExtractFromCst =
6924       isConstOrConstSplat(ExtractFrom.getOperand(1));
6925   // TODO: We should be able to handle non-uniform constant vectors for these values
6926   // Check that we have constant values.
6927   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6928       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6929       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6930     return SDValue();
6931 
6932   // Compute the shift amount we need to extract to complete the rotate.
6933   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6934   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6935     return SDValue();
6936   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6937   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6938   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6939   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6940   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6941 
6942   // Now try extract the needed shift from the ExtractFrom op and see if the
6943   // result matches up with the existing shift's LHS op.
6944   if (IsMulOrDiv) {
6945     // Op to extract from is a mul or udiv by a constant.
6946     // Check:
6947     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6948     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6949     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6950                                                  NeededShiftAmt.getZExtValue());
6951     APInt ResultAmt;
6952     APInt Rem;
6953     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6954     if (Rem != 0 || ResultAmt != OppLHSAmt)
6955       return SDValue();
6956   } else {
6957     // Op to extract from is a shift by a constant.
6958     // Check:
6959     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6960     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6961                                           ExtractFromAmt.getBitWidth()))
6962       return SDValue();
6963   }
6964 
6965   // Return the expanded shift op that should allow a rotate to be formed.
6966   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6967   EVT ResVT = ExtractFrom.getValueType();
6968   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6969   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6970 }
6971 
6972 // Return true if we can prove that, whenever Neg and Pos are both in the
6973 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6974 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6975 //
6976 //     (or (shift1 X, Neg), (shift2 X, Pos))
6977 //
6978 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6979 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6980 // to consider shift amounts with defined behavior.
6981 //
6982 // The IsRotate flag should be set when the LHS of both shifts is the same.
6983 // Otherwise if matching a general funnel shift, it should be clear.
6984 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6985                            SelectionDAG &DAG, bool IsRotate) {
6986   // If EltSize is a power of 2 then:
6987   //
6988   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6989   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6990   //
6991   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6992   // for the stronger condition:
6993   //
6994   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6995   //
6996   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6997   // we can just replace Neg with Neg' for the rest of the function.
6998   //
6999   // In other cases we check for the even stronger condition:
7000   //
7001   //     Neg == EltSize - Pos                                    [B]
7002   //
7003   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
7004   // behavior if Pos == 0 (and consequently Neg == EltSize).
7005   //
7006   // We could actually use [A] whenever EltSize is a power of 2, but the
7007   // only extra cases that it would match are those uninteresting ones
7008   // where Neg and Pos are never in range at the same time.  E.g. for
7009   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7010   // as well as (sub 32, Pos), but:
7011   //
7012   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7013   //
7014   // always invokes undefined behavior for 32-bit X.
7015   //
7016   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7017   //
7018   // NOTE: We can only do this when matching an AND and not a general
7019   // funnel shift.
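  // A concrete illustration (assuming EltSize == 32): Neg == (sub 32, Pos)
  // satisfies the stronger condition [B] directly, while
  // Neg == (and (sub 32, Pos), 31) only satisfies the masked condition [A].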
7020   unsigned MaskLoBits = 0;
7021   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
7022     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
7023       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
7024       unsigned Bits = Log2_64(EltSize);
7025       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
7026           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
7027         Neg = Neg.getOperand(0);
7028         MaskLoBits = Bits;
7029       }
7030     }
7031   }
7032 
7033   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
7034   if (Neg.getOpcode() != ISD::SUB)
7035     return false;
7036   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
7037   if (!NegC)
7038     return false;
7039   SDValue NegOp1 = Neg.getOperand(1);
7040 
7041   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
7042   // Pos'.  The truncation is redundant for the purpose of the equality.
7043   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
7044     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
7045       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
7046       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
7047           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
7048            MaskLoBits))
7049         Pos = Pos.getOperand(0);
7050     }
7051   }
7052 
7053   // The condition we need is now:
7054   //
7055   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7056   //
7057   // If NegOp1 == Pos then we need:
7058   //
7059   //              EltSize & Mask == NegC & Mask
7060   //
7061   // (because "x & Mask" is a truncation and distributes through subtraction).
7062   //
7063   // We also need to account for a potential truncation of NegOp1 if the amount
7064   // has already been legalized to a shift amount type.
7065   APInt Width;
7066   if ((Pos == NegOp1) ||
7067       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7068     Width = NegC->getAPIntValue();
7069 
7070   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7071   // Then the condition we want to prove becomes:
7072   //
7073   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7074   //
7075   // which, again because "x & Mask" is a truncation, becomes:
7076   //
7077   //                NegC & Mask == (EltSize - PosC) & Mask
7078   //             EltSize & Mask == (NegC + PosC) & Mask
7079   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7080     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
7081       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7082     else
7083       return false;
7084   } else
7085     return false;
7086 
7087   // Now we just need to check that EltSize & Mask == Width & Mask.
7088   if (MaskLoBits)
7089     // EltSize & Mask is 0 since Mask is EltSize - 1.
7090     return Width.getLoBits(MaskLoBits) == 0;
7091   return Width == EltSize;
7092 }
7093 
7094 // A subroutine of MatchRotate used once we have found an OR of two opposite
7095 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
7096 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7097 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
7098 // Neg with outer conversions stripped away.
7099 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7100                                        SDValue Neg, SDValue InnerPos,
7101                                        SDValue InnerNeg, unsigned PosOpcode,
7102                                        unsigned NegOpcode, const SDLoc &DL) {
7103   // fold (or (shl x, (*ext y)),
7104   //          (srl x, (*ext (sub 32, y)))) ->
7105   //   (rotl x, y) or (rotr x, (sub 32, y))
7106   //
7107   // fold (or (shl x, (*ext (sub 32, y))),
7108   //          (srl x, (*ext y))) ->
7109   //   (rotr x, y) or (rotl x, (sub 32, y))
7110   EVT VT = Shifted.getValueType();
7111   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7112                      /*IsRotate*/ true)) {
7113     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7114     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7115                        HasPos ? Pos : Neg);
7116   }
7117 
7118   return SDValue();
7119 }
7120 
7121 // A subroutine of MatchRotate used once we have found an OR of two opposite
7122 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
7123 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7124 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
7125 // Neg with outer conversions stripped away.
7126 // TODO: Merge with MatchRotatePosNeg.
7127 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7128                                        SDValue Neg, SDValue InnerPos,
7129                                        SDValue InnerNeg, unsigned PosOpcode,
7130                                        unsigned NegOpcode, const SDLoc &DL) {
7131   EVT VT = N0.getValueType();
7132   unsigned EltBits = VT.getScalarSizeInBits();
7133 
7134   // fold (or (shl x0, (*ext y)),
7135   //          (srl x1, (*ext (sub 32, y)))) ->
7136   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7137   //
7138   // fold (or (shl x0, (*ext (sub 32, y))),
7139   //          (srl x1, (*ext y))) ->
7140   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7141   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7142     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7143     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7144                        HasPos ? Pos : Neg);
7145   }
7146 
7147   // When matching the shift+xor cases, we can't easily use the xor'd shift
7148   // amount, so for now just use the PosOpcode case if it's legal.
7149   // TODO: When can we use the NegOpcode case?
7150   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7151     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7152       if (Op.getOpcode() != BinOpc)
7153         return false;
7154       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7155       return Cst && (Cst->getAPIntValue() == Imm);
7156     };
7157 
7158     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7159     //   -> (fshl x0, x1, y)
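    // This works because, for y in [0, 31], (xor y, 31) == 31 - y, so the
    // total right shift on x1 is 1 + (31 - y) == 32 - y, matching the fshl
    // semantics even when y == 0 (x1 then contributes nothing).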
7160     if (IsBinOpImm(N1, ISD::SRL, 1) &&
7161         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7162         InnerPos == InnerNeg.getOperand(0) &&
7163         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7164       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7165     }
7166 
7167     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7168     //   -> (fshr x0, x1, y)
7169     if (IsBinOpImm(N0, ISD::SHL, 1) &&
7170         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7171         InnerNeg == InnerPos.getOperand(0) &&
7172         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7173       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7174     }
7175 
7176     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7177     //   -> (fshr x0, x1, y)
7178     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7179     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7180         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7181         InnerNeg == InnerPos.getOperand(0) &&
7182         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7183       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7184     }
7185   }
7186 
7187   return SDValue();
7188 }
7189 
7190 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
7191 // idioms for rotate, and if the target supports rotation instructions, generate
7192 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7193 // with different shifted sources.
7194 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7195   EVT VT = LHS.getValueType();
7196 
7197   // The target must have at least one rotate/funnel flavor.
7198   // We still try to match rotate by constant pre-legalization.
7199   // TODO: Support pre-legalization funnel-shift by constant.
7200   bool HasROTL = hasOperation(ISD::ROTL, VT);
7201   bool HasROTR = hasOperation(ISD::ROTR, VT);
7202   bool HasFSHL = hasOperation(ISD::FSHL, VT);
7203   bool HasFSHR = hasOperation(ISD::FSHR, VT);
7204   if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7205     return SDValue();
7206 
7207   // Check for truncated rotate.
7208   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7209       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7210     assert(LHS.getValueType() == RHS.getValueType());
7211     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7212       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7213     }
7214   }
7215 
7216   // Match "(X shl/srl V1) & V2" where V2 may not be present.
7217   SDValue LHSShift;   // The shift.
7218   SDValue LHSMask;    // AND value if any.
7219   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7220 
7221   SDValue RHSShift;   // The shift.
7222   SDValue RHSMask;    // AND value if any.
7223   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7224 
7225   // If neither side matched a rotate half, bail
7226   if (!LHSShift && !RHSShift)
7227     return SDValue();
7228 
7229   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7230   // side of the rotate, so try to handle that here. In all cases we need to
7231   // pass the matched shift from the opposite side to compute the opcode and
7232   // needed shift amount to extract.  We still want to do this if both sides
7233   // matched a rotate half because one half may be a potential overshift that
7234   // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
7235   // single one).
7236 
7237   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7238   if (LHSShift)
7239     if (SDValue NewRHSShift =
7240             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7241       RHSShift = NewRHSShift;
7242   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7243   if (RHSShift)
7244     if (SDValue NewLHSShift =
7245             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7246       LHSShift = NewLHSShift;
7247 
7248   // If a side is still missing, nothing else we can do.
7249   if (!RHSShift || !LHSShift)
7250     return SDValue();
7251 
7252   // At this point we've matched or extracted a shift op on each side.
7253 
7254   if (LHSShift.getOpcode() == RHSShift.getOpcode())
7255     return SDValue(); // Shifts must disagree.
7256 
7257   // TODO: Support pre-legalization funnel-shift by constant.
7258   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7259   if (!IsRotate && !(HasFSHL || HasFSHR))
7260     return SDValue(); // Requires funnel shift support.
7261 
7262   // Canonicalize shl to left side in a shl/srl pair.
7263   if (RHSShift.getOpcode() == ISD::SHL) {
7264     std::swap(LHS, RHS);
7265     std::swap(LHSShift, RHSShift);
7266     std::swap(LHSMask, RHSMask);
7267   }
7268 
7269   unsigned EltSizeInBits = VT.getScalarSizeInBits();
7270   SDValue LHSShiftArg = LHSShift.getOperand(0);
7271   SDValue LHSShiftAmt = LHSShift.getOperand(1);
7272   SDValue RHSShiftArg = RHSShift.getOperand(0);
7273   SDValue RHSShiftAmt = RHSShift.getOperand(1);
7274 
7275   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7276   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7277   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7278   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7279   // iff C1+C2 == EltSizeInBits
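  // For example (i32, hypothetical constants): (or (shl x, 8), (srl x, 24))
  // becomes (rotl x, 8) or, equivalently, (rotr x, 24), since 8 + 24 == 32.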
7280   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7281                                         ConstantSDNode *RHS) {
7282     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7283   };
7284   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7285     SDValue Res;
7286     if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7287       bool UseROTL = !LegalOperations || HasROTL;
7288       Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7289                         UseROTL ? LHSShiftAmt : RHSShiftAmt);
7290     } else {
7291       bool UseFSHL = !LegalOperations || HasFSHL;
7292       Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7293                         RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7294     }
7295 
7296     // If there is an AND of either shifted operand, apply it to the result.
7297     if (LHSMask.getNode() || RHSMask.getNode()) {
7298       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7299       SDValue Mask = AllOnes;
7300 
7301       if (LHSMask.getNode()) {
7302         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7303         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7304                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7305       }
7306       if (RHSMask.getNode()) {
7307         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7308         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7309                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7310       }
7311 
7312       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7313     }
7314 
7315     return Res;
7316   }
7317 
7318   // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7319   // shift.
7320   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7321     return SDValue();
7322 
7323   // If there is a mask here, and we have a variable shift, we can't be sure
7324   // that we're masking out the right stuff.
7325   if (LHSMask.getNode() || RHSMask.getNode())
7326     return SDValue();
7327 
7328   // If the shift amount is sign/zext/any-extended just peel it off.
7329   SDValue LExtOp0 = LHSShiftAmt;
7330   SDValue RExtOp0 = RHSShiftAmt;
7331   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7332        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7333        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7334        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7335       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7336        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7337        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7338        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7339     LExtOp0 = LHSShiftAmt.getOperand(0);
7340     RExtOp0 = RHSShiftAmt.getOperand(0);
7341   }
7342 
7343   if (IsRotate && (HasROTL || HasROTR)) {
7344     SDValue TryL =
7345         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7346                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7347     if (TryL)
7348       return TryL;
7349 
7350     SDValue TryR =
7351         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7352                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7353     if (TryR)
7354       return TryR;
7355   }
7356 
7357   SDValue TryL =
7358       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7359                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7360   if (TryL)
7361     return TryL;
7362 
7363   SDValue TryR =
7364       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7365                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7366   if (TryR)
7367     return TryR;
7368 
7369   return SDValue();
7370 }
7371 
7372 namespace {
7373 
7374 /// Represents known origin of an individual byte in load combine pattern. The
7375 /// value of the byte is either constant zero or comes from memory.
7376 struct ByteProvider {
7377   // For constant zero providers Load is set to nullptr. For memory providers
7378   // Load represents the node which loads the byte from memory.
7379   // ByteOffset is the offset of the byte in the value produced by the load.
7380   LoadSDNode *Load = nullptr;
7381   unsigned ByteOffset = 0;
7382 
7383   ByteProvider() = default;
7384 
7385   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7386     return ByteProvider(Load, ByteOffset);
7387   }
7388 
7389   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7390 
7391   bool isConstantZero() const { return !Load; }
7392   bool isMemory() const { return Load; }
7393 
7394   bool operator==(const ByteProvider &Other) const {
7395     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7396   }
7397 
7398 private:
7399   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7400       : Load(Load), ByteOffset(ByteOffset) {}
7401 };
7402 
7403 } // end anonymous namespace
7404 
7405 /// Recursively traverses the expression calculating the origin of the requested
7406 /// byte of the given value. Returns None if the provider can't be calculated.
7407 ///
7408 /// For all the values except the root of the expression verifies that the value
7409 /// has exactly one use and if it's not true return None. This way if the origin
7410 /// of the byte is returned it's guaranteed that the values which contribute to
7411 /// the byte are not used outside of this expression.
7412 ///
7413 /// Because the parts of the expression are not allowed to have more than one
7414 /// use this function iterates over trees, not DAGs. So it never visits the same
7415 /// node more than once.
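///
/// For illustration: in (or (zero_extend (load p)), (shl (zero_extend
/// (load q)), 8)), byte 0 is provided by (load p), byte 1 by (load q), and
/// all remaining bytes are constant zero.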
7416 static const Optional<ByteProvider>
7417 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7418                       bool Root = false) {
7419   // A typical i64-by-i8 pattern requires a recursion depth of up to 8 calls.
7420   if (Depth == 10)
7421     return None;
7422 
7423   if (!Root && !Op.hasOneUse())
7424     return None;
7425 
7426   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7427   unsigned BitWidth = Op.getValueSizeInBits();
7428   if (BitWidth % 8 != 0)
7429     return None;
7430   unsigned ByteWidth = BitWidth / 8;
7431   assert(Index < ByteWidth && "invalid index requested");
7432   (void) ByteWidth;
7433 
7434   switch (Op.getOpcode()) {
7435   case ISD::OR: {
7436     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7437     if (!LHS)
7438       return None;
7439     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7440     if (!RHS)
7441       return None;
7442 
7443     if (LHS->isConstantZero())
7444       return RHS;
7445     if (RHS->isConstantZero())
7446       return LHS;
7447     return None;
7448   }
7449   case ISD::SHL: {
7450     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7451     if (!ShiftOp)
7452       return None;
7453 
7454     uint64_t BitShift = ShiftOp->getZExtValue();
7455     if (BitShift % 8 != 0)
7456       return None;
7457     uint64_t ByteShift = BitShift / 8;
7458 
7459     return Index < ByteShift
7460                ? ByteProvider::getConstantZero()
7461                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7462                                        Depth + 1);
7463   }
7464   case ISD::ANY_EXTEND:
7465   case ISD::SIGN_EXTEND:
7466   case ISD::ZERO_EXTEND: {
7467     SDValue NarrowOp = Op->getOperand(0);
7468     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7469     if (NarrowBitWidth % 8 != 0)
7470       return None;
7471     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7472 
7473     if (Index >= NarrowByteWidth)
7474       return Op.getOpcode() == ISD::ZERO_EXTEND
7475                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7476                  : None;
7477     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7478   }
7479   case ISD::BSWAP:
7480     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7481                                  Depth + 1);
7482   case ISD::LOAD: {
7483     auto L = cast<LoadSDNode>(Op.getNode());
7484     if (!L->isSimple() || L->isIndexed())
7485       return None;
7486 
7487     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7488     if (NarrowBitWidth % 8 != 0)
7489       return None;
7490     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7491 
7492     if (Index >= NarrowByteWidth)
7493       return L->getExtensionType() == ISD::ZEXTLOAD
7494                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7495                  : None;
7496     return ByteProvider::getMemory(L, Index);
7497   }
7498   }
7499 
7500   return None;
7501 }
7502 
7503 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7504   return i;
7505 }
7506 
7507 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7508   return BW - i - 1;
7509 }
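
// For illustration: with BW == 4, byte 0 of a value (its least significant
// byte) lives at memory offset 0 in a little-endian layout but at memory
// offset 3 in a big-endian layout.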
7510 
7511 // Check if the byte offsets we are looking at match either a big or a
7512 // little endian value load. Return true for big endian, false for little
7513 // endian, and None if the match failed.
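// For example: offsets {0, 1, 2, 3} relative to FirstOffset match a
// little-endian load (returns false), while {3, 2, 1, 0} match a big-endian
// load (returns true).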
7514 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7515                                   int64_t FirstOffset) {
7516   // Endianness can only be decided when there are at least 2 bytes.
7517   unsigned Width = ByteOffsets.size();
7518   if (Width < 2)
7519     return None;
7520 
7521   bool BigEndian = true, LittleEndian = true;
7522   for (unsigned i = 0; i < Width; i++) {
7523     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7524     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7525     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7526     if (!BigEndian && !LittleEndian)
7527       return None;
7528   }
7529 
7530   assert((BigEndian != LittleEndian) && "It should be either big endian or "
7531                                         "little endian");
7532   return BigEndian;
7533 }
7534 
7535 static SDValue stripTruncAndExt(SDValue Value) {
7536   switch (Value.getOpcode()) {
7537   case ISD::TRUNCATE:
7538   case ISD::ZERO_EXTEND:
7539   case ISD::SIGN_EXTEND:
7540   case ISD::ANY_EXTEND:
7541     return stripTruncAndExt(Value.getOperand(0));
7542   }
7543   return Value;
7544 }
7545 
7546 /// Match a pattern where a wide type scalar value is stored by several narrow
7547 /// stores. Fold it into a single store or a BSWAP and a store if the target
7548 /// supports it.
7549 ///
7550 /// Assuming little endian target:
7551 ///  i8 *p = ...
7552 ///  i32 val = ...
7553 ///  p[0] = (val >> 0) & 0xFF;
7554 ///  p[1] = (val >> 8) & 0xFF;
7555 ///  p[2] = (val >> 16) & 0xFF;
7556 ///  p[3] = (val >> 24) & 0xFF;
7557 /// =>
7558 ///  *((i32)p) = val;
7559 ///
7560 ///  i8 *p = ...
7561 ///  i32 val = ...
7562 ///  p[0] = (val >> 24) & 0xFF;
7563 ///  p[1] = (val >> 16) & 0xFF;
7564 ///  p[2] = (val >> 8) & 0xFF;
7565 ///  p[3] = (val >> 0) & 0xFF;
7566 /// =>
7567 ///  *((i32)p) = BSWAP(val);
7568 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7569   // The matching looks for "store (trunc x)" patterns that appear early but are
7570   // likely to be replaced by truncating store nodes during combining.
7571   // TODO: If there is evidence that running this later would help, this
7572   //       limitation could be removed. Legality checks may need to be added
7573   //       for the created store and optional bswap/rotate.
7574   if (LegalOperations || OptLevel == CodeGenOpt::None)
7575     return SDValue();
7576 
7577   // We only handle merging simple stores of 1-4 bytes.
7578   // TODO: Allow unordered atomics when wider type is legal (see D66309)
7579   EVT MemVT = N->getMemoryVT();
7580   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7581       !N->isSimple() || N->isIndexed())
7582     return SDValue();
7583 
7584   // Collect all of the stores in the chain.
7585   SDValue Chain = N->getChain();
7586   SmallVector<StoreSDNode *, 8> Stores = {N};
7587   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7588     // All stores must be the same size to ensure that we are writing all of the
7589     // bytes in the wide value.
7590     // TODO: We could allow multiple sizes by tracking each stored byte.
7591     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7592         Store->isIndexed())
7593       return SDValue();
7594     Stores.push_back(Store);
7595     Chain = Store->getChain();
7596   }
7597   // There is no reason to continue if we do not have at least a pair of stores.
7598   if (Stores.size() < 2)
7599     return SDValue();
7600 
7601   // Handle simple types only.
7602   LLVMContext &Context = *DAG.getContext();
7603   unsigned NumStores = Stores.size();
7604   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7605   unsigned WideNumBits = NumStores * NarrowNumBits;
7606   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7607   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7608     return SDValue();
7609 
7610   // Check if all bytes of the source value that we are looking at are stored
7611   // to the same base address. Collect offsets from Base address into OffsetMap.
7612   SDValue SourceValue;
7613   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7614   int64_t FirstOffset = INT64_MAX;
7615   StoreSDNode *FirstStore = nullptr;
7616   Optional<BaseIndexOffset> Base;
7617   for (auto Store : Stores) {
7618     // All the stores store different parts of the wide source value. A
7619     // truncate is required to get the partial value.
7620     SDValue Trunc = Store->getValue();
7621     if (Trunc.getOpcode() != ISD::TRUNCATE)
7622       return SDValue();
7623     // Other than the first/last part, a shift operation is required to get the
7624     // offset.
7625     int64_t Offset = 0;
7626     SDValue WideVal = Trunc.getOperand(0);
7627     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7628         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7629       // The shift amount must be a constant multiple of the narrow type.
7630       // It is translated to the offset address in the wide source value "y".
7631       //
7632       // x = srl y, ShiftAmtC
7633       // i8 z = trunc x
7634       // store z, ...
7635       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7636       if (ShiftAmtC % NarrowNumBits != 0)
7637         return SDValue();
7638 
7639       Offset = ShiftAmtC / NarrowNumBits;
7640       WideVal = WideVal.getOperand(0);
7641     }
7642 
7643     // Stores must share the same source value with different offsets.
7644     // Truncate and extends should be stripped to get the single source value.
7645     if (!SourceValue)
7646       SourceValue = WideVal;
7647     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7648       return SDValue();
7649     else if (SourceValue.getValueType() != WideVT) {
7650       if (WideVal.getValueType() == WideVT ||
7651           WideVal.getScalarValueSizeInBits() >
7652               SourceValue.getScalarValueSizeInBits())
7653         SourceValue = WideVal;
7654       // Give up if the source value type is smaller than the store size.
7655       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7656         return SDValue();
7657     }
7658 
7659     // Stores must share the same base address.
7660     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7661     int64_t ByteOffsetFromBase = 0;
7662     if (!Base)
7663       Base = Ptr;
7664     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7665       return SDValue();
7666 
7667     // Remember the first store.
7668     if (ByteOffsetFromBase < FirstOffset) {
7669       FirstStore = Store;
7670       FirstOffset = ByteOffsetFromBase;
7671     }
7672     // Map the offset in the store and the offset in the combined value, and
7673     // early return if it has been set before.
7674     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7675       return SDValue();
7676     OffsetMap[Offset] = ByteOffsetFromBase;
7677   }
7678 
7679   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7680   assert(FirstStore && "First store must be set");
7681 
7682   // Check that a store of the wide type is both allowed and fast on the target
7683   const DataLayout &Layout = DAG.getDataLayout();
7684   bool Fast = false;
7685   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7686                                         *FirstStore->getMemOperand(), &Fast);
7687   if (!Allowed || !Fast)
7688     return SDValue();
7689 
7690   // Check if the pieces of the value are going to the expected places in memory
7691   // to merge the stores.
7692   auto checkOffsets = [&](bool MatchLittleEndian) {
7693     if (MatchLittleEndian) {
7694       for (unsigned i = 0; i != NumStores; ++i)
7695         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7696           return false;
7697     } else { // MatchBigEndian by reversing loop counter.
7698       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7699         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7700           return false;
7701     }
7702     return true;
7703   };
7704 
7705   // Check if the offsets line up for the native data layout of this target.
7706   bool NeedBswap = false;
7707   bool NeedRotate = false;
7708   if (!checkOffsets(Layout.isLittleEndian())) {
7709     // Special-case: check if byte offsets line up for the opposite endian.
7710     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7711       NeedBswap = true;
7712     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7713       NeedRotate = true;
7714     else
7715       return SDValue();
7716   }
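
  // For illustration (little-endian target, hypothetical i32 value): storing
  // the high i16 half at offset 0 and the low i16 half at offset 2 is the
  // byte pattern of storing ROTR(val, 16), which the NeedRotate path emits.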
7717 
7718   SDLoc DL(N);
7719   if (WideVT != SourceValue.getValueType()) {
7720     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7721            "Unexpected store value to merge");
7722     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7723   }
7724 
7725   // Before legalize we can introduce illegal bswaps/rotates which will be later
7726   // converted to an explicit bswap sequence. This way we end up with a single
7727   // store and byte shuffling instead of several stores and byte shuffling.
7728   if (NeedBswap) {
7729     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7730   } else if (NeedRotate) {
7731     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7732     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7733     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7734   }
7735 
7736   SDValue NewStore =
7737       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7738                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7739 
7740   // Rely on other DAG combine rules to remove the other individual stores.
7741   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7742   return NewStore;
7743 }
7744 
7745 /// Match a pattern where a wide type scalar value is loaded by several narrow
7746 /// loads and combined by shifts and ors. Fold it into a single load or a load
7747 /// and a BSWAP if the targets supports it.
7748 ///
7749 /// Assuming little endian target:
7750 ///  i8 *a = ...
7751 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7752 /// =>
7753 ///  i32 val = *((i32)a)
7754 ///
7755 ///  i8 *a = ...
7756 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7757 /// =>
7758 ///  i32 val = BSWAP(*((i32)a))
7759 ///
7760 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7761 /// interact well with the worklist mechanism. When a part of the pattern is
7762 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7763 /// but the root node of the pattern which triggers the load combine is not
7764 /// necessarily a direct user of the changed node. For example, once the address
7765 /// of t28 load is reassociated load combine won't be triggered:
7766 ///             t25: i32 = add t4, Constant:i32<2>
7767 ///           t26: i64 = sign_extend t25
7768 ///        t27: i64 = add t2, t26
7769 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7770 ///     t29: i32 = zero_extend t28
7771 ///   t32: i32 = shl t29, Constant:i8<8>
7772 /// t33: i32 = or t23, t32
7773 /// As a possible fix visitLoad can check if the load can be a part of a load
7774 /// combine pattern and add corresponding OR roots to the worklist.
7775 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7776   assert(N->getOpcode() == ISD::OR &&
7777          "Can only match load combining against OR nodes");
7778 
7779   // Handles simple types only
7780   EVT VT = N->getValueType(0);
7781   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7782     return SDValue();
7783   unsigned ByteWidth = VT.getSizeInBits() / 8;
7784 
7785   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7786   auto MemoryByteOffset = [&] (ByteProvider P) {
7787     assert(P.isMemory() && "Must be a memory byte provider");
7788     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7789     assert(LoadBitWidth % 8 == 0 &&
7790            "can only analyze providers for individual bytes not bit");
7791     unsigned LoadByteWidth = LoadBitWidth / 8;
7792     return IsBigEndianTarget
7793             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7794             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7795   };
7796 
7797   Optional<BaseIndexOffset> Base;
7798   SDValue Chain;
7799 
7800   SmallPtrSet<LoadSDNode *, 8> Loads;
7801   Optional<ByteProvider> FirstByteProvider;
7802   int64_t FirstOffset = INT64_MAX;
7803 
7804   // Check if all the bytes of the OR we are looking at are loaded from the same
7805   // base address. Collect byte offsets from the Base address in ByteOffsets.
7806   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7807   unsigned ZeroExtendedBytes = 0;
7808   for (int i = ByteWidth - 1; i >= 0; --i) {
7809     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7810     if (!P)
7811       return SDValue();
7812 
7813     if (P->isConstantZero()) {
7814       // It's OK for the N most significant bytes to be 0, we can just
7815       // zero-extend the load.
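      // Since the loop walks from the most significant byte down, this check
      // only accepts a contiguous run of zero bytes at the top. E.g. for i32,
      // a[0] | (a[1] << 8) leaves bytes 2..3 zero and can become a
      // zero-extended i16 load.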
7816       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7817         return SDValue();
7818       continue;
7819     }
7820     assert(P->isMemory() && "provenance should either be memory or zero");
7821 
7822     LoadSDNode *L = P->Load;
7823     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7824            !L->isIndexed() &&
7825            "Must be enforced by calculateByteProvider");
7826     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7827 
7828     // All loads must share the same chain
7829     SDValue LChain = L->getChain();
7830     if (!Chain)
7831       Chain = LChain;
7832     else if (Chain != LChain)
7833       return SDValue();
7834 
7835     // Loads must share the same base address
7836     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7837     int64_t ByteOffsetFromBase = 0;
7838     if (!Base)
7839       Base = Ptr;
7840     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7841       return SDValue();
7842 
7843     // Calculate the offset of the current byte from the base address
7844     ByteOffsetFromBase += MemoryByteOffset(*P);
7845     ByteOffsets[i] = ByteOffsetFromBase;
7846 
7847     // Remember the first byte load
7848     if (ByteOffsetFromBase < FirstOffset) {
7849       FirstByteProvider = P;
7850       FirstOffset = ByteOffsetFromBase;
7851     }
7852 
7853     Loads.insert(L);
7854   }
7855   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7856          "memory, so there must be at least one load which produces the value");
7857   assert(Base && "Base address of the accessed memory location must be set");
7858   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7859 
7860   bool NeedsZext = ZeroExtendedBytes > 0;
7861 
7862   EVT MemVT =
7863       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7864 
7865   if (!MemVT.isSimple())
7866     return SDValue();
7867 
7868   // Before legalize we can introduce too wide illegal loads which will be later
7869   // split into legal sized loads. This enables us to combine i64 load by i8
7870   // patterns to a couple of i32 loads on 32 bit targets.
7871   if (LegalOperations &&
7872       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7873                             MemVT))
7874     return SDValue();
7875 
7876   // Check if the bytes of the OR we are looking at match either a big or a
7877   // little endian value load.
7878   Optional<bool> IsBigEndian = isBigEndian(
7879       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7880   if (!IsBigEndian.hasValue())
7881     return SDValue();
7882 
7883   assert(FirstByteProvider && "must be set");
7884 
7885   // Ensure that the first byte is loaded from zero offset of the first load.
7886   // So the combined value can be loaded from the first load address.
7887   if (MemoryByteOffset(*FirstByteProvider) != 0)
7888     return SDValue();
7889   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7890 
7891   // The node we are looking at matches with the pattern, check if we can
7892   // replace it with a single (possibly zero-extended) load and bswap + shift if
7893   // needed.
7894 
7895   // If the load needs byte swap check if the target supports it
7896   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7897 
7898   // Before legalize we can introduce illegal bswaps which will be later
7899   // converted to an explicit bswap sequence. This way we end up with a single
7900   // load and byte shuffling instead of several loads and byte shuffling.
7901   // We do not introduce illegal bswaps when zero-extending as this tends to
7902   // introduce too many arithmetic instructions.
7903   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7904       !TLI.isOperationLegal(ISD::BSWAP, VT))
7905     return SDValue();
7906 
7907   // If we need to bswap and zero extend, we have to insert a shift. Check that
7908   // it is legal.
7909   if (NeedsBswap && NeedsZext && LegalOperations &&
7910       !TLI.isOperationLegal(ISD::SHL, VT))
7911     return SDValue();
7912 
7913   // Check that a load of the wide type is both allowed and fast on the target
7914   bool Fast = false;
7915   bool Allowed =
7916       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7917                              *FirstLoad->getMemOperand(), &Fast);
7918   if (!Allowed || !Fast)
7919     return SDValue();
7920 
7921   SDValue NewLoad =
7922       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7923                      Chain, FirstLoad->getBasePtr(),
7924                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7925 
7926   // Transfer chain users from old loads to the new load.
7927   for (LoadSDNode *L : Loads)
7928     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7929 
7930   if (!NeedsBswap)
7931     return NewLoad;
7932 
7933   SDValue ShiftedLoad =
7934       NeedsZext
7935           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7936                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7937                                                    SDLoc(N), LegalOperations))
7938           : NewLoad;
7939   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7940 }
7941 
7942 // If the target has andn, bsl, or a similar bit-select instruction,
7943 // we want to unfold masked merge, with canonical pattern of:
7944 //   |        A  |  |B|
7945 //   ((x ^ y) & m) ^ y
7946 //    |  D  |
7947 // Into:
7948 //   (x & m) | (y & ~m)
7949 // If y is a constant, m is not a 'not', and the 'andn' does not work with
7950 // immediates, we unfold into a different pattern:
7951 //   ~(~x & m) & (m | y)
7952 // If x is a constant, m is a 'not', and the 'andn' does not work with
7953 // immediates, we unfold into a different pattern:
7954 //   (x | ~m) & ~(~m & ~y)
7955 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7956 //       the very least that breaks andnpd / andnps patterns, and because those
7957 //       patterns are simplified in IR and shouldn't be created in the DAG
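//
// A small worked example of the canonical pattern (illustrative values, not
// taken from this file): with 4-bit x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  ==  (0b0110 & 0b0110) ^ 0b1010  ==  0b1100
//   (x & m) | (y & ~m) ==   0b0100 | 0b1000            ==  0b1100
// Both forms select the bits of x where m is set and the bits of y where m
// is clear.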
7958 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7959   assert(N->getOpcode() == ISD::XOR);
7960 
7961   // Don't touch 'not' (i.e. where y = -1).
7962   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7963     return SDValue();
7964 
7965   EVT VT = N->getValueType(0);
7966 
7967   // There are 3 commutable operators in the pattern,
7968   // so we have to deal with 8 possible variants of the basic pattern.
7969   SDValue X, Y, M;
7970   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7971     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7972       return false;
7973     SDValue Xor = And.getOperand(XorIdx);
7974     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7975       return false;
7976     SDValue Xor0 = Xor.getOperand(0);
7977     SDValue Xor1 = Xor.getOperand(1);
7978     // Don't touch 'not' (i.e. where y = -1).
7979     if (isAllOnesOrAllOnesSplat(Xor1))
7980       return false;
7981     if (Other == Xor0)
7982       std::swap(Xor0, Xor1);
7983     if (Other != Xor1)
7984       return false;
7985     X = Xor0;
7986     Y = Xor1;
7987     M = And.getOperand(XorIdx ? 0 : 1);
7988     return true;
7989   };
7990 
7991   SDValue N0 = N->getOperand(0);
7992   SDValue N1 = N->getOperand(1);
7993   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7994       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7995     return SDValue();
7996 
7997   // Don't do anything if the mask is constant. This should not be reachable.
7998   // InstCombine should have already unfolded this pattern, and DAGCombiner
7999   // probably shouldn't produce it either.
8000   if (isa<ConstantSDNode>(M.getNode()))
8001     return SDValue();
8002 
8003   // We can transform if the target has AndNot
8004   if (!TLI.hasAndNot(M))
8005     return SDValue();
8006 
8007   SDLoc DL(N);
8008 
8009   // If Y is a constant, check that 'andn' works with immediates, unless M is
8010   // a bitwise 'not' that would already allow ANDN to be used.
8011   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8012     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8013     // If not, we need to do a bit more work to make sure andn is still used.
8014     SDValue NotX = DAG.getNOT(DL, X, VT);
8015     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8016     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8017     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8018     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8019   }
8020 
8021   // If X is a constant and M is a bitwise not, check that 'andn' works with
8022   // immediates.
8023   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8024     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8025     // If not, we need to do a bit more work to make sure andn is still used.
8026     SDValue NotM = M.getOperand(0);
8027     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8028     SDValue NotY = DAG.getNOT(DL, Y, VT);
8029     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8030     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8031     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8032   }
8033 
8034   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8035   SDValue NotM = DAG.getNOT(DL, M, VT);
8036   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8037 
8038   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8039 }
8040 
8041 SDValue DAGCombiner::visitXOR(SDNode *N) {
8042   SDValue N0 = N->getOperand(0);
8043   SDValue N1 = N->getOperand(1);
8044   EVT VT = N0.getValueType();
8045   SDLoc DL(N);
8046 
8047   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8048   if (N0.isUndef() && N1.isUndef())
8049     return DAG.getConstant(0, DL, VT);
8050 
8051   // fold (xor x, undef) -> undef
8052   if (N0.isUndef())
8053     return N0;
8054   if (N1.isUndef())
8055     return N1;
8056 
8057   // fold (xor c1, c2) -> c1^c2
8058   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8059     return C;
8060 
8061   // canonicalize constant to RHS
8062   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8063       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8064     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8065 
8066   // fold vector ops
8067   if (VT.isVector()) {
8068     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8069       return FoldedVOp;
8070 
8071     // fold (xor x, 0) -> x, vector edition
8072     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8073       return N0;
8074   }
8075 
8076   // fold (xor x, 0) -> x
8077   if (isNullConstant(N1))
8078     return N0;
8079 
8080   if (SDValue NewSel = foldBinOpIntoSelect(N))
8081     return NewSel;
8082 
8083   // reassociate xor
8084   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8085     return RXOR;
8086 
8087   // fold !(x cc y) -> (x !cc y)
8088   unsigned N0Opcode = N0.getOpcode();
8089   SDValue LHS, RHS, CC;
8090   if (TLI.isConstTrueVal(N1) &&
8091       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8092     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8093                                                LHS.getValueType());
8094     if (!LegalOperations ||
8095         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8096       switch (N0Opcode) {
8097       default:
8098         llvm_unreachable("Unhandled SetCC Equivalent!");
8099       case ISD::SETCC:
8100         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8101       case ISD::SELECT_CC:
8102         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8103                                N0.getOperand(3), NotCC);
8104       case ISD::STRICT_FSETCC:
8105       case ISD::STRICT_FSETCCS: {
8106         if (N0.hasOneUse()) {
8107           // FIXME Can we handle multiple uses? Could we token factor the chain
8108           // results from the new/old setcc?
8109           SDValue SetCC =
8110               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8111                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8112           CombineTo(N, SetCC);
8113           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8114           recursivelyDeleteUnusedNodes(N0.getNode());
8115           return SDValue(N, 0); // Return N so it doesn't get rechecked!
8116         }
8117         break;
8118       }
8119       }
8120     }
8121   }
8122 
8123   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8124   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8125       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8126     SDValue V = N0.getOperand(0);
8127     SDLoc DL0(N0);
8128     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8129                     DAG.getConstant(1, DL0, V.getValueType()));
8130     AddToWorklist(V.getNode());
8131     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8132   }
8133 
8134   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8135   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8136       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8137     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8138     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8139       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8140       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8141       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8142       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8143       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8144     }
8145   }
8146   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8147   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8148       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8149     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8150     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8151       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8152       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8153       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8154       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8155       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8156     }
8157   }
8158 
8159   // fold (not (neg x)) -> (add X, -1)
8160   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8161   // Y is a constant or the subtract has a single use.
8162   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8163       isNullConstant(N0.getOperand(0))) {
8164     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8165                        DAG.getAllOnesConstant(DL, VT));
8166   }
8167 
8168   // fold (not (add X, -1)) -> (neg X)
8169   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8170       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8171     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8172                        N0.getOperand(0));
8173   }
8174 
8175   // fold (xor (and x, y), y) -> (and (not x), y)
8176   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8177     SDValue X = N0.getOperand(0);
8178     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8179     AddToWorklist(NotX.getNode());
8180     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8181   }
8182 
8183   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8184     ConstantSDNode *XorC = isConstOrConstSplat(N1);
8185     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8186     unsigned BitWidth = VT.getScalarSizeInBits();
8187     if (XorC && ShiftC) {
8188       // Don't crash on an oversized shift. We cannot guarantee that a bogus
8189       // shift has been simplified to undef.
8190       uint64_t ShiftAmt = ShiftC->getLimitedValue();
8191       if (ShiftAmt < BitWidth) {
8192         APInt Ones = APInt::getAllOnes(BitWidth);
8193         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8194         if (XorC->getAPIntValue() == Ones) {
8195           // If the xor constant is a shifted -1, do a 'not' before the shift:
8196           // xor (X << ShiftC), XorC --> (not X) << ShiftC
8197           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8198           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8199           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8200         }
8201       }
8202     }
8203   }
8204 
8205   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
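  // (Sanity check of the identity: Y is 0 when X >= 0, so (X + 0) ^ 0 == X;
  //  Y is -1 when X < 0, so (X + -1) ^ -1 == ~(X - 1) == -X.)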
8206   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8207     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8208     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8209     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8210       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8211       SDValue S0 = S.getOperand(0);
8212       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8213         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8214           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8215             return DAG.getNode(ISD::ABS, DL, VT, S0);
8216     }
8217   }
8218 
8219   // fold (xor x, x) -> 0
8220   if (N0 == N1)
8221     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8222 
8223   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8224   // Here is a concrete example of this equivalence:
8225   // i16   x ==  14
8226   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
8227   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8228   //
8229   // =>
8230   //
8231   // i16     ~1      == 0b1111111111111110
8232   // i16 rol(~1, 14) == 0b1011111111111111
8233   //
8234   // Some additional tips to help conceptualize this transform:
8235   // - Try to see the operation as placing a single zero in a value of all ones.
8236   // - There exists no value for x which would allow the result to contain zero.
8237   // - Values of x larger than the bitwidth are undefined and do not require a
8238   //   consistent result.
8239   // - Pushing the zero left requires shifting one-bits in from the right.
8240   // A rotate left of ~1 is a nice way of achieving the desired result.
8241   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8242       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8243     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8244                        N0.getOperand(1));
8245   }
8246 
8247   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
8248   if (N0Opcode == N1.getOpcode())
8249     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8250       return V;
8251 
8252   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
8253   if (SDValue MM = unfoldMaskedMerge(N))
8254     return MM;
8255 
8256   // Simplify the expression using non-local knowledge.
8257   if (SimplifyDemandedBits(SDValue(N, 0)))
8258     return SDValue(N, 0);
8259 
8260   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8261     return Combined;
8262 
8263   return SDValue();
8264 }
8265 
8266 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8267 /// shift-by-constant operand with identical opcode, we may be able to convert
8268 /// that into 2 independent shifts followed by the logic op. This is a
8269 /// throughput improvement.
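///
/// For example (an illustrative sketch, not from a regression test):
///   srl (xor (srl X, 2), Y), 3 --> xor (srl X, 5), (srl Y, 3)
/// The original form is a three-node dependency chain; the two new shifts are
/// independent and can execute in parallel before the final logic op.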
8270 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8271   // Match a one-use bitwise logic op.
8272   SDValue LogicOp = Shift->getOperand(0);
8273   if (!LogicOp.hasOneUse())
8274     return SDValue();
8275 
8276   unsigned LogicOpcode = LogicOp.getOpcode();
8277   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8278       LogicOpcode != ISD::XOR)
8279     return SDValue();
8280 
8281   // Find a matching one-use shift by constant.
8282   unsigned ShiftOpcode = Shift->getOpcode();
8283   SDValue C1 = Shift->getOperand(1);
8284   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8285   assert(C1Node && "Expected a shift with constant operand");
8286   const APInt &C1Val = C1Node->getAPIntValue();
8287   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8288                              const APInt *&ShiftAmtVal) {
8289     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8290       return false;
8291 
8292     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8293     if (!ShiftCNode)
8294       return false;
8295 
8296     // Capture the shifted operand and shift amount value.
8297     ShiftOp = V.getOperand(0);
8298     ShiftAmtVal = &ShiftCNode->getAPIntValue();
8299 
8300     // Shift amount types do not have to match their operand type, so check that
8301     // the constants are the same width.
8302     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8303       return false;
8304 
8305     // The fold is not valid if the sum of the shift values exceeds bitwidth.
8306     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8307       return false;
8308 
8309     return true;
8310   };
8311 
8312   // Logic ops are commutative, so check each operand for a match.
8313   SDValue X, Y;
8314   const APInt *C0Val;
8315   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8316     Y = LogicOp.getOperand(1);
8317   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8318     Y = LogicOp.getOperand(0);
8319   else
8320     return SDValue();
8321 
8322   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8323   SDLoc DL(Shift);
8324   EVT VT = Shift->getValueType(0);
8325   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8326   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8327   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8328   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8329   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8330 }
8331 
8332 /// Handle transforms common to the three shifts, when the shift amount is a
8333 /// constant.
8334 /// We are looking for: (shift being one of shl/sra/srl)
8335 ///   shift (binop X, C0), C1
8336 /// And want to transform into:
8337 ///   binop (shift X, C1), (shift C0, C1)
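/// For example (illustrative): shl (add X, 5), 2 --> add (shl X, 2), 20,
/// since (X + 5) << 2 == (X << 2) + (5 << 2).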
8338 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8339   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8340 
8341   // Do not turn a 'not' into a regular xor.
8342   if (isBitwiseNot(N->getOperand(0)))
8343     return SDValue();
8344 
8345   // The inner binop must be one-use, since we want to replace it.
8346   SDValue LHS = N->getOperand(0);
8347   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8348     return SDValue();
8349 
8350   // TODO: This is limited to early combining because it may reveal regressions
8351   //       otherwise. But since we just checked a target hook to see if this is
8352   //       desirable, that should have filtered out cases where this interferes
8353   //       with some other pattern matching.
8354   if (!LegalTypes)
8355     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8356       return R;
8357 
8358   // We want to pull some binops through shifts, so that we have (and (shift))
8359   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8360   // thing happens with address calculations, so it's important to canonicalize
8361   // it.
8362   switch (LHS.getOpcode()) {
8363   default:
8364     return SDValue();
8365   case ISD::OR:
8366   case ISD::XOR:
8367   case ISD::AND:
8368     break;
8369   case ISD::ADD:
8370     if (N->getOpcode() != ISD::SHL)
8371       return SDValue(); // only shl(add) not sr[al](add).
8372     break;
8373   }
8374 
8375   // We require the RHS of the binop to be a constant and not opaque as well.
8376   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8377   if (!BinOpCst)
8378     return SDValue();
8379 
8380   // FIXME: disable this unless the input to the binop is a shift by a constant
8381   // or is a copy/select. Enable this in other cases once we figure out when it
8382   // is exactly profitable.
8383   SDValue BinOpLHSVal = LHS.getOperand(0);
8384   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8385                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8386                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8387                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8388   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8389                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8390 
8391   if (!IsShiftByConstant && !IsCopyOrSelect)
8392     return SDValue();
8393 
8394   if (IsCopyOrSelect && N->hasOneUse())
8395     return SDValue();
8396 
8397   // Fold the constants, shifting the binop RHS by the shift amount.
8398   SDLoc DL(N);
8399   EVT VT = N->getValueType(0);
8400   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8401                                N->getOperand(1));
8402   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8403 
8404   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8405                                  N->getOperand(1));
8406   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8407 }
8408 
8409 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8410   assert(N->getOpcode() == ISD::TRUNCATE);
8411   assert(N->getOperand(0).getOpcode() == ISD::AND);
8412 
8413   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8414   EVT TruncVT = N->getValueType(0);
8415   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8416       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8417     SDValue N01 = N->getOperand(0).getOperand(1);
8418     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8419       SDLoc DL(N);
8420       SDValue N00 = N->getOperand(0).getOperand(0);
8421       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8422       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8423       AddToWorklist(Trunc00.getNode());
8424       AddToWorklist(Trunc01.getNode());
8425       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8426     }
8427   }
8428 
8429   return SDValue();
8430 }
8431 
8432 SDValue DAGCombiner::visitRotate(SDNode *N) {
8433   SDLoc dl(N);
8434   SDValue N0 = N->getOperand(0);
8435   SDValue N1 = N->getOperand(1);
8436   EVT VT = N->getValueType(0);
8437   unsigned Bitsize = VT.getScalarSizeInBits();
8438 
8439   // fold (rot x, 0) -> x
8440   if (isNullOrNullSplat(N1))
8441     return N0;
8442 
8443   // fold (rot x, c) -> x iff (c % BitSize) == 0
8444   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8445     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8446     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8447       return N0;
8448   }
8449 
8450   // fold (rot x, c) -> (rot x, c % BitSize)
8451   bool OutOfRange = false;
8452   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8453     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8454     return true;
8455   };
8456   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8457     EVT AmtVT = N1.getValueType();
8458     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8459     if (SDValue Amt =
8460             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8461       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8462   }
8463 
8464   // rot i16 X, 8 --> bswap X
8465   auto *RotAmtC = isConstOrConstSplat(N1);
8466   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8467       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8468     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8469 
8470   // Simplify the operands using demanded-bits information.
8471   if (SimplifyDemandedBits(SDValue(N, 0)))
8472     return SDValue(N, 0);
8473 
8474   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8475   if (N1.getOpcode() == ISD::TRUNCATE &&
8476       N1.getOperand(0).getOpcode() == ISD::AND) {
8477     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8478       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8479   }
8480 
8481   unsigned NextOp = N0.getOpcode();
8482   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
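  // (e.g., illustratively on i32: rotl (rotl X, 3), 10 --> rotl X, 13, while
  //  rotl (rotr X, 3), 10 --> rotl X, 7.)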
8483   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8484     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8485     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8486     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8487       EVT ShiftVT = C1->getValueType(0);
8488       bool SameSide = (N->getOpcode() == NextOp);
8489       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8490       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8491               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8492         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8493         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8494             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8495         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8496                            CombinedShiftNorm);
8497       }
8498     }
8499   }
8500   return SDValue();
8501 }
8502 
8503 SDValue DAGCombiner::visitSHL(SDNode *N) {
8504   SDValue N0 = N->getOperand(0);
8505   SDValue N1 = N->getOperand(1);
8506   if (SDValue V = DAG.simplifyShift(N0, N1))
8507     return V;
8508 
8509   EVT VT = N0.getValueType();
8510   EVT ShiftVT = N1.getValueType();
8511   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8512 
8513   // fold (shl c1, c2) -> c1<<c2
8514   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8515     return C;
8516 
8517   // fold vector ops
8518   if (VT.isVector()) {
8519     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8520       return FoldedVOp;
8521 
8522     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8523     // If setcc produces all-one true value then:
8524     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8525     if (N1CV && N1CV->isConstant()) {
8526       if (N0.getOpcode() == ISD::AND) {
8527         SDValue N00 = N0->getOperand(0);
8528         SDValue N01 = N0->getOperand(1);
8529         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8530 
8531         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8532             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8533                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8534           if (SDValue C =
8535                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8536             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8537         }
8538       }
8539     }
8540   }
8541 
8542   if (SDValue NewSel = foldBinOpIntoSelect(N))
8543     return NewSel;
8544 
8545   // if (shl x, c) is known to be zero, return 0
8546   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8547     return DAG.getConstant(0, SDLoc(N), VT);
8548 
8549   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8550   if (N1.getOpcode() == ISD::TRUNCATE &&
8551       N1.getOperand(0).getOpcode() == ISD::AND) {
8552     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8553       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8554   }
8555 
8556   if (SimplifyDemandedBits(SDValue(N, 0)))
8557     return SDValue(N, 0);
8558 
8559   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8560   if (N0.getOpcode() == ISD::SHL) {
8561     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8562                                           ConstantSDNode *RHS) {
8563       APInt c1 = LHS->getAPIntValue();
8564       APInt c2 = RHS->getAPIntValue();
8565       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8566       return (c1 + c2).uge(OpSizeInBits);
8567     };
8568     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8569       return DAG.getConstant(0, SDLoc(N), VT);
8570 
8571     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8572                                        ConstantSDNode *RHS) {
8573       APInt c1 = LHS->getAPIntValue();
8574       APInt c2 = RHS->getAPIntValue();
8575       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8576       return (c1 + c2).ult(OpSizeInBits);
8577     };
8578     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8579       SDLoc DL(N);
8580       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8581       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8582     }
8583   }
8584 
8585   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8586   // For this to be valid, the second form must not preserve any of the bits
8587   // that are shifted out by the inner shift in the first form.  This means
8588   // the outer shift size must be >= the number of bits added by the ext.
8589   // As a corollary, we don't care what kind of ext it is.
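  // (e.g., illustratively: shl (zext i32 (shl X, 8) to i64), 32
  //  --> shl (zext i32 X to i64), 40. The outer shift of 32 discards all 32
  //  bits added by the ext, so the 8 bits dropped by the inner shl can never
  //  reach the result.)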
8590   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8591        N0.getOpcode() == ISD::ANY_EXTEND ||
8592        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8593       N0.getOperand(0).getOpcode() == ISD::SHL) {
8594     SDValue N0Op0 = N0.getOperand(0);
8595     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8596     EVT InnerVT = N0Op0.getValueType();
8597     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8598 
8599     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8600                                                          ConstantSDNode *RHS) {
8601       APInt c1 = LHS->getAPIntValue();
8602       APInt c2 = RHS->getAPIntValue();
8603       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8604       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8605              (c1 + c2).uge(OpSizeInBits);
8606     };
8607     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8608                                   /*AllowUndefs*/ false,
8609                                   /*AllowTypeMismatch*/ true))
8610       return DAG.getConstant(0, SDLoc(N), VT);
8611 
8612     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8613                                                       ConstantSDNode *RHS) {
8614       APInt c1 = LHS->getAPIntValue();
8615       APInt c2 = RHS->getAPIntValue();
8616       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8617       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8618              (c1 + c2).ult(OpSizeInBits);
8619     };
8620     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8621                                   /*AllowUndefs*/ false,
8622                                   /*AllowTypeMismatch*/ true)) {
8623       SDLoc DL(N);
8624       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8625       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8626       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8627       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8628     }
8629   }
8630 
8631   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8632   // Only fold this if the inner zext has no other uses to avoid increasing
8633   // the total number of instructions.
8634   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8635       N0.getOperand(0).getOpcode() == ISD::SRL) {
8636     SDValue N0Op0 = N0.getOperand(0);
8637     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8638 
8639     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8640       APInt c1 = LHS->getAPIntValue();
8641       APInt c2 = RHS->getAPIntValue();
8642       zeroExtendToMatch(c1, c2);
8643       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8644     };
8645     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8646                                   /*AllowUndefs*/ false,
8647                                   /*AllowTypeMismatch*/ true)) {
8648       SDLoc DL(N);
8649       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8650       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8651       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8652       AddToWorklist(NewSHL.getNode());
8653       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8654     }
8655   }
8656 
8657   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8658   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8659   // TODO - support non-uniform vector shift amounts.
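  // (e.g., shl (srl exact X, 3), 5 --> shl X, 2, since the 'exact' flag
  //  guarantees the low 3 bits of X are zero.)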
8660   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8661   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8662       N0->getFlags().hasExact()) {
8663     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8664       uint64_t C1 = N0C1->getZExtValue();
8665       uint64_t C2 = N1C->getZExtValue();
8666       SDLoc DL(N);
8667       if (C1 <= C2)
8668         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8669                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8670       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8671                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8672     }
8673   }
8674 
8675   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8676   //                               (and (srl x, (sub c1, c2)), MASK)
8677   // Only fold this if the inner shift has no other uses -- if it does, folding
8678   // this will increase the total number of instructions.
8679   // TODO - drop hasOneUse requirement if c1 == c2?
8680   // TODO - support non-uniform vector shift amounts.
8681   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8682       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8683     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8684       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8685         uint64_t c1 = N0C1->getZExtValue();
8686         uint64_t c2 = N1C->getZExtValue();
8687         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8688         SDValue Shift;
8689         if (c2 > c1) {
8690           Mask <<= c2 - c1;
8691           SDLoc DL(N);
8692           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8693                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8694         } else {
8695           Mask.lshrInPlace(c1 - c2);
8696           SDLoc DL(N);
8697           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8698                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8699         }
8700         SDLoc DL(N0);
8701         return DAG.getNode(ISD::AND, DL, VT, Shift,
8702                            DAG.getConstant(Mask, DL, VT));
8703       }
8704     }
8705   }
8706 
8707   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
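  // (e.g., on i8: shl (sra X, 3), 3 just clears the low 3 bits of X, i.e.
  //  and X, 0b11111000 == and X, (shl -1, 3).)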
8708   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8709       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8710     SDLoc DL(N);
8711     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8712     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8713     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8714   }
8715 
8716   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8717   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8718   // This is a variant of the fold done on multiply, except that a mul by a
8719   // power of 2 is turned into a shift.
8720   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8721       N0.getNode()->hasOneUse() &&
8722       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8723       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8724       TLI.isDesirableToCommuteWithShift(N, Level)) {
8725     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8726     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8727     AddToWorklist(Shl0.getNode());
8728     AddToWorklist(Shl1.getNode());
8729     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8730   }
8731 
8732   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8733   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8734       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8735       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8736     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8737     if (isConstantOrConstantVector(Shl))
8738       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8739   }
8740 
8741   if (N1C && !N1C->isOpaque())
8742     if (SDValue NewSHL = visitShiftByConstant(N))
8743       return NewSHL;
8744 
8745   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8746   if (N0.getOpcode() == ISD::VSCALE)
8747     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8748       const APInt &C0 = N0.getConstantOperandAPInt(0);
8749       const APInt &C1 = NC1->getAPIntValue();
8750       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8751     }
8752 
8753   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8754   APInt ShlVal;
8755   if (N0.getOpcode() == ISD::STEP_VECTOR)
8756     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8757       const APInt &C0 = N0.getConstantOperandAPInt(0);
8758       if (ShlVal.ult(C0.getBitWidth())) {
8759         APInt NewStep = C0 << ShlVal;
8760         return DAG.getStepVector(SDLoc(N), VT, NewStep);
8761       }
8762     }
8763 
8764   return SDValue();
8765 }
8766 
8767 // Transform a right shift of a multiply into a multiply-high.
8768 // Examples:
8769 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8770 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8771 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8772                                   const TargetLowering &TLI) {
8773   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8774          "SRL or SRA node is required here!");
8775 
8776   // Check the shift amount. Proceed with the transformation if the shift
8777   // amount is constant.
8778   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8779   if (!ShiftAmtSrc)
8780     return SDValue();
8781 
8782   SDLoc DL(N);
8783 
8784   // The operation feeding into the shift must be a multiply.
8785   SDValue ShiftOperand = N->getOperand(0);
8786   if (ShiftOperand.getOpcode() != ISD::MUL)
8787     return SDValue();
8788 
8789   // Both operands must be equivalent extend nodes.
8790   SDValue LeftOp = ShiftOperand.getOperand(0);
8791   SDValue RightOp = ShiftOperand.getOperand(1);
8792 
8793   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8794   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8795 
8796   if (!IsSignExt && !IsZeroExt)
8797     return SDValue();
8798 
8799   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8800   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8801 
8802   SDValue MulhRightOp;
8803   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
8804     unsigned ActiveBits = IsSignExt
8805                               ? Constant->getAPIntValue().getMinSignedBits()
8806                               : Constant->getAPIntValue().getActiveBits();
8807     if (ActiveBits > NarrowVTSize)
8808       return SDValue();
8809     MulhRightOp = DAG.getConstant(
8810         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8811         NarrowVT);
8812   } else {
8813     if (LeftOp.getOpcode() != RightOp.getOpcode())
8814       return SDValue();
8815     // Check that the two extend nodes are the same type.
8816     if (NarrowVT != RightOp.getOperand(0).getValueType())
8817       return SDValue();
8818     MulhRightOp = RightOp.getOperand(0);
8819   }
8820 
8821   EVT WideVT = LeftOp.getValueType();
8822   // Proceed with the transformation if the wide types match.
8823   assert((WideVT == RightOp.getValueType()) &&
8824          "Cannot have a multiply node with two different operand types.");
8825 
8826   // Proceed with the transformation if the wide type is twice as large
8827   // as the narrow type.
8828   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
8829     return SDValue();
8830 
8831   // Check the shift amount with the narrow type size.
8832   // Proceed with the transformation if the shift amount is the width
8833   // of the narrow type.
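  // (e.g., for i64 = mul (sext i32 $a), (sext i32 $b) the full product fits
  //  in 64 bits, so the upper 32 bits are exactly mulhs($a, $b).)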
8834   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8835   if (ShiftAmt != NarrowVTSize)
8836     return SDValue();
8837 
8838   // If the operation feeding into the MUL is a sign extend (sext),
8839   // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8840   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8841 
8842   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8843   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8844     return SDValue();
8845 
8846   SDValue Result =
8847       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
8848   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
8849                                      : DAG.getZExtOrTrunc(Result, DL, WideVT));
8850 }
8851 
8852 SDValue DAGCombiner::visitSRA(SDNode *N) {
8853   SDValue N0 = N->getOperand(0);
8854   SDValue N1 = N->getOperand(1);
8855   if (SDValue V = DAG.simplifyShift(N0, N1))
8856     return V;
8857 
8858   EVT VT = N0.getValueType();
8859   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8860 
8861   // fold (sra c1, c2) -> c1 >>s c2
8862   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8863     return C;
8864 
8865   // Arithmetic shifting an all-sign-bit value is a no-op.
8866   // fold (sra 0, x) -> 0
8867   // fold (sra -1, x) -> -1
8868   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8869     return N0;
8870 
8871   // fold vector ops
8872   if (VT.isVector())
8873     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8874       return FoldedVOp;
8875 
8876   if (SDValue NewSel = foldBinOpIntoSelect(N))
8877     return NewSel;
8878 
8879   // fold (sra (shl x, c1), c1) -> sext_inreg, for some c1 and when the target
8880   // supports sext_inreg.
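  // (e.g., on i32: sra (shl X, 24), 24 --> sign_extend_inreg X, i8.)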
8881   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8882   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8883     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8884     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8885     if (VT.isVector())
8886       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8887                                VT.getVectorElementCount());
8888     if (!LegalOperations ||
8889         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8890         TargetLowering::Legal)
8891       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8892                          N0.getOperand(0), DAG.getValueType(ExtVT));
8893     // Even if we can't convert to sext_inreg, we might be able to remove
8894     // this shift pair if the input is already sign extended.
8895     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8896       return N0.getOperand(0);
8897   }
8898 
8899   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8900   // clamp (add c1, c2) to max shift.
8901   if (N0.getOpcode() == ISD::SRA) {
8902     SDLoc DL(N);
8903     EVT ShiftVT = N1.getValueType();
8904     EVT ShiftSVT = ShiftVT.getScalarType();
8905     SmallVector<SDValue, 16> ShiftValues;
8906 
8907     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8908       APInt c1 = LHS->getAPIntValue();
8909       APInt c2 = RHS->getAPIntValue();
8910       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8911       APInt Sum = c1 + c2;
8912       unsigned ShiftSum =
8913           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8914       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8915       return true;
8916     };
8917     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8918       SDValue ShiftValue;
8919       if (N1.getOpcode() == ISD::BUILD_VECTOR)
8920         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8921       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8922         assert(ShiftValues.size() == 1 &&
8923                "Expected matchBinaryPredicate to return one element for "
8924                "SPLAT_VECTORs");
8925         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8926       } else
8927         ShiftValue = ShiftValues[0];
8928       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8929     }
8930   }
8931 
8932   // fold (sra (shl X, m), (sub result_size, n))
8933   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8934   // result_size - n != m.
8935   // If truncate is free for the target, sext(shl) is likely to result in
8936   // better code.
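  // (e.g., illustratively on i32 with m = 8 and result_size - n = 24:
  //  sra (shl X, 8), 24 --> sext (trunc (srl X, 16) to i8) to i32, i.e. bits
  //  [23:16] of X, sign-extended.)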
8937   if (N0.getOpcode() == ISD::SHL && N1C) {
8938     // Get the two constants of the shifts, CN0 = m, CN = n.
8939     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8940     if (N01C) {
8941       LLVMContext &Ctx = *DAG.getContext();
8942       // Determine what the truncate's result bitsize and type would be.
8943       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8944 
8945       if (VT.isVector())
8946         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8947 
8948       // Determine the residual right-shift amount.
8949       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8950 
8951       // If the shift is not a no-op (in which case this should be just a sign
8952       // extend already), the truncated-to type is legal, sign_extend is legal
8953       // on that type, and the truncate to that type is both legal and free,
8954       // perform the transform.
8955       if ((ShiftAmt > 0) &&
8956           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8957           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8958           TLI.isTruncateFree(VT, TruncVT)) {
8959         SDLoc DL(N);
8960         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8961             getShiftAmountTy(N0.getOperand(0).getValueType()));
8962         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8963                                     N0.getOperand(0), Amt);
8964         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8965                                     Shift);
8966         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8967                            N->getValueType(0), Trunc);
8968       }
8969     }
8970   }
8971 
8972   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8973   //   sra (add (shl X, N1C), AddC), N1C -->
8974   //   sext (add (trunc X to (width - N1C)), AddC')
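  // (e.g., illustratively on i32 with N1C = 24:
  //  sra (add (shl X, 24), 0x05000000), 24
  //    --> sext (add (trunc X to i8), 5) to i32, where 5 == 0x05000000 >> 24.)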
8975   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8976       N0.getOperand(0).getOpcode() == ISD::SHL &&
8977       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8978     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8979       SDValue Shl = N0.getOperand(0);
8980       // Determine what the truncate's type would be and ask the target if that
8981       // is a free operation.
8982       LLVMContext &Ctx = *DAG.getContext();
8983       unsigned ShiftAmt = N1C->getZExtValue();
8984       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8985       if (VT.isVector())
8986         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8987 
8988       // TODO: The simple type check probably belongs in the default hook
8989       //       implementation and/or target-specific overrides (because
8990       //       non-simple types likely require masking when legalized), but that
8991       //       restriction may conflict with other transforms.
8992       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8993           TLI.isTruncateFree(VT, TruncVT)) {
8994         SDLoc DL(N);
8995         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8996         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8997                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8998         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8999         return DAG.getSExtOrTrunc(Add, DL, VT);
9000       }
9001     }
9002   }
9003 
9004   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9005   if (N1.getOpcode() == ISD::TRUNCATE &&
9006       N1.getOperand(0).getOpcode() == ISD::AND) {
9007     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9008       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9009   }
9010 
9011   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9012   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9013   //      if c1 is equal to the number of bits the trunc removes
9014   // TODO - support non-uniform vector shift amounts.
9015   if (N0.getOpcode() == ISD::TRUNCATE &&
9016       (N0.getOperand(0).getOpcode() == ISD::SRL ||
9017        N0.getOperand(0).getOpcode() == ISD::SRA) &&
9018       N0.getOperand(0).hasOneUse() &&
9019       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9020     SDValue N0Op0 = N0.getOperand(0);
9021     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9022       EVT LargeVT = N0Op0.getValueType();
9023       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9024       if (LargeShift->getAPIntValue() == TruncBits) {
9025         SDLoc DL(N);
9026         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
9027                                       getShiftAmountTy(LargeVT));
9028         SDValue SRA =
9029             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9030         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9031       }
9032     }
9033   }
9034 
9035   // Simplify, based on bits shifted out of the LHS.
9036   if (SimplifyDemandedBits(SDValue(N, 0)))
9037     return SDValue(N, 0);
9038 
9039   // If the sign bit is known to be zero, switch this to a SRL.
9040   if (DAG.SignBitIsZero(N0))
9041     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9042 
9043   if (N1C && !N1C->isOpaque())
9044     if (SDValue NewSRA = visitShiftByConstant(N))
9045       return NewSRA;
9046 
9047   // Try to transform this shift into a multiply-high if
9048   // it matches the appropriate pattern detected in combineShiftToMULH.
9049   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9050     return MULH;
9051 
9052   // Attempt to convert a sra of a load into a narrower sign-extending load.
9053   if (SDValue NarrowLoad = reduceLoadWidth(N))
9054     return NarrowLoad;
9055 
9056   return SDValue();
9057 }
9058 
9059 SDValue DAGCombiner::visitSRL(SDNode *N) {
9060   SDValue N0 = N->getOperand(0);
9061   SDValue N1 = N->getOperand(1);
9062   if (SDValue V = DAG.simplifyShift(N0, N1))
9063     return V;
9064 
9065   EVT VT = N0.getValueType();
9066   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9067 
9068   // fold (srl c1, c2) -> c1 >>u c2
9069   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9070     return C;
9071 
9072   // fold vector ops
9073   if (VT.isVector())
9074     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9075       return FoldedVOp;
9076 
9077   if (SDValue NewSel = foldBinOpIntoSelect(N))
9078     return NewSel;
9079 
9080   // if (srl x, c) is known to be zero, return 0
9081   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9082   if (N1C &&
9083       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9084     return DAG.getConstant(0, SDLoc(N), VT);
9085 
9086   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
9087   if (N0.getOpcode() == ISD::SRL) {
9088     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9089                                           ConstantSDNode *RHS) {
9090       APInt c1 = LHS->getAPIntValue();
9091       APInt c2 = RHS->getAPIntValue();
9092       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9093       return (c1 + c2).uge(OpSizeInBits);
9094     };
9095     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9096       return DAG.getConstant(0, SDLoc(N), VT);
9097 
9098     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9099                                        ConstantSDNode *RHS) {
9100       APInt c1 = LHS->getAPIntValue();
9101       APInt c2 = RHS->getAPIntValue();
9102       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9103       return (c1 + c2).ult(OpSizeInBits);
9104     };
9105     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9106       SDLoc DL(N);
9107       EVT ShiftVT = N1.getValueType();
9108       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9109       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9110     }
9111   }
9112 
9113   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9114       N0.getOperand(0).getOpcode() == ISD::SRL) {
9115     SDValue InnerShift = N0.getOperand(0);
9116     // TODO - support non-uniform vector shift amounts.
9117     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9118       uint64_t c1 = N001C->getZExtValue();
9119       uint64_t c2 = N1C->getZExtValue();
9120       EVT InnerShiftVT = InnerShift.getValueType();
9121       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9122       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9123       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9124       // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
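      // (e.g., with i64 x truncated to i32:
      //  srl (trunc (srl x, 32) to i32), 8 --> trunc (srl x, 40) to i32.)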
9125       if (c1 + OpSizeInBits == InnerShiftSize) {
9126         SDLoc DL(N);
9127         if (c1 + c2 >= InnerShiftSize)
9128           return DAG.getConstant(0, DL, VT);
9129         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9130         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9131                                        InnerShift.getOperand(0), NewShiftAmt);
9132         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9133       }
9134       // In the more general case, we can clear the high bits after the shift:
9135       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9136       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9137           c1 + c2 < InnerShiftSize) {
9138         SDLoc DL(N);
9139         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9140         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9141                                        InnerShift.getOperand(0), NewShiftAmt);
9142         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9143                                                             OpSizeInBits - c2),
9144                                        DL, InnerShiftVT);
9145         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9146         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9147       }
9148     }
9149   }
9150 
9151   // fold (srl (shl x, c), c) -> (and x, cst2)
9152   // TODO - (srl (shl x, c1), c2).
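  // (e.g., on i8: srl (shl X, 3), 3 clears the high 3 bits of X, i.e.
  //  and X, 0b00011111 == and X, (srl -1, 3).)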
9153   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9154       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9155     SDLoc DL(N);
9156     SDValue Mask =
9157         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9158     AddToWorklist(Mask.getNode());
9159     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9160   }
9161 
9162   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9163   // TODO - support non-uniform vector shift amounts.
9164   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9165     // Shifting in all undef bits?
9166     EVT SmallVT = N0.getOperand(0).getValueType();
9167     unsigned BitSize = SmallVT.getScalarSizeInBits();
9168     if (N1C->getAPIntValue().uge(BitSize))
9169       return DAG.getUNDEF(VT);
9170 
9171     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9172       uint64_t ShiftAmt = N1C->getZExtValue();
9173       SDLoc DL0(N0);
9174       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9175                                        N0.getOperand(0),
9176                           DAG.getConstant(ShiftAmt, DL0,
9177                                           getShiftAmountTy(SmallVT)));
9178       AddToWorklist(SmallShift.getNode());
9179       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9180       SDLoc DL(N);
9181       return DAG.getNode(ISD::AND, DL, VT,
9182                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9183                          DAG.getConstant(Mask, DL, VT));
9184     }
9185   }
9186 
9187   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
9188   // bit, which is unmodified by sra.
9189   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9190     if (N0.getOpcode() == ISD::SRA)
9191       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9192   }
9193 
9194   // fold (srl (ctlz x), log2(BW)) -> (xor x, 1) iff at most the low bit of x can be set.
9195   if (N1C && N0.getOpcode() == ISD::CTLZ &&
9196       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9197     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9198 
9199     // If any of the input bits are KnownOne, then the input couldn't be all
9200     // zeros, thus the result of the srl will always be zero.
9201     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9202 
9203     // If all of the bits input to the ctlz node are known to be zero, then
9204     // the result of the ctlz is the bit width and the result of the shift is one.
9205     APInt UnknownBits = ~Known.Zero;
9206     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9207 
9208     // Otherwise, check to see if there is exactly one bit input to the ctlz.
9209     if (UnknownBits.isPowerOf2()) {
9210       // Okay, we know that only the single bit specified by UnknownBits could
9211       // be set on input to the CTLZ node. If this bit is set, the SRL will
9212       // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to
9213       // an SRL/XOR pair, which is likely to simplify more.
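           // Example (a sketch, assuming i32 X where only bit 3 can be nonzero):
           //   srl (ctlz X), 5 --> xor (srl X, 3), 1
           // since (srl X, 3) is 1 exactly when X is nonzero.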
9214       unsigned ShAmt = UnknownBits.countTrailingZeros();
9215       SDValue Op = N0.getOperand(0);
9216 
9217       if (ShAmt) {
9218         SDLoc DL(N0);
9219         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9220                   DAG.getConstant(ShAmt, DL,
9221                                   getShiftAmountTy(Op.getValueType())));
9222         AddToWorklist(Op.getNode());
9223       }
9224 
9225       SDLoc DL(N);
9226       return DAG.getNode(ISD::XOR, DL, VT,
9227                          Op, DAG.getConstant(1, DL, VT));
9228     }
9229   }
9230 
9231   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9232   if (N1.getOpcode() == ISD::TRUNCATE &&
9233       N1.getOperand(0).getOpcode() == ISD::AND) {
9234     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9235       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9236   }
9237 
9238   // fold operands of srl based on knowledge that the low bits are not
9239   // demanded.
9240   if (SimplifyDemandedBits(SDValue(N, 0)))
9241     return SDValue(N, 0);
9242 
9243   if (N1C && !N1C->isOpaque())
9244     if (SDValue NewSRL = visitShiftByConstant(N))
9245       return NewSRL;
9246 
9247   // Attempt to convert a srl of a load into a narrower zero-extending load.
9248   if (SDValue NarrowLoad = reduceLoadWidth(N))
9249     return NarrowLoad;
9250 
9251   // Here is a common situation. We want to optimize:
9252   //
9253   //   %a = ...
9254   //   %b = and i32 %a, 2
9255   //   %c = srl i32 %b, 1
9256   //   brcond i32 %c ...
9257   //
9258   // into
9259   //
9260   //   %a = ...
9261   //   %b = and %a, 2
9262   //   %c = setcc eq %b, 0
9263   //   brcond %c ...
9264   //
9265   // However, when the source operand of the SRL is optimized into an AND, the
9266   // SRL itself may not be optimized further. Look for the BRCOND user and add
9267   // it to the worklist.
9268   if (N->hasOneUse()) {
9269     SDNode *Use = *N->use_begin();
9270     if (Use->getOpcode() == ISD::BRCOND)
9271       AddToWorklist(Use);
9272     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9273       // Also look past the truncate.
9274       Use = *Use->use_begin();
9275       if (Use->getOpcode() == ISD::BRCOND)
9276         AddToWorklist(Use);
9277     }
9278   }
9279 
9280   // Try to transform this shift into a multiply-high if
9281   // it matches the appropriate pattern detected in combineShiftToMULH.
9282   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9283     return MULH;
9284 
9285   return SDValue();
9286 }
9287 
9288 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9289   EVT VT = N->getValueType(0);
9290   SDValue N0 = N->getOperand(0);
9291   SDValue N1 = N->getOperand(1);
9292   SDValue N2 = N->getOperand(2);
9293   bool IsFSHL = N->getOpcode() == ISD::FSHL;
9294   unsigned BitWidth = VT.getScalarSizeInBits();
9295 
9296   // fold (fshl N0, N1, 0) -> N0
9297   // fold (fshr N0, N1, 0) -> N1
9298   if (isPowerOf2_32(BitWidth))
9299     if (DAG.MaskedValueIsZero(
9300             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9301       return IsFSHL ? N0 : N1;
9302 
9303   auto IsUndefOrZero = [](SDValue V) {
9304     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9305   };
9306 
9307   // TODO - support non-uniform vector shift amounts.
9308   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9309     EVT ShAmtTy = N2.getValueType();
9310 
9311     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
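         // Example (a sketch, assuming i8 operands):
         //   fshl A, B, 11 --> fshl A, B, 3, since 11 urem 8 == 3.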
9312     if (Cst->getAPIntValue().uge(BitWidth)) {
9313       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9314       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9315                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9316     }
9317 
9318     unsigned ShAmt = Cst->getZExtValue();
9319     if (ShAmt == 0)
9320       return IsFSHL ? N0 : N1;
9321 
9322     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9323     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9324     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9325     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
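         // Example (a sketch, assuming i32 and C == 8):
         //   fshl 0, B, 8 --> srl B, 24 and fshr A, 0, 8 --> shl A, 24.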
9326     if (IsUndefOrZero(N0))
9327       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9328                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9329                                          SDLoc(N), ShAmtTy));
9330     if (IsUndefOrZero(N1))
9331       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9332                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9333                                          SDLoc(N), ShAmtTy));
9334 
9335     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9336     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9337     // TODO - bigendian support once we have test coverage.
9338     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9339     // TODO - permit LHS EXTLOAD if extensions are shifted out.
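         // Example (a sketch, assuming a little-endian target, i32 loads, with
         // ld0 at address p and ld1 at p + 4, so the pair spans bytes
         // [p, p + 8)): fshr ld1, ld0, 8 --> load i32 @ (p + 1), reading the
         // shifted value straight from memory.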
9340     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9341         !DAG.getDataLayout().isBigEndian()) {
9342       auto *LHS = dyn_cast<LoadSDNode>(N0);
9343       auto *RHS = dyn_cast<LoadSDNode>(N1);
9344       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9345           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9346           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9347           ISD::isNON_EXTLoad(LHS)) {
9348         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9349           SDLoc DL(RHS);
9350           uint64_t PtrOff =
9351               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9352           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9353           bool Fast = false;
9354           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9355                                      RHS->getAddressSpace(), NewAlign,
9356                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9357               Fast) {
9358             SDValue NewPtr = DAG.getMemBasePlusOffset(
9359                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9360             AddToWorklist(NewPtr.getNode());
9361             SDValue Load = DAG.getLoad(
9362                 VT, DL, RHS->getChain(), NewPtr,
9363                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9364                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9365             // Replace the old load's chain with the new load's chain.
9366             WorklistRemover DeadNodes(*this);
9367             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9368             return Load;
9369           }
9370         }
9371       }
9372     }
9373   }
9374 
9375   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9376   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9377   // iff we know the shift amount is in range.
9378   // TODO: when is it worth doing SUB(BW, N2) as well?
9379   if (isPowerOf2_32(BitWidth)) {
9380     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9381     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9382       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9383     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9384       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9385   }
9386 
9387   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9388   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9389   // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9390   // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
9391   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9392   if (N0 == N1 && hasOperation(RotOpc, VT))
9393     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9394 
9395   // Simplify, based on bits shifted out of N0/N1.
9396   if (SimplifyDemandedBits(SDValue(N, 0)))
9397     return SDValue(N, 0);
9398 
9399   return SDValue();
9400 }
9401 
9402 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9403   SDValue N0 = N->getOperand(0);
9404   SDValue N1 = N->getOperand(1);
9405   if (SDValue V = DAG.simplifyShift(N0, N1))
9406     return V;
9407 
9408   EVT VT = N0.getValueType();
9409 
9410   // fold (*shlsat c1, c2) -> c1<<c2
9411   if (SDValue C =
9412           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9413     return C;
9414 
9415   return SDValue();
9416 }
9417 
9418 // Given an ABS node, detect the following pattern:
9419 // (ABS (SUB (EXTEND a), (EXTEND b))).
9420 // If found, generate a UABD/SABD node.
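     // Example (a sketch, assuming ABDU is legal or custom for i8):
     //   abs (sub (zext i8 a to i32), (zext i8 b to i32))
     //     --> zext (abdu a, b) to i32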
9421 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9422                                const TargetLowering &TLI) {
9423   SDValue AbsOp1 = N->getOperand(0);
9424   SDValue Op0, Op1;
9425 
9426   if (AbsOp1.getOpcode() != ISD::SUB)
9427     return SDValue();
9428 
9429   Op0 = AbsOp1.getOperand(0);
9430   Op1 = AbsOp1.getOperand(1);
9431 
9432   unsigned Opc0 = Op0.getOpcode();
9433   // Check if the operands of the sub are (zero|sign)-extended.
9434   if (Opc0 != Op1.getOpcode() ||
9435       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9436     return SDValue();
9437 
9438   EVT VT1 = Op0.getOperand(0).getValueType();
9439   EVT VT2 = Op1.getOperand(0).getValueType();
9440   // Check if the operands are of same type and valid size.
9441   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9442   if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9443     return SDValue();
9444 
9445   Op0 = Op0.getOperand(0);
9446   Op1 = Op1.getOperand(0);
9447   SDValue ABD =
9448       DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9449   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9450 }
9451 
9452 SDValue DAGCombiner::visitABS(SDNode *N) {
9453   SDValue N0 = N->getOperand(0);
9454   EVT VT = N->getValueType(0);
9455 
9456   // fold (abs c1) -> c2
9457   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9458     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9459   // fold (abs (abs x)) -> (abs x)
9460   if (N0.getOpcode() == ISD::ABS)
9461     return N0;
9462   // fold (abs x) -> x iff not-negative
9463   if (DAG.SignBitIsZero(N0))
9464     return N0;
9465 
9466   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9467     return ABD;
9468 
9469   return SDValue();
9470 }
9471 
9472 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9473   SDValue N0 = N->getOperand(0);
9474   EVT VT = N->getValueType(0);
9475 
9476   // fold (bswap c1) -> c2
9477   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9478     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9479   // fold (bswap (bswap x)) -> x
9480   if (N0.getOpcode() == ISD::BSWAP)
9481     return N0->getOperand(0);
9482 
9483   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9484   // isn't supported, it will be expanded to bswap followed by a manual reversal
9485   // of bits in each byte. By placing bswaps before bitreverse, we can remove
9486   // the two bswaps if the bitreverse gets expanded.
9487   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9488     SDLoc DL(N);
9489     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9490     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9491   }
9492 
9493   return SDValue();
9494 }
9495 
9496 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9497   SDValue N0 = N->getOperand(0);
9498   EVT VT = N->getValueType(0);
9499 
9500   // fold (bitreverse c1) -> c2
9501   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9502     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9503   // fold (bitreverse (bitreverse x)) -> x
9504   if (N0.getOpcode() == ISD::BITREVERSE)
9505     return N0.getOperand(0);
9506   return SDValue();
9507 }
9508 
9509 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9510   SDValue N0 = N->getOperand(0);
9511   EVT VT = N->getValueType(0);
9512 
9513   // fold (ctlz c1) -> c2
9514   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9515     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9516 
9517   // If the value is known never to be zero, switch to the undef version.
9518   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9519     if (DAG.isKnownNeverZero(N0))
9520       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9521   }
9522 
9523   return SDValue();
9524 }
9525 
9526 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9527   SDValue N0 = N->getOperand(0);
9528   EVT VT = N->getValueType(0);
9529 
9530   // fold (ctlz_zero_undef c1) -> c2
9531   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9532     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9533   return SDValue();
9534 }
9535 
9536 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9537   SDValue N0 = N->getOperand(0);
9538   EVT VT = N->getValueType(0);
9539 
9540   // fold (cttz c1) -> c2
9541   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9542     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9543 
9544   // If the value is known never to be zero, switch to the undef version.
9545   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9546     if (DAG.isKnownNeverZero(N0))
9547       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9548   }
9549 
9550   return SDValue();
9551 }
9552 
9553 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9554   SDValue N0 = N->getOperand(0);
9555   EVT VT = N->getValueType(0);
9556 
9557   // fold (cttz_zero_undef c1) -> c2
9558   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9559     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9560   return SDValue();
9561 }
9562 
9563 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9564   SDValue N0 = N->getOperand(0);
9565   EVT VT = N->getValueType(0);
9566 
9567   // fold (ctpop c1) -> c2
9568   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9569     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9570   return SDValue();
9571 }
9572 
9573 // FIXME: This should be checking for no signed zeros on individual operands, as
9574 // well as no NaNs.
9575 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9576                                          SDValue RHS,
9577                                          const TargetLowering &TLI) {
9578   const TargetOptions &Options = DAG.getTarget().Options;
9579   EVT VT = LHS.getValueType();
9580 
9581   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9582          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9583          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9584 }
9585 
9586 /// Generate Min/Max node
9587 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9588                                    SDValue RHS, SDValue True, SDValue False,
9589                                    ISD::CondCode CC, const TargetLowering &TLI,
9590                                    SelectionDAG &DAG) {
9591   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9592     return SDValue();
9593 
9594   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9595   switch (CC) {
9596   case ISD::SETOLT:
9597   case ISD::SETOLE:
9598   case ISD::SETLT:
9599   case ISD::SETLE:
9600   case ISD::SETULT:
9601   case ISD::SETULE: {
9602     // Since the operands are known never to be NaN if we get here, either
9603     // fminnum or fminnum_ieee is OK. Try the IEEE version first, since fminnum
9604     // is expanded in terms of it.
9605     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9606     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9607       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9608 
9609     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9610     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9611       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9612     return SDValue();
9613   }
9614   case ISD::SETOGT:
9615   case ISD::SETOGE:
9616   case ISD::SETGT:
9617   case ISD::SETGE:
9618   case ISD::SETUGT:
9619   case ISD::SETUGE: {
9620     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9621     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9622       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9623 
9624     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9625     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9626       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9627     return SDValue();
9628   }
9629   default:
9630     return SDValue();
9631   }
9632 }
9633 
9634 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9635 /// the condition operand sign-bit across the value width and use it as a mask.
9636 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9637   SDValue Cond = N->getOperand(0);
9638   SDValue C1 = N->getOperand(1);
9639   SDValue C2 = N->getOperand(2);
9640   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9641     return SDValue();
9642 
9643   EVT VT = N->getValueType(0);
9644   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9645       VT != Cond.getOperand(0).getValueType())
9646     return SDValue();
9647 
9648   // The inverted-condition + commuted-select variants of these patterns are
9649   // canonicalized to these forms in IR.
9650   SDValue X = Cond.getOperand(0);
9651   SDValue CondC = Cond.getOperand(1);
9652   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9653   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9654       isAllOnesOrAllOnesSplat(C2)) {
9655     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9656     SDLoc DL(N);
9657     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9658     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9659     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9660   }
9661   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9662     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9663     SDLoc DL(N);
9664     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9665     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9666     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9667   }
9668   return SDValue();
9669 }
9670 
9671 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9672   SDValue Cond = N->getOperand(0);
9673   SDValue N1 = N->getOperand(1);
9674   SDValue N2 = N->getOperand(2);
9675   EVT VT = N->getValueType(0);
9676   EVT CondVT = Cond.getValueType();
9677   SDLoc DL(N);
9678 
9679   if (!VT.isInteger())
9680     return SDValue();
9681 
9682   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9683   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9684   if (!C1 || !C2)
9685     return SDValue();
9686 
9687   // Only do this before legalization to avoid conflicting with target-specific
9688   // transforms in the other direction (create a select from a zext/sext). There
9689   // is also a target-independent combine here in DAGCombiner in the other
9690   // direction for (select Cond, -1, 0) when the condition is not i1.
9691   if (CondVT == MVT::i1 && !LegalOperations) {
9692     if (C1->isZero() && C2->isOne()) {
9693       // select Cond, 0, 1 --> zext (!Cond)
9694       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9695       if (VT != MVT::i1)
9696         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9697       return NotCond;
9698     }
9699     if (C1->isZero() && C2->isAllOnes()) {
9700       // select Cond, 0, -1 --> sext (!Cond)
9701       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9702       if (VT != MVT::i1)
9703         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9704       return NotCond;
9705     }
9706     if (C1->isOne() && C2->isZero()) {
9707       // select Cond, 1, 0 --> zext (Cond)
9708       if (VT != MVT::i1)
9709         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9710       return Cond;
9711     }
9712     if (C1->isAllOnes() && C2->isZero()) {
9713       // select Cond, -1, 0 --> sext (Cond)
9714       if (VT != MVT::i1)
9715         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9716       return Cond;
9717     }
9718 
9719     // Use a target hook because some targets may prefer to transform in the
9720     // other direction.
9721     if (TLI.convertSelectOfConstantsToMath(VT)) {
9722       // For any constants that differ by 1, we can transform the select into an
9723       // extend and add.
9724       const APInt &C1Val = C1->getAPIntValue();
9725       const APInt &C2Val = C2->getAPIntValue();
9726       if (C1Val - 1 == C2Val) {
9727         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9728         if (VT != MVT::i1)
9729           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9730         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9731       }
9732       if (C1Val + 1 == C2Val) {
9733         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9734         if (VT != MVT::i1)
9735           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9736         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9737       }
9738 
9739       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
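           // For example: select Cond, 16, 0 --> shl (zext Cond), 4.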
9740       if (C1Val.isPowerOf2() && C2Val.isZero()) {
9741         if (VT != MVT::i1)
9742           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9743         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9744         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9745       }
9746 
9747       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9748         return V;
9749     }
9750 
9751     return SDValue();
9752   }
9753 
9754   // fold (select Cond, 0, 1) -> (xor Cond, 1)
9755   // We can't do this reliably if integer-based booleans have different contents
9756   // from floating-point-based booleans. This is because we can't tell whether
9757   // we have an integer-based boolean or a floating-point-based boolean unless
9758   // we can find the SETCC that produced it and inspect its operands. This is
9759   // fairly easy if Cond is the SETCC node, but it can potentially be
9760   // undiscoverable (or not reasonably discoverable). For example, it could be
9761   // in another basic block or it could require searching a complicated
9762   // expression.
9763   if (CondVT.isInteger() &&
9764       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9765           TargetLowering::ZeroOrOneBooleanContent &&
9766       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9767           TargetLowering::ZeroOrOneBooleanContent &&
9768       C1->isZero() && C2->isOne()) {
9769     SDValue NotCond =
9770         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9771     if (VT.bitsEq(CondVT))
9772       return NotCond;
9773     return DAG.getZExtOrTrunc(NotCond, DL, VT);
9774   }
9775 
9776   return SDValue();
9777 }
9778 
9779 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9780   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9781          "Expected a (v)select");
9782   SDValue Cond = N->getOperand(0);
9783   SDValue T = N->getOperand(1), F = N->getOperand(2);
9784   EVT VT = N->getValueType(0);
9785   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9786     return SDValue();
9787 
9788   // select Cond, Cond, F --> or Cond, F
9789   // select Cond, 1, F    --> or Cond, F
9790   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9791     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9792 
9793   // select Cond, T, Cond --> and Cond, T
9794   // select Cond, T, 0    --> and Cond, T
9795   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9796     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9797 
9798   // select Cond, T, 1 --> or (not Cond), T
9799   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9800     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9801     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9802   }
9803 
9804   // select Cond, 0, F --> and (not Cond), F
9805   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9806     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9807     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9808   }
9809 
9810   return SDValue();
9811 }
9812 
9813 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
9814   SDValue N0 = N->getOperand(0);
9815   SDValue N1 = N->getOperand(1);
9816   SDValue N2 = N->getOperand(2);
9817   EVT VT = N->getValueType(0);
9818   if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
9819     return SDValue();
9820 
9821   SDValue Cond0 = N0.getOperand(0);
9822   SDValue Cond1 = N0.getOperand(1);
9823   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9824   if (VT != Cond0.getValueType())
9825     return SDValue();
9826 
9827   // Match a signbit check of Cond0 as "Cond0 s< 0". Swap select operands if the
9828   // compare is inverted from that pattern ("Cond0 s> -1").
9829   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
9830     ; // This is the pattern we are looking for.
9831   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
9832     std::swap(N1, N2);
9833   else
9834     return SDValue();
9835 
9836   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
9837   if (isNullOrNullSplat(N2)) {
9838     SDLoc DL(N);
9839     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9840     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9841     return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
9842   }
9843 
9844   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
9845   if (isAllOnesOrAllOnesSplat(N1)) {
9846     SDLoc DL(N);
9847     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9848     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9849     return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
9850   }
9851 
9852   // If we have to invert the sign bit mask, only do that transform if the
9853   // target has a bitwise 'and not' instruction (the invert is free).
9854   // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
9855   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9856   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
9857     SDLoc DL(N);
9858     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9859     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9860     SDValue Not = DAG.getNOT(DL, Sra, VT);
9861     return DAG.getNode(ISD::AND, DL, VT, Not, N2);
9862   }
9863 
9864   // TODO: There's another pattern in this family, but it may require
9865   //       implementing hasOrNot() to check for profitability:
9866   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
9867 
9868   return SDValue();
9869 }
9870 
9871 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9872   SDValue N0 = N->getOperand(0);
9873   SDValue N1 = N->getOperand(1);
9874   SDValue N2 = N->getOperand(2);
9875   EVT VT = N->getValueType(0);
9876   EVT VT0 = N0.getValueType();
9877   SDLoc DL(N);
9878   SDNodeFlags Flags = N->getFlags();
9879 
9880   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9881     return V;
9882 
9883   if (SDValue V = foldSelectOfConstants(N))
9884     return V;
9885 
9886   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9887     return V;
9888 
9889   // If we can fold this based on the true/false value, do so.
9890   if (SimplifySelectOps(N, N1, N2))
9891     return SDValue(N, 0); // Don't revisit N.
9892 
9893   if (VT0 == MVT::i1) {
9894     // The code in this block deals with the following two equivalences:
9895     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9896     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9897     // The target can specify its preferred form with the
9898     // shouldNormalizeToSelectSequence() callback. However, we always transform
9899     // to the right-hand form if the inner select already exists in the DAG, and
9900     // we always transform to the left-hand form if we know that we can further
9901     // optimize the combination of the conditions.
9902     bool normalizeToSequence =
9903         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9904     // select (and Cond0, Cond1), X, Y
9905     //   -> select Cond0, (select Cond1, X, Y), Y
9906     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9907       SDValue Cond0 = N0->getOperand(0);
9908       SDValue Cond1 = N0->getOperand(1);
9909       SDValue InnerSelect =
9910           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9911       if (normalizeToSequence || !InnerSelect.use_empty())
9912         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9913                            InnerSelect, N2, Flags);
9914       // Cleanup on failure.
9915       if (InnerSelect.use_empty())
9916         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9917     }
9918     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9919     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9920       SDValue Cond0 = N0->getOperand(0);
9921       SDValue Cond1 = N0->getOperand(1);
9922       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9923                                         Cond1, N1, N2, Flags);
9924       if (normalizeToSequence || !InnerSelect.use_empty())
9925         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9926                            InnerSelect, Flags);
9927       // Cleanup on failure.
9928       if (InnerSelect.use_empty())
9929         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9930     }
9931 
9932     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9933     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9934       SDValue N1_0 = N1->getOperand(0);
9935       SDValue N1_1 = N1->getOperand(1);
9936       SDValue N1_2 = N1->getOperand(2);
9937       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9938         // Create the actual and node if we can generate good code for it.
9939         if (!normalizeToSequence) {
9940           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9941           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9942                              N2, Flags);
9943         }
9944         // Otherwise see if we can optimize the "and" to a better pattern.
9945         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9946           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9947                              N2, Flags);
9948         }
9949       }
9950     }
9951     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9952     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9953       SDValue N2_0 = N2->getOperand(0);
9954       SDValue N2_1 = N2->getOperand(1);
9955       SDValue N2_2 = N2->getOperand(2);
9956       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9957         // Create the actual or node if we can generate good code for it.
9958         if (!normalizeToSequence) {
9959           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9960           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9961                              N2_2, Flags);
9962         }
9963         // Otherwise see if we can optimize to a better pattern.
9964         if (SDValue Combined = visitORLike(N0, N2_0, N))
9965           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9966                              N2_2, Flags);
9967       }
9968     }
9969   }
9970 
9971   // select (not Cond), N1, N2 -> select Cond, N2, N1
9972   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9973     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9974     SelectOp->setFlags(Flags);
9975     return SelectOp;
9976   }
9977 
9978   // Fold selects based on a setcc into other things, such as min/max/abs.
9979   if (N0.getOpcode() == ISD::SETCC) {
9980     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9981     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9982 
9983     // select (fcmp lt x, y), x, y -> fminnum x, y
9984     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9985     //
9986     // This is OK if we don't care what happens if either operand is a NaN.
9987     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9988       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9989                                                 CC, TLI, DAG))
9990         return FMinMax;
9991 
9992     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9993     // This is conservatively limited to pre-legal-operations to give targets
9994     // a chance to reverse the transform if they want to do that. Also, it is
9995     // unlikely that the pattern would be formed late, so it's probably not
9996     // worth going through the other checks.
9997     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9998         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9999         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10000       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10001       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10002       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10003         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10004         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10005         //
10006         // The IR equivalent of this transform would have this form:
10007         //   %a = add %x, C
10008         //   %c = icmp ugt %x, ~C
10009         //   %r = select %c, -1, %a
10010         //   =>
10011         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10012         //   %u0 = extractvalue %u, 0
10013         //   %u1 = extractvalue %u, 1
10014         //   %r = select %u1, -1, %u0
10015         SDVTList VTs = DAG.getVTList(VT, VT0);
10016         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10017         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10018       }
10019     }
10020 
10021     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10022         (!LegalOperations &&
10023          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
10024       // Any flags available in a select/setcc fold will be on the setcc as they
10025       // were migrated from the fcmp.
10026       Flags = N0.getNode()->getFlags();
10027       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10028                                        N2, N0.getOperand(2));
10029       SelectNode->setFlags(Flags);
10030       return SelectNode;
10031     }
10032 
10033     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10034       return NewSel;
10035   }
10036 
10037   if (!VT.isVector())
10038     if (SDValue BinOp = foldSelectOfBinops(N))
10039       return BinOp;
10040 
10041   return SDValue();
10042 }
10043 
10044 // This function assumes all the vselect's arguments are CONCAT_VECTOR
10045 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
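      // Example (a sketch, assuming v4i32 operands):
      //   vselect <0, 0, -1, -1>, (concat_vectors A, B), (concat_vectors C, D)
      //     --> concat_vectors C, B
      // since the bottom half of the condition selects from the false operand
      // and the top half from the true operand.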
10046 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
10047   SDLoc DL(N);
10048   SDValue Cond = N->getOperand(0);
10049   SDValue LHS = N->getOperand(1);
10050   SDValue RHS = N->getOperand(2);
10051   EVT VT = N->getValueType(0);
10052   int NumElems = VT.getVectorNumElements();
10053   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10054          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10055          Cond.getOpcode() == ISD::BUILD_VECTOR);
10056 
10057   // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
10058   // about the two-operand case here.
10059   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10060     return SDValue();
10061 
10062   // We're sure we have an even number of elements due to the
10063   // concat_vectors we have as arguments to vselect.
10064   // Skip BV elements until we find one that's not an UNDEF. After we find a
10065   // non-UNDEF element, keep looping until we get to half the length of the BV,
10066   // checking that all the non-undef nodes are the same.
10067   ConstantSDNode *BottomHalf = nullptr;
10068   for (int i = 0; i < NumElems / 2; ++i) {
10069     if (Cond->getOperand(i)->isUndef())
10070       continue;
10071 
10072     if (BottomHalf == nullptr)
10073       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10074     else if (Cond->getOperand(i).getNode() != BottomHalf)
10075       return SDValue();
10076   }
10077 
10078   // Do the same for the second half of the BuildVector
10079   ConstantSDNode *TopHalf = nullptr;
10080   for (int i = NumElems / 2; i < NumElems; ++i) {
10081     if (Cond->getOperand(i)->isUndef())
10082       continue;
10083 
10084     if (TopHalf == nullptr)
10085       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10086     else if (Cond->getOperand(i).getNode() != TopHalf)
10087       return SDValue();
10088   }
10089 
10090   assert(TopHalf && BottomHalf &&
10091          "One half of the selector was all UNDEFs and the other was all the "
10092          "same value. This should have been addressed before this function.");
10093   return DAG.getNode(
10094       ISD::CONCAT_VECTORS, DL, VT,
10095       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10096       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10097 }
10098 
10099 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
10100   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10101     return false;
10102 
10103   // For now we check only the LHS of the add.
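        // Example (a sketch): a gather/scatter with BasePtr == 0 and
        // Index == add (splat %ptr), %offsets is rewritten to use
        // BasePtr == %ptr and Index == %offsets.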
10104   SDValue LHS = Index.getOperand(0);
10105   SDValue SplatVal = DAG.getSplatValue(LHS);
10106   if (!SplatVal)
10107     return false;
10108 
10109   BasePtr = SplatVal;
10110   Index = Index.getOperand(1);
10111   return true;
10112 }
10113 
10114 // Fold sext/zext of index into index type.
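      // Example (a sketch): if Index == zext <N x i32> %idx to <N x i64> and the
      // target reports via shouldRemoveExtendFromGSIndex that an i32 index is
      // acceptable, use %idx directly and mark the index type as unsigned.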
10115 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
10116                      bool Scaled, SelectionDAG &DAG) {
10117   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10118 
10119   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10120     SDValue Op = Index.getOperand(0);
10121     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
10122     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10123       Index = Op;
10124       return true;
10125     }
10126   }
10127 
10128   if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10129     SDValue Op = Index.getOperand(0);
10130     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
10131     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10132       Index = Op;
10133       return true;
10134     }
10135   }
10136 
10137   return false;
10138 }
10139 
10140 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10141   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10142   SDValue Mask = MSC->getMask();
10143   SDValue Chain = MSC->getChain();
10144   SDValue Index = MSC->getIndex();
10145   SDValue Scale = MSC->getScale();
10146   SDValue StoreVal = MSC->getValue();
10147   SDValue BasePtr = MSC->getBasePtr();
10148   SDLoc DL(N);
10149 
10150   // Zap scatters with a zero mask.
10151   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10152     return Chain;
10153 
10154   if (refineUniformBase(BasePtr, Index, DAG)) {
10155     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10156     return DAG.getMaskedScatter(
10157         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10158         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10159   }
10160 
10161   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10162     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10163     return DAG.getMaskedScatter(
10164         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10165         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10166   }
10167 
10168   return SDValue();
10169 }
10170 
10171 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10172   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10173   SDValue Mask = MST->getMask();
10174   SDValue Chain = MST->getChain();
10175   SDValue Value = MST->getValue();
10176   SDValue Ptr = MST->getBasePtr();
10177   SDLoc DL(N);
10178 
10179   // Zap masked stores with a zero mask.
10180   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10181     return Chain;
10182 
10183   // If this is a masked store with an all-ones mask, we can use an unmasked store.
10184   // FIXME: Can we do this for indexed, compressing, or truncating stores?
10185   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10186       !MST->isCompressingStore() && !MST->isTruncatingStore())
10187     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10188                         MST->getBasePtr(), MST->getPointerInfo(),
10189                         MST->getOriginalAlign(), MachineMemOperand::MOStore,
10190                         MST->getAAInfo());
10191 
10192   // Try transforming N to an indexed store.
10193   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10194     return SDValue(N, 0);
10195 
10196   if (MST->isTruncatingStore() && MST->isUnindexed() &&
10197       Value.getValueType().isInteger() &&
10198       (!isa<ConstantSDNode>(Value) ||
10199        !cast<ConstantSDNode>(Value)->isOpaque())) {
10200     APInt TruncDemandedBits =
10201         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10202                              MST->getMemoryVT().getScalarSizeInBits());
10203 
10204     // See if we can simplify the operation with
10205     // SimplifyDemandedBits, which only works if the value has a single use.
10206     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10207       // Re-visit the store if anything changed and the store hasn't been merged
10208       // with another node (in which case N is deleted). SimplifyDemandedBits
10209       // will add Value's node back to the worklist if necessary, but we also
10210       // need to re-visit the store node itself.
10211       if (N->getOpcode() != ISD::DELETED_NODE)
10212         AddToWorklist(N);
10213       return SDValue(N, 0);
10214     }
10215   }
10216 
10217   // If this is a TRUNC followed by a masked store, fold this into a masked
10218   // truncating store.  We can do this even if this is already a masked
10219   // truncstore.
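        // Example (a sketch): a masked store of (trunc i32 %x to i16) becomes a
        // truncating masked store of %x itself, with i16 memory type.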
10220   if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
10221       MST->isUnindexed() &&
10222       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10223                                MST->getMemoryVT(), LegalOperations)) {
10224     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10225                                          Value.getOperand(0).getValueType());
10226     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10227                               MST->getOffset(), Mask, MST->getMemoryVT(),
10228                               MST->getMemOperand(), MST->getAddressingMode(),
10229                               /*IsTruncating=*/true);
10230   }
10231 
10232   return SDValue();
10233 }
10234 
10235 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10236   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10237   SDValue Mask = MGT->getMask();
10238   SDValue Chain = MGT->getChain();
10239   SDValue Index = MGT->getIndex();
10240   SDValue Scale = MGT->getScale();
10241   SDValue PassThru = MGT->getPassThru();
10242   SDValue BasePtr = MGT->getBasePtr();
10243   SDLoc DL(N);
10244 
10245   // Zap gathers with a zero mask.
10246   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10247     return CombineTo(N, PassThru, MGT->getChain());
10248 
10249   if (refineUniformBase(BasePtr, Index, DAG)) {
10250     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10251     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10252                                MGT->getMemoryVT(), DL, Ops,
10253                                MGT->getMemOperand(), MGT->getIndexType(),
10254                                MGT->getExtensionType());
10255   }
10256 
10257   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10258     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10259     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10260                                MGT->getMemoryVT(), DL, Ops,
10261                                MGT->getMemOperand(), MGT->getIndexType(),
10262                                MGT->getExtensionType());
10263   }
10264 
10265   return SDValue();
10266 }
10267 
10268 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10269   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10270   SDValue Mask = MLD->getMask();
10271   SDLoc DL(N);
10272 
10273   // Zap masked loads with a zero mask.
10274   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10275     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10276 
10277   // If this is a masked load with an all-ones mask, we can use an unmasked load.
10278   // FIXME: Can we do this for indexed, expanding, or extending loads?
10279   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10280       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10281     SDValue NewLd = DAG.getLoad(
10282         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10283         MLD->getPointerInfo(), MLD->getOriginalAlign(),
10284         MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10285     return CombineTo(N, NewLd, NewLd.getValue(1));
10286   }
10287 
10288   // Try transforming N to an indexed load.
10289   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10290     return SDValue(N, 0);
10291 
10292   return SDValue();
10293 }
10294 
10295 /// A vector select of 2 constant vectors can be simplified to math/logic to
10296 /// avoid a variable select instruction and possibly avoid constant loads.
10297 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10298   SDValue Cond = N->getOperand(0);
10299   SDValue N1 = N->getOperand(1);
10300   SDValue N2 = N->getOperand(2);
10301   EVT VT = N->getValueType(0);
10302   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10303       !TLI.convertSelectOfConstantsToMath(VT) ||
10304       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10305       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10306     return SDValue();
10307 
10308   // Check if we can use the condition value to increment/decrement a single
10309   // constant value. This simplifies a select to an add and removes a constant
10310   // load/materialization from the general case.
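        // Example (a sketch): vselect <2 x i1> Cond, <3, 5>, <2, 4>
        //   --> add (zext Cond), <2, 4>
        // since each true-arm constant is exactly one more than the false arm.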
10311   bool AllAddOne = true;
10312   bool AllSubOne = true;
10313   unsigned Elts = VT.getVectorNumElements();
10314   for (unsigned i = 0; i != Elts; ++i) {
10315     SDValue N1Elt = N1.getOperand(i);
10316     SDValue N2Elt = N2.getOperand(i);
10317     if (N1Elt.isUndef() || N2Elt.isUndef())
10318       continue;
10319     if (N1Elt.getValueType() != N2Elt.getValueType())
10320       continue;
10321 
10322     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10323     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10324     if (C1 != C2 + 1)
10325       AllAddOne = false;
10326     if (C1 != C2 - 1)
10327       AllSubOne = false;
10328   }
10329 
10330   // Further simplifications for the extra-special cases where the constants are
10331   // all 0 or all -1 should be implemented as folds of these patterns.
10332   SDLoc DL(N);
10333   if (AllAddOne || AllSubOne) {
10334     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10335     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10336     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10337     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10338     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10339   }
10340 
10341   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10342   APInt Pow2C;
10343   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10344       isNullOrNullSplat(N2)) {
10345     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10346     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10347     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10348   }
10349 
10350   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10351     return V;
10352 
10353   // The general case for select-of-constants:
10354   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10355   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10356   // leave that to a machine-specific pass.
10357   return SDValue();
10358 }
10359 
10360 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10361   SDValue N0 = N->getOperand(0);
10362   SDValue N1 = N->getOperand(1);
10363   SDValue N2 = N->getOperand(2);
10364   EVT VT = N->getValueType(0);
10365   SDLoc DL(N);
10366 
10367   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10368     return V;
10369 
10370   if (SDValue V = foldBoolSelectToLogic(N, DAG))
10371     return V;
10372 
10373   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10374   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10375     return DAG.getSelect(DL, VT, F, N2, N1);
10376 
10377   // Canonicalize integer abs.
10378   // vselect (setg[te] X,  0),  X, -X ->
10379   // vselect (setgt    X, -1),  X, -X ->
10380   // vselect (setl[te] X,  0), -X,  X ->
10381   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10382   if (N0.getOpcode() == ISD::SETCC) {
10383     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10384     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10385     bool isAbs = false;
10386     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10387 
10388     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10389          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10390         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10391       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10392     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10393              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10394       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10395 
10396     if (isAbs) {
10397       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10398         return DAG.getNode(ISD::ABS, DL, VT, LHS);
10399 
10400       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10401                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
10402                                                   DL, getShiftAmountTy(VT)));
10403       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10404       AddToWorklist(Shift.getNode());
10405       AddToWorklist(Add.getNode());
10406       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10407     }
10408 
10409     // vselect (fcmp lt x, y), x, y -> fminnum x, y
10410     // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
10411     //
10412     // This is OK if we don't care about what happens if either operand is a
10413     // NaN.
10414     //
10415     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10416       if (SDValue FMinMax =
10417               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10418         return FMinMax;
10419     }
10420 
10421     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10422       return S;
10423     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10424       return S;
10425 
10426     // If this select has a condition (setcc) with narrower operands than the
10427     // select, try to widen the compare to match the select width.
10428     // TODO: This should be extended to handle any constant.
10429     // TODO: This could be extended to handle non-loading patterns, but that
10430     //       requires thorough testing to avoid regressions.
10431     if (isNullOrNullSplat(RHS)) {
10432       EVT NarrowVT = LHS.getValueType();
10433       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10434       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10435       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10436       unsigned WideWidth = WideVT.getScalarSizeInBits();
10437       bool IsSigned = isSignedIntSetCC(CC);
10438       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10439       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10440           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10441           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10442           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10443         // Both compare operands can be widened for free. The LHS can use an
10444         // extended load, and the RHS is a constant:
10445         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10446         //   vselect (setcc extload(X), C'), N1, N2
10447         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10448         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10449         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10450         EVT WideSetCCVT = getSetCCResultType(WideVT);
10451         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10452         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10453       }
10454     }
10455 
10456     // Match VSELECTs into add with unsigned saturation.
10457     if (hasOperation(ISD::UADDSAT, VT)) {
10458       // Check if one of the arms of the VSELECT is a vector with all bits set.
10459       // If it's on the left side, invert the predicate to simplify logic below.
10460       SDValue Other;
10461       ISD::CondCode SatCC = CC;
10462       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10463         Other = N2;
10464         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10465       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10466         Other = N1;
10467       }
10468 
10469       if (Other && Other.getOpcode() == ISD::ADD) {
10470         SDValue CondLHS = LHS, CondRHS = RHS;
10471         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10472 
10473         // Canonicalize condition operands.
10474         if (SatCC == ISD::SETUGE) {
10475           std::swap(CondLHS, CondRHS);
10476           SatCC = ISD::SETULE;
10477         }
10478 
10479         // We can test against either of the addition operands.
10480         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10481         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10482         if (SatCC == ISD::SETULE && Other == CondRHS &&
10483             (OpLHS == CondLHS || OpRHS == CondLHS))
10484           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10485 
10486         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10487             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10488              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10489             CondLHS == OpLHS) {
10490           // If the RHS is a constant we have to reverse the const
10491           // canonicalization.
10492           // x >= ~C ? x+C : ~0 --> uaddsat x, C
10493           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10494             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10495           };
10496           if (SatCC == ISD::SETULE &&
10497               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10498             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10499         }
10500       }
10501     }
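    // A worked instance of the uaddsat pattern (illustrative, i8 lanes): with
    // x = 200 and y = 100, x+y wraps to 44, so "x <= x+y" is false and the
    // select yields ~0 (255); uaddsat(200, 100) saturates to 255 as well,
    // while non-overflowing lanes produce the plain sum on both sides.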
10502 
10503     // Match VSELECTs into sub with unsigned saturation.
10504     if (hasOperation(ISD::USUBSAT, VT)) {
10505       // Check if one of the arms of the VSELECT is a zero vector. If it's on
10506       // the left side, invert the predicate to simplify the logic below.
10507       SDValue Other;
10508       ISD::CondCode SatCC = CC;
10509       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10510         Other = N2;
10511         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10512       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10513         Other = N1;
10514       }
10515 
10516       if (Other && Other.getNumOperands() == 2) {
10517         SDValue CondRHS = RHS;
10518         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10519 
10520         if (Other.getOpcode() == ISD::SUB &&
10521             LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10522             OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10523           // Look for a general sub with unsigned saturation first.
10524           // zext(x) >= y ? x - trunc(y) : 0
10525           // --> usubsat(x,trunc(umin(y,SatLimit)))
10526           // zext(x) >  y ? x - trunc(y) : 0
10527           // --> usubsat(x,trunc(umin(y,SatLimit)))
10528           if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10529             return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10530                                        DL);
10531         }
10532 
10533         if (OpLHS == LHS) {
10534           // Look for a general sub with unsigned saturation first.
10535           // x >= y ? x-y : 0 --> usubsat x, y
10536           // x >  y ? x-y : 0 --> usubsat x, y
10537           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10538               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10539             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10540 
10541           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10542               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10543             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10544                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10545               // If the RHS is a constant we have to reverse the const
10546               // canonicalization.
10547               // x > C-1 ? x+(-C) : 0 --> usubsat x, C
10548               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10549                 return (!Op && !Cond) ||
10550                        (Op && Cond &&
10551                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10552               };
10553               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10554                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10555                                             /*AllowUndefs*/ true)) {
10556                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10557                                     DAG.getConstant(0, DL, VT), OpRHS);
10558                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10559               }
10560 
10561               // Another special case: If C was a sign bit, the sub has been
10562               // canonicalized into a xor.
10563               // FIXME: Would it be better to use computeKnownBits to determine
10564               //        whether it's safe to decanonicalize the xor?
10565               // x s< 0 ? x^C : 0 --> usubsat x, C
10566               APInt SplatValue;
10567               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10568                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10569                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10570                   SplatValue.isSignMask()) {
10571                 // Note that we have to rebuild the RHS constant here to
10572                 // ensure we don't rely on particular values of undef lanes.
10573                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10574                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10575               }
10576             }
10577           }
10578         }
10579       }
10580     }
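    // A worked instance of the usubsat pattern (illustrative, i8 lanes): with
    // x = 10 and y = 30, "x >= y" is false so the select yields 0, matching
    // usubsat(10, 30) which saturates at zero; when x >= y both forms produce
    // the ordinary difference x-y.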
10581   }
10582 
10583   if (SimplifySelectOps(N, N1, N2))
10584     return SDValue(N, 0);  // Don't revisit N.
10585 
10586   // Fold (vselect all_ones, N1, N2) -> N1
10587   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10588     return N1;
10589   // Fold (vselect all_zeros, N1, N2) -> N2
10590   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10591     return N2;
10592 
10593   // The ConvertSelectToConcatVector function assumes both of the above
10594   // checks for (vselect (build_vector all{ones,zeros}) ...) have already
10595   // been made and addressed.
10596   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10597       N2.getOpcode() == ISD::CONCAT_VECTORS &&
10598       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10599     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10600       return CV;
10601   }
10602 
10603   if (SDValue V = foldVSelectOfConstants(N))
10604     return V;
10605 
10606   if (hasOperation(ISD::SRA, VT))
10607     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10608       return V;
10609 
10610   return SDValue();
10611 }
10612 
10613 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10614   SDValue N0 = N->getOperand(0);
10615   SDValue N1 = N->getOperand(1);
10616   SDValue N2 = N->getOperand(2);
10617   SDValue N3 = N->getOperand(3);
10618   SDValue N4 = N->getOperand(4);
10619   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10620 
10621   // fold select_cc lhs, rhs, x, x, cc -> x
10622   if (N2 == N3)
10623     return N2;
10624 
10625   // Determine if the condition we're dealing with is constant
10626   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10627                                   CC, SDLoc(N), false)) {
10628     AddToWorklist(SCC.getNode());
10629 
10630     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10631       if (!SCCC->isZero())
10632         return N2;    // cond always true -> true val
10633       else
10634         return N3;    // cond always false -> false val
10635     } else if (SCC->isUndef()) {
10636       // When the condition is UNDEF, just return the first operand. This is
10637       // consistent with DAG creation: no setcc node is created in this case.
10638       return N2;
10639     } else if (SCC.getOpcode() == ISD::SETCC) {
10640       // Fold to a simpler select_cc
10641       SDValue SelectOp = DAG.getNode(
10642           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10643           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10644       SelectOp->setFlags(SCC->getFlags());
10645       return SelectOp;
10646     }
10647   }
10648 
10649   // If we can fold this based on the true/false value, do so.
10650   if (SimplifySelectOps(N, N2, N3))
10651     return SDValue(N, 0);  // Don't revisit N.
10652 
10653   // fold select_cc into other things, such as min/max/abs
10654   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10655 }
10656 
10657 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10658   // setcc is very commonly used as an argument to brcond. This pattern
10659   // also lends itself to numerous combines and, as a result, it is desirable
10660   // to keep the argument to a brcond as a setcc for as long as possible.
10661   bool PreferSetCC =
10662       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10663 
10664   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10665   EVT VT = N->getValueType(0);
10666 
10667   //   SETCC(FREEZE(X), CONST, Cond)
10668   // =>
10669   //   FREEZE(SETCC(X, CONST, Cond))
10670   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10671   // isn't equivalent to true or false.
10672   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10673   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10674   //
10675   // This transformation is beneficial because visitBRCOND can fold
10676   // BRCOND(FREEZE(X)) to BRCOND(X).
10677 
10678   // Conservatively optimize integer comparisons only.
10679   if (PreferSetCC) {
10680     // Do this only when SETCC is going to be used by BRCOND.
10681 
10682     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10683     ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10684     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10685     bool Updated = false;
10686 
10687     // Is 'X Cond C' always true or false?
10688     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10689       bool False = (Cond == ISD::SETULT && C->isZero()) ||
10690                    (Cond == ISD::SETLT  && C->isMinSignedValue()) ||
10691                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
10692                    (Cond == ISD::SETGT  && C->isMaxSignedValue());
10693       bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||
10694                    (Cond == ISD::SETLE  && C->isMaxSignedValue()) ||
10695                    (Cond == ISD::SETUGE && C->isZero()) ||
10696                    (Cond == ISD::SETGE  && C->isMinSignedValue());
10697       return True || False;
10698     };
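    // For example, (setult X, 0) is always false and (setule X, -1) is always
    // true regardless of X, so the setcc folds to a constant even for frozen
    // poison; hoisting the freeze above the setcc in those cases would turn a
    // known result into a poison-dependent one.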
10699 
10700     if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10701       if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10702         N0 = N0->getOperand(0);
10703         Updated = true;
10704       }
10705     }
10706     if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10707       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10708                                N0C)) {
10709         N1 = N1->getOperand(0);
10710         Updated = true;
10711       }
10712     }
10713 
10714     if (Updated)
10715       return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10716   }
10717 
10718   SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10719                                    SDLoc(N), !PreferSetCC);
10720 
10721   if (!Combined)
10722     return SDValue();
10723 
10724   // If we prefer to have a setcc but we don't have one, we'll try our best
10725   // to recreate one using rebuildSetCC.
10726   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10727     SDValue NewSetCC = rebuildSetCC(Combined);
10728 
10729     // We don't have anything interesting to combine to.
10730     if (NewSetCC.getNode() == N)
10731       return SDValue();
10732 
10733     if (NewSetCC)
10734       return NewSetCC;
10735   }
10736 
10737   return Combined;
10738 }
10739 
10740 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10741   SDValue LHS = N->getOperand(0);
10742   SDValue RHS = N->getOperand(1);
10743   SDValue Carry = N->getOperand(2);
10744   SDValue Cond = N->getOperand(3);
10745 
10746   // If Carry is false, fold to a regular SETCC.
10747   if (isNullConstant(Carry))
10748     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10749 
10750   return SDValue();
10751 }
10752 
10753 /// Check if N satisfies:
10754 ///   N is used once.
10755 ///   N is a Load.
10756 ///   The load is compatible with ExtOpcode. That is,
10757 ///     if the load has an explicit zero/sign extension, ExtOpcode must have
10758 ///     the same extension;
10759 ///     otherwise any ExtOpcode is compatible.
10760 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10761   if (!N.hasOneUse())
10762     return false;
10763 
10764   if (!isa<LoadSDNode>(N))
10765     return false;
10766 
10767   LoadSDNode *Load = cast<LoadSDNode>(N);
10768   ISD::LoadExtType LoadExt = Load->getExtensionType();
10769   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10770     return true;
10771 
10772   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10773   // extension.
10774   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10775       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10776     return false;
10777 
10778   return true;
10779 }
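// For example, a sextload (i8 -> i32) is compatible with ISD::SIGN_EXTEND but
// not with ISD::ZERO_EXTEND, while a plain non-extending load or an anyext
// load is compatible with every extension opcode.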
10780 
10781 /// Fold
10782 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10783 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10784 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10785 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10786 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10787 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10788                                          SelectionDAG &DAG) {
10789   unsigned Opcode = N->getOpcode();
10790   SDValue N0 = N->getOperand(0);
10791   EVT VT = N->getValueType(0);
10792   SDLoc DL(N);
10793 
10794   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10795           Opcode == ISD::ANY_EXTEND) &&
10796          "Expected EXTEND dag node in input!");
10797 
10798   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10799       !N0.hasOneUse())
10800     return SDValue();
10801 
10802   SDValue Op1 = N0->getOperand(1);
10803   SDValue Op2 = N0->getOperand(2);
10804   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10805     return SDValue();
10806 
10807   auto ExtLoadOpcode = ISD::EXTLOAD;
10808   if (Opcode == ISD::SIGN_EXTEND)
10809     ExtLoadOpcode = ISD::SEXTLOAD;
10810   else if (Opcode == ISD::ZERO_EXTEND)
10811     ExtLoadOpcode = ISD::ZEXTLOAD;
10812 
10813   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10814   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10815   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10816       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10817     return SDValue();
10818 
10819   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10820   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10821   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10822 }
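// An illustrative DAG for the sext case, assuming both operands are simple
// one-use i8 loads and sextload i8 -> i32 is legal for the target:
//   t1: i8 = load t0
//   t2: i8 = load t3
//   t4: i8 = select t5, t1, t2
//   t6: i32 = sign_extend t4
// -->
//   t7: i32 = sextload t0
//   t8: i32 = sextload t3
//   t9: i32 = select t5, t7, t8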
10823 
10824 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10825 /// a build_vector of constants.
10826 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10827 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10828 /// Vector extends are not folded if operations are legal; this is to
10829 /// avoid introducing illegal build_vector dag nodes.
10830 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10831                                          SelectionDAG &DAG, bool LegalTypes) {
10832   unsigned Opcode = N->getOpcode();
10833   SDValue N0 = N->getOperand(0);
10834   EVT VT = N->getValueType(0);
10835   SDLoc DL(N);
10836 
10837   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10838          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10839          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10840          && "Expected EXTEND dag node in input!");
10841 
10842   // fold (sext c1) -> c1
10843   // fold (zext c1) -> c1
10844   // fold (aext c1) -> c1
10845   if (isa<ConstantSDNode>(N0))
10846     return DAG.getNode(Opcode, DL, VT, N0);
10847 
10848   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10849   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10850   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10851   if (N0->getOpcode() == ISD::SELECT) {
10852     SDValue Op1 = N0->getOperand(1);
10853     SDValue Op2 = N0->getOperand(2);
10854     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10855         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10856       // For any_extend, choose sign extension of the constants to allow a
10857       // possible further transform to sign_extend_inreg, i.e.:
10858       //
10859       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10860       // t2: i64 = any_extend t1
10861       // -->
10862       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10863       // -->
10864       // t4: i64 = sign_extend_inreg t3
10865       unsigned FoldOpc = Opcode;
10866       if (FoldOpc == ISD::ANY_EXTEND)
10867         FoldOpc = ISD::SIGN_EXTEND;
10868       return DAG.getSelect(DL, VT, N0->getOperand(0),
10869                            DAG.getNode(FoldOpc, DL, VT, Op1),
10870                            DAG.getNode(FoldOpc, DL, VT, Op2));
10871     }
10872   }
10873 
10874   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
10875   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
10876   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10877   EVT SVT = VT.getScalarType();
10878   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10879       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10880     return SDValue();
10881 
10882   // We can fold this node into a build_vector.
10883   unsigned VTBits = SVT.getSizeInBits();
10884   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10885   SmallVector<SDValue, 8> Elts;
10886   unsigned NumElts = VT.getVectorNumElements();
10887 
10888   // For zero-extensions, UNDEF elements are still guaranteed to have their
10889   // upper bits set to zero.
10890   bool IsZext =
10891       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10892 
10893   for (unsigned i = 0; i != NumElts; ++i) {
10894     SDValue Op = N0.getOperand(i);
10895     if (Op.isUndef()) {
10896       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10897       continue;
10898     }
10899 
10900     SDLoc DL(Op);
10901     // Get the constant value and, if needed, truncate it to the type's size.
10902     // Nodes like build_vector might have constants wider than the scalar type.
10903     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10904     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10905       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10906     else
10907       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10908   }
10909 
10910   return DAG.getBuildVector(VT, DL, Elts);
10911 }
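// For example (illustrative):
//   (v4i32 (sext (v4i16 build_vector <1, -1, undef, 2>)))
// becomes (v4i32 build_vector <1, -1, undef, 2>), whereas the zext form
// rewrites the undef lane to 0 so the result's upper bits stay known-zero.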
10912 
10913 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
10914 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10915 // transformation. Returns true if the extensions are possible and the
10916 // above-mentioned transformation is profitable.
10917 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10918                                     unsigned ExtOpc,
10919                                     SmallVectorImpl<SDNode *> &ExtendNodes,
10920                                     const TargetLowering &TLI) {
10921   bool HasCopyToRegUses = false;
10922   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10923   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10924                             UE = N0.getNode()->use_end();
10925        UI != UE; ++UI) {
10926     SDNode *User = *UI;
10927     if (User == N)
10928       continue;
10929     if (UI.getUse().getResNo() != N0.getResNo())
10930       continue;
10931     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10932     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10933       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10934       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10935         // Sign bits will be lost after a zext.
10936         return false;
10937       bool Add = false;
10938       for (unsigned i = 0; i != 2; ++i) {
10939         SDValue UseOp = User->getOperand(i);
10940         if (UseOp == N0)
10941           continue;
10942         if (!isa<ConstantSDNode>(UseOp))
10943           return false;
10944         Add = true;
10945       }
10946       if (Add)
10947         ExtendNodes.push_back(User);
10948       continue;
10949     }
10950     // If truncates aren't free and there are users we can't
10951     // extend, it isn't worthwhile.
10952     if (!isTruncFree)
10953       return false;
10954     // Remember if this value is live-out.
10955     if (User->getOpcode() == ISD::CopyToReg)
10956       HasCopyToRegUses = true;
10957   }
10958 
10959   if (HasCopyToRegUses) {
10960     bool BothLiveOut = false;
10961     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10962          UI != UE; ++UI) {
10963       SDUse &Use = UI.getUse();
10964       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10965         BothLiveOut = true;
10966         break;
10967       }
10968     }
10969     if (BothLiveOut)
10970       // Both unextended and extended values are live out. There had better be
10971       // a good reason for the transformation.
10972       return ExtendNodes.size();
10973   }
10974   return true;
10975 }
10976 
10977 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10978                                   SDValue OrigLoad, SDValue ExtLoad,
10979                                   ISD::NodeType ExtType) {
10980   // Extend SetCC uses if necessary.
10981   SDLoc DL(ExtLoad);
10982   for (SDNode *SetCC : SetCCs) {
10983     SmallVector<SDValue, 4> Ops;
10984 
10985     for (unsigned j = 0; j != 2; ++j) {
10986       SDValue SOp = SetCC->getOperand(j);
10987       if (SOp == OrigLoad)
10988         Ops.push_back(ExtLoad);
10989       else
10990         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10991     }
10992 
10993     Ops.push_back(SetCC->getOperand(2));
10994     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10995   }
10996 }
10997 
10998 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10999 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11000   SDValue N0 = N->getOperand(0);
11001   EVT DstVT = N->getValueType(0);
11002   EVT SrcVT = N0.getValueType();
11003 
11004   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11005           N->getOpcode() == ISD::ZERO_EXTEND) &&
11006          "Unexpected node type (not an extend)!");
11007 
11008   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11009   // For example, on a target with legal v4i32, but illegal v8i32, turn:
11010   //   (v8i32 (sext (v8i16 (load x))))
11011   // into:
11012   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
11013   //                          (v4i32 (sextload (x + 16)))))
11014   // Where uses of the original load, i.e.:
11015   //   (v8i16 (load x))
11016   // are replaced with:
11017   //   (v8i16 (truncate
11018   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
11019   //                            (v4i32 (sextload (x + 16)))))))
11020   //
11021   // This combine is only applicable to illegal, but splittable, vectors.
11022   // All legal types, and illegal non-vector types, are handled elsewhere.
11023   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11024   //
11025   if (N0->getOpcode() != ISD::LOAD)
11026     return SDValue();
11027 
11028   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11029 
11030   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11031       !N0.hasOneUse() || !LN0->isSimple() ||
11032       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11033       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11034     return SDValue();
11035 
11036   SmallVector<SDNode *, 4> SetCCs;
11037   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11038     return SDValue();
11039 
11040   ISD::LoadExtType ExtType =
11041       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11042 
11043   // Try to split the vector types to get down to legal types.
11044   EVT SplitSrcVT = SrcVT;
11045   EVT SplitDstVT = DstVT;
11046   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11047          SplitSrcVT.getVectorNumElements() > 1) {
11048     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11049     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11050   }
11051 
11052   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11053     return SDValue();
11054 
11055   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11056 
11057   SDLoc DL(N);
11058   const unsigned NumSplits =
11059       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11060   const unsigned Stride = SplitSrcVT.getStoreSize();
11061   SmallVector<SDValue, 4> Loads;
11062   SmallVector<SDValue, 4> Chains;
11063 
11064   SDValue BasePtr = LN0->getBasePtr();
11065   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11066     const unsigned Offset = Idx * Stride;
11067     const Align Align = commonAlignment(LN0->getAlign(), Offset);
11068 
11069     SDValue SplitLoad = DAG.getExtLoad(
11070         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11071         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11072         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11073 
11074     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11075 
11076     Loads.push_back(SplitLoad.getValue(0));
11077     Chains.push_back(SplitLoad.getValue(1));
11078   }
11079 
11080   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11081   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11082 
11083   // Simplify the TokenFactor.
11084   AddToWorklist(NewChain.getNode());
11085 
11086   CombineTo(N, NewValue);
11087 
11088   // Replace uses of the original load (before extension)
11089   // with a truncate of the concatenated sextloaded vectors.
11090   SDValue Trunc =
11091       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11092   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11093   CombineTo(N0.getNode(), Trunc, NewChain);
11094   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11095 }
11096 
11097 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11098 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11099 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11100   assert(N->getOpcode() == ISD::ZERO_EXTEND);
11101   EVT VT = N->getValueType(0);
11102   EVT OrigVT = N->getOperand(0).getValueType();
11103   if (TLI.isZExtFree(OrigVT, VT))
11104     return SDValue();
11105 
11106   // and/or/xor
11107   SDValue N0 = N->getOperand(0);
11108   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11109         N0.getOpcode() == ISD::XOR) ||
11110       N0.getOperand(1).getOpcode() != ISD::Constant ||
11111       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11112     return SDValue();
11113 
11114   // shl/shr
11115   SDValue N1 = N0->getOperand(0);
11116   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11117       N1.getOperand(1).getOpcode() != ISD::Constant ||
11118       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11119     return SDValue();
11120 
11121   // load
11122   if (!isa<LoadSDNode>(N1.getOperand(0)))
11123     return SDValue();
11124   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11125   EVT MemVT = Load->getMemoryVT();
11126   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11127       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11128     return SDValue();
11129 
11130 
11131   // If the shift op is SHL, the logic op must be AND, otherwise the result
11132   // will be wrong.
11133   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11134     return SDValue();
11135 
11136   if (!N0.hasOneUse() || !N1.hasOneUse())
11137     return SDValue();
11138 
11139   SmallVector<SDNode*, 4> SetCCs;
11140   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11141                                ISD::ZERO_EXTEND, SetCCs, TLI))
11142     return SDValue();
11143 
11144   // Actually do the transformation.
11145   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11146                                    Load->getChain(), Load->getBasePtr(),
11147                                    Load->getMemoryVT(), Load->getMemOperand());
11148 
11149   SDLoc DL1(N1);
11150   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11151                               N1.getOperand(1));
11152 
11153   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11154   SDLoc DL0(N0);
11155   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11156                             DAG.getConstant(Mask, DL0, VT));
11157 
11158   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11159   CombineTo(N, And);
11160   if (SDValue(Load, 0).hasOneUse()) {
11161     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11162   } else {
11163     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11164                                 Load->getValueType(0), ExtLoad);
11165     CombineTo(Load, Trunc, ExtLoad.getValue(1));
11166   }
11167 
11168   // N0 is dead at this point.
11169   recursivelyDeleteUnusedNodes(N0.getNode());
11170 
11171   return SDValue(N,0); // Return N so it doesn't get rechecked!
11172 }
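// An illustrative instance, assuming zextload i16 -> i64 is legal:
//   (i64 zext (and (srl (i16 load x), 4), 0xFF))
//   --> (i64 and (srl (i64 zextload x), 4), 0xFF)
// The mask constant is zero-extended to the wide type, so the masked bits are
// unchanged; for SHL the logic op must be AND, as checked above.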
11173 
11174 /// If we're narrowing or widening the result of a vector select and the final
11175 /// size is the same size as a setcc (compare) feeding the select, then try to
11176 /// apply the cast operation to the select's operands because matching vector
11177 /// sizes for a select condition and other operands should be more efficient.
11178 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11179   unsigned CastOpcode = Cast->getOpcode();
11180   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11181           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11182           CastOpcode == ISD::FP_ROUND) &&
11183          "Unexpected opcode for vector select narrowing/widening");
11184 
11185   // We only do this transform before legal ops because the pattern may be
11186   // obfuscated by target-specific operations after legalization. Do not create
11187   // an illegal select op, however, because that may be difficult to lower.
11188   EVT VT = Cast->getValueType(0);
11189   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11190     return SDValue();
11191 
11192   SDValue VSel = Cast->getOperand(0);
11193   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11194       VSel.getOperand(0).getOpcode() != ISD::SETCC)
11195     return SDValue();
11196 
11197   // Does the setcc have the same vector size as the casted select?
11198   SDValue SetCC = VSel.getOperand(0);
11199   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11200   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11201     return SDValue();
11202 
11203   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11204   SDValue A = VSel.getOperand(1);
11205   SDValue B = VSel.getOperand(2);
11206   SDValue CastA, CastB;
11207   SDLoc DL(Cast);
11208   if (CastOpcode == ISD::FP_ROUND) {
11209     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11210     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11211     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11212   } else {
11213     CastA = DAG.getNode(CastOpcode, DL, VT, A);
11214     CastB = DAG.getNode(CastOpcode, DL, VT, B);
11215   }
11216   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11217 }
11218 
11219 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11220 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11221 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11222                                      const TargetLowering &TLI, EVT VT,
11223                                      bool LegalOperations, SDNode *N,
11224                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
11225   SDNode *N0Node = N0.getNode();
11226   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11227                                                    : ISD::isZEXTLoad(N0Node);
11228   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11229       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11230     return SDValue();
11231 
11232   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11233   EVT MemVT = LN0->getMemoryVT();
11234   if ((LegalOperations || !LN0->isSimple() ||
11235        VT.isVector()) &&
11236       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11237     return SDValue();
11238 
11239   SDValue ExtLoad =
11240       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11241                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11242   Combiner.CombineTo(N, ExtLoad);
11243   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11244   if (LN0->use_empty())
11245     Combiner.recursivelyDeleteUnusedNodes(LN0);
11246   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11247 }
11248 
11249 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11250 // Only generate vector extloads when 1) they're legal, and 2) they are
11251 // deemed desirable by the target.
11252 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11253                                   const TargetLowering &TLI, EVT VT,
11254                                   bool LegalOperations, SDNode *N, SDValue N0,
11255                                   ISD::LoadExtType ExtLoadType,
11256                                   ISD::NodeType ExtOpc) {
11257   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11258       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11259       ((LegalOperations || VT.isVector() ||
11260         !cast<LoadSDNode>(N0)->isSimple()) &&
11261        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11262     return {};
11263 
11264   bool DoXform = true;
11265   SmallVector<SDNode *, 4> SetCCs;
11266   if (!N0.hasOneUse())
11267     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11268   if (VT.isVector())
11269     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11270   if (!DoXform)
11271     return {};
11272 
11273   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11274   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11275                                    LN0->getBasePtr(), N0.getValueType(),
11276                                    LN0->getMemOperand());
11277   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11278   // If the load value is used only by N, replace it via CombineTo N.
11279   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11280   Combiner.CombineTo(N, ExtLoad);
11281   if (NoReplaceTrunc) {
11282     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11283     Combiner.recursivelyDeleteUnusedNodes(LN0);
11284   } else {
11285     SDValue Trunc =
11286         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11287     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11288   }
11289   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11290 }
11291 
11292 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11293                                         const TargetLowering &TLI, EVT VT,
11294                                         SDNode *N, SDValue N0,
11295                                         ISD::LoadExtType ExtLoadType,
11296                                         ISD::NodeType ExtOpc) {
11297   if (!N0.hasOneUse())
11298     return SDValue();
11299 
11300   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11301   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11302     return SDValue();
11303 
11304   if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11305     return SDValue();
11306 
11307   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11308     return SDValue();
11309 
11310   SDLoc dl(Ld);
11311   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11312   SDValue NewLoad = DAG.getMaskedLoad(
11313       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11314       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11315       ExtLoadType, Ld->isExpandingLoad());
11316   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11317   return NewLoad;
11318 }
11319 
11320 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11321                                        bool LegalOperations) {
11322   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11323           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11324 
11325   SDValue SetCC = N->getOperand(0);
11326   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11327       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11328     return SDValue();
11329 
11330   SDValue X = SetCC.getOperand(0);
11331   SDValue Ones = SetCC.getOperand(1);
11332   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11333   EVT VT = N->getValueType(0);
11334   EVT XVT = X.getValueType();
11335   // setge X, C is canonicalized to setgt, so we do not need to match that
11336   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11337   // not require the 'not' op.
11338   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11339     // Invert and smear/shift the sign bit:
11340     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11341     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11342     SDLoc DL(N);
11343     unsigned ShCt = VT.getSizeInBits() - 1;
11344     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11345     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11346       SDValue NotX = DAG.getNOT(DL, X, VT);
11347       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11348       auto ShiftOpcode =
11349         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11350       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11351     }
11352   }
11353   return SDValue();
11354 }
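// An illustrative check for i32: (setgt X, -1) is true exactly when the sign
// bit of X is clear, and (srl (not X), 31) produces that same 0/1 value, so
// the zext form needs only a not and a shift; the sext form uses sra to smear
// the inverted sign bit instead.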
11355 
11356 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11357   SDValue N0 = N->getOperand(0);
11358   if (N0.getOpcode() != ISD::SETCC)
11359     return SDValue();
11360 
11361   SDValue N00 = N0.getOperand(0);
11362   SDValue N01 = N0.getOperand(1);
11363   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11364   EVT VT = N->getValueType(0);
11365   EVT N00VT = N00.getValueType();
11366   SDLoc DL(N);
11367 
11368   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11369   // the same size as the compared operands. Try to optimize sext(setcc())
11370   // if this is the case.
11371   if (VT.isVector() && !LegalOperations &&
11372       TLI.getBooleanContents(N00VT) ==
11373           TargetLowering::ZeroOrNegativeOneBooleanContent) {
11374     EVT SVT = getSetCCResultType(N00VT);
11375 
11376     // If we already have the desired type, don't change it.
11377     if (SVT != N0.getValueType()) {
11378       // We know that the # elements of the result is the same as the
11379       // # elements of the compare (and the # elements of the compare result
11380       // for that matter).  Check to see that they are the same size.  If so,
11381       // we know that the element size of the sext'd result matches the
11382       // element size of the compare operands.
11383       if (VT.getSizeInBits() == SVT.getSizeInBits())
11384         return DAG.getSetCC(DL, VT, N00, N01, CC);
11385 
11386       // If the desired elements are smaller or larger than the source
11387       // elements, we can use a matching integer vector type and then
11388       // truncate/sign extend.
11389       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11390       if (SVT == MatchingVecType) {
11391         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11392         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11393       }
11394     }
11395 
11396     // Try to eliminate the sext of a setcc by zexting the compare operands.
11397     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11398         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
11399       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11400       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11401       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11402 
11403       // We have an unsupported narrow vector compare op that would be legal
11404       // if extended to the destination type. See if the compare operands
11405       // can be freely extended to the destination type.
11406       auto IsFreeToExtend = [&](SDValue V) {
11407         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11408           return true;
11409         // Match a simple, non-extended load that can be converted to a
11410         // legal {z/s}ext-load.
11411         // TODO: Allow widening of an existing {z/s}ext-load?
11412         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11413               ISD::isUNINDEXEDLoad(V.getNode()) &&
11414               cast<LoadSDNode>(V)->isSimple() &&
11415               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11416           return false;
11417 
11418         // Non-chain users of this value must either be the setcc in this
11419         // sequence or extends that can be folded into the new {z/s}ext-load.
11420         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11421              UI != UE; ++UI) {
11422           // Skip uses of the chain and the setcc.
11423           SDNode *User = *UI;
11424           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11425             continue;
11426           // Extra users must have exactly the same cast we are about to create.
11427           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11428           //       is enhanced similarly.
11429           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11430             return false;
11431         }
11432         return true;
11433       };
11434 
11435       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11436         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11437         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11438         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11439       }
11440     }
11441   }
11442 
11443   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11444   // Here, T can be 1 or -1, depending on the type of the setcc and
11445   // getBooleanContents().
11446   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11447 
11448   // To determine the "true" side of the select, we need to know the high bit
11449   // of the value returned by the setcc if it evaluates to true.
11450   // If the type of the setcc is i1, then the true case of the select is just
11451   // sext(i1 1), that is, -1.
11452   // If the type of the setcc is larger (say, i8) then the value of the high
11453   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11454   // of the appropriate width.
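  // For example (illustrative): sext i1 (setcc i32 X, Y, cc) to i32 becomes
  // (select (setcc X, Y, cc), Constant:i32<-1>, Constant:i32<0>).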
11455   SDValue ExtTrueVal = (SetCCWidth == 1)
11456                            ? DAG.getAllOnesConstant(DL, VT)
11457                            : DAG.getBoolConstant(true, DL, VT, N00VT);
11458   SDValue Zero = DAG.getConstant(0, DL, VT);
11459   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11460     return SCC;
11461 
11462   if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11463     EVT SetCCVT = getSetCCResultType(N00VT);
11464     // Don't do this transform for i1 because there's a select transform
11465     // that would reverse it.
11466     // TODO: We should not do this transform at all without a target hook
11467     // because a sext is likely cheaper than a select?
11468     if (SetCCVT.getScalarSizeInBits() != 1 &&
11469         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11470       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11471       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11472     }
11473   }
11474 
11475   return SDValue();
11476 }
11477 
11478 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11479   SDValue N0 = N->getOperand(0);
11480   EVT VT = N->getValueType(0);
11481   SDLoc DL(N);
11482 
11483   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11484     return Res;
11485 
11486   // fold (sext (sext x)) -> (sext x)
11487   // fold (sext (aext x)) -> (sext x)
11488   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11489     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11490 
11491   if (N0.getOpcode() == ISD::TRUNCATE) {
11492     // fold (sext (truncate (load x))) -> (sext (smaller load x))
11493     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11494     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11495       SDNode *oye = N0.getOperand(0).getNode();
11496       if (NarrowLoad.getNode() != N0.getNode()) {
11497         CombineTo(N0.getNode(), NarrowLoad);
11498         // CombineTo deleted the truncate, if needed, but not what's under it.
11499         AddToWorklist(oye);
11500       }
11501       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11502     }
11503 
11504     // See if the value being truncated is already sign extended.  If so, just
11505     // eliminate the trunc/sext pair.
11506     SDValue Op = N0.getOperand(0);
11507     unsigned OpBits   = Op.getScalarValueSizeInBits();
11508     unsigned MidBits  = N0.getScalarValueSizeInBits();
11509     unsigned DestBits = VT.getScalarSizeInBits();
11510     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11511 
11512     if (OpBits == DestBits) {
11513       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
11514       // bits, it is already sign-extended and can be used directly.
11515       if (NumSignBits > DestBits-MidBits)
11516         return Op;
11517     } else if (OpBits < DestBits) {
11518       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
11519       // bits, just sext from i32.
11520       if (NumSignBits > OpBits-MidBits)
11521         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11522     } else {
11523       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
11524       // bits, just truncate to i32.
11525       if (NumSignBits > OpBits-MidBits)
11526         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11527     }
11528 
11529     // fold (sext (truncate x)) -> (sextinreg x).
11530     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11531                                                  N0.getValueType())) {
11532       if (OpBits < DestBits)
11533         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11534       else if (OpBits > DestBits)
11535         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11536       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11537                          DAG.getValueType(N0.getValueType()));
11538     }
11539   }
11540 
11541   // Try to simplify (sext (load x)).
11542   if (SDValue foldedExt =
11543           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11544                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11545     return foldedExt;
11546 
11547   if (SDValue foldedExt =
11548       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11549                                ISD::SIGN_EXTEND))
11550     return foldedExt;
11551 
11552   // fold (sext (load x)) to multiple smaller sextloads.
11553   // Only on illegal but splittable vectors.
11554   if (SDValue ExtLoad = CombineExtLoad(N))
11555     return ExtLoad;
11556 
11557   // Try to simplify (sext (sextload x)).
11558   if (SDValue foldedExt = tryToFoldExtOfExtload(
11559           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11560     return foldedExt;
11561 
11562   // fold (sext (and/or/xor (load x), cst)) ->
11563   //      (and/or/xor (sextload x), (sext cst))
11564   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11565        N0.getOpcode() == ISD::XOR) &&
11566       isa<LoadSDNode>(N0.getOperand(0)) &&
11567       N0.getOperand(1).getOpcode() == ISD::Constant &&
11568       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11569     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11570     EVT MemVT = LN00->getMemoryVT();
11571     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11572       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11573       SmallVector<SDNode*, 4> SetCCs;
11574       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11575                                              ISD::SIGN_EXTEND, SetCCs, TLI);
11576       if (DoXform) {
11577         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11578                                          LN00->getChain(), LN00->getBasePtr(),
11579                                          LN00->getMemoryVT(),
11580                                          LN00->getMemOperand());
11581         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11582         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11583                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11584         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11585         bool NoReplaceTruncAnd = !N0.hasOneUse();
11586         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11587         CombineTo(N, And);
11588         // If N0 has multiple uses, change other uses as well.
11589         if (NoReplaceTruncAnd) {
11590           SDValue TruncAnd =
11591               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11592           CombineTo(N0.getNode(), TruncAnd);
11593         }
11594         if (NoReplaceTrunc) {
11595           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11596         } else {
11597           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11598                                       LN00->getValueType(0), ExtLoad);
11599           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11600         }
11601         return SDValue(N,0); // Return N so it doesn't get rechecked!
11602       }
11603     }
11604   }
11605 
11606   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11607     return V;
11608 
11609   if (SDValue V = foldSextSetcc(N))
11610     return V;
11611 
11612   // fold (sext x) -> (zext x) if the sign bit is known zero.
11613   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11614       DAG.SignBitIsZero(N0))
11615     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11616 
11617   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11618     return NewVSel;
11619 
11620   // Eliminate this sign extend by doing a negation in the destination type:
11621   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11622   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11623       isNullOrNullSplat(N0.getOperand(0)) &&
11624       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11625       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11626     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11627     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11628   }
11629   // Eliminate this sign extend by doing a decrement in the destination type:
11630   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11631   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11632       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11633       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11634       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11635     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11636     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11637   }
11638 
11639   // fold sext (not i1 X) -> add (zext i1 X), -1
11640   // TODO: This could be extended to handle bool vectors.
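  // (Illustrative check: X == 0 gives sext(not 0) == -1 and zext(0) + (-1)
  //  == -1, while X == 1 gives sext(not 1) == 0 and zext(1) + (-1) == 0.)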
11641   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11642       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11643                             TLI.isOperationLegal(ISD::ADD, VT)))) {
11644     // If we can eliminate the 'not', the sext form should be better
11645     if (SDValue NewXor = visitXOR(N0.getNode())) {
11646       // Returning N0 is a form of in-visit replacement that may have
11647       // invalidated N0.
11648       if (NewXor.getNode() == N0.getNode()) {
11649         // Return SDValue here as the xor should have already been replaced in
11650         // this sext.
11651         return SDValue();
11652       } else {
11653         // Return a new sext with the new xor.
11654         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11655       }
11656     }
11657 
11658     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11659     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11660   }
11661 
11662   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11663     return Res;
11664 
11665   return SDValue();
11666 }
11667 
11668 // isTruncateOf - If N is a truncate of some other value, return true and
11669 // record the value being truncated in Op and its known zero/one bits in Known.
11670 // This function computes KnownBits to avoid a duplicated call to
11671 // computeKnownBits in the caller.
11672 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11673                          KnownBits &Known) {
11674   if (N->getOpcode() == ISD::TRUNCATE) {
11675     Op = N->getOperand(0);
11676     Known = DAG.computeKnownBits(Op);
11677     return true;
11678   }
11679 
11680   if (N.getOpcode() != ISD::SETCC ||
11681       N.getValueType().getScalarType() != MVT::i1 ||
11682       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11683     return false;
11684 
11685   SDValue Op0 = N->getOperand(0);
11686   SDValue Op1 = N->getOperand(1);
11687   assert(Op0.getValueType() == Op1.getValueType());
11688 
11689   if (isNullOrNullSplat(Op0))
11690     Op = Op1;
11691   else if (isNullOrNullSplat(Op1))
11692     Op = Op0;
11693   else
11694     return false;
11695 
11696   Known = DAG.computeKnownBits(Op);
11697 
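  // At this point every bit of Op except possibly bit 0 is known zero, so
  // (setne Op, 0) is equivalent to (trunc Op to i1): treat the setcc as a
  // truncate of Op.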
11698   return (Known.Zero | 1).isAllOnes();
11699 }
11700 
11701 /// Given an extending node with a pop-count operand, if the target does not
11702 /// support a pop-count in the narrow source type but does support it in the
11703 /// destination type, widen the pop-count to the destination type.
11704 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11705   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11706           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11707 
11708   SDValue CtPop = Extend->getOperand(0);
11709   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11710     return SDValue();
11711 
11712   EVT VT = Extend->getValueType(0);
11713   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11714   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11715       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11716     return SDValue();
11717 
11718   // zext (ctpop X) --> ctpop (zext X)
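  // For example, an i16 ctpop that the target cannot select is widened to an
  // i32 ctpop of the zero-extended operand; the zext introduces no set bits,
  // so the population count is unchanged.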
11719   SDLoc DL(Extend);
11720   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11721   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11722 }
11723 
11724 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11725   SDValue N0 = N->getOperand(0);
11726   EVT VT = N->getValueType(0);
11727 
11728   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11729     return Res;
11730 
11731   // fold (zext (zext x)) -> (zext x)
11732   // fold (zext (aext x)) -> (zext x)
11733   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11734     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11735                        N0.getOperand(0));
11736 
11737   // fold (zext (truncate x)) -> (zext x) or
11738   //      (zext (truncate x)) -> (truncate x)
11739   // This is valid when the truncated bits of x are already zero.
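  // Editorial illustration: (zext (trunc (and x:i64, 255) to i8) to i64)
  // becomes (and x, 255), because the bits discarded by the truncate are
  // already known zero.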
11740   SDValue Op;
11741   KnownBits Known;
11742   if (isTruncateOf(DAG, N0, Op, Known)) {
11743     APInt TruncatedBits =
11744       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11745       APInt(Op.getScalarValueSizeInBits(), 0) :
11746       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11747                         N0.getScalarValueSizeInBits(),
11748                         std::min(Op.getScalarValueSizeInBits(),
11749                                  VT.getScalarSizeInBits()));
11750     if (TruncatedBits.isSubsetOf(Known.Zero))
11751       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11752   }
11753 
11754   // fold (zext (truncate x)) -> (and x, mask)
11755   if (N0.getOpcode() == ISD::TRUNCATE) {
11756     // fold (zext (truncate (load x))) -> (zext (smaller load x))
11757     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11758     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11759       SDNode *oye = N0.getOperand(0).getNode();
11760       if (NarrowLoad.getNode() != N0.getNode()) {
11761         CombineTo(N0.getNode(), NarrowLoad);
11762         // CombineTo deleted the truncate, if needed, but not what's under it.
11763         AddToWorklist(oye);
11764       }
11765       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11766     }
11767 
11768     EVT SrcVT = N0.getOperand(0).getValueType();
11769     EVT MinVT = N0.getValueType();
11770 
11771     // Try to mask before the extension to avoid having to generate a
11772     // larger mask, possibly over several sub-vectors.
11773     if (SrcVT.bitsLT(VT) && VT.isVector()) {
11774       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11775                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11776         SDValue Op = N0.getOperand(0);
11777         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11778         AddToWorklist(Op.getNode());
11779         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11780         // Transfer the debug info; the new node is equivalent to N0.
11781         DAG.transferDbgValues(N0, ZExtOrTrunc);
11782         return ZExtOrTrunc;
11783       }
11784     }
11785 
11786     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11787       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11788       AddToWorklist(Op.getNode());
11789       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11790       // We may safely transfer the debug info describing the truncate node over
11791       // to the equivalent and operation.
11792       DAG.transferDbgValues(N0, And);
11793       return And;
11794     }
11795   }
11796 
11797   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11798   // if either of the casts is not free.
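  // Editorial illustration: (zext (and (trunc x:i64 to i16), 0xFF) to i32)
  // becomes (and (trunc x to i32), 0xFF); the widened mask clears the same
  // high bits that the zero extension would have cleared.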
11799   if (N0.getOpcode() == ISD::AND &&
11800       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11801       N0.getOperand(1).getOpcode() == ISD::Constant &&
11802       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11803                            N0.getValueType()) ||
11804        !TLI.isZExtFree(N0.getValueType(), VT))) {
11805     SDValue X = N0.getOperand(0).getOperand(0);
11806     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11807     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11808     SDLoc DL(N);
11809     return DAG.getNode(ISD::AND, DL, VT,
11810                        X, DAG.getConstant(Mask, DL, VT));
11811   }
11812 
11813   // Try to simplify (zext (load x)).
11814   if (SDValue foldedExt =
11815           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11816                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11817     return foldedExt;
11818 
11819   if (SDValue foldedExt =
11820       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11821                                ISD::ZERO_EXTEND))
11822     return foldedExt;
11823 
11824   // fold (zext (load x)) to multiple smaller zextloads.
11825   // Only on illegal but splittable vectors.
11826   if (SDValue ExtLoad = CombineExtLoad(N))
11827     return ExtLoad;
11828 
11829   // fold (zext (and/or/xor (load x), cst)) ->
11830   //      (and/or/xor (zextload x), (zext cst))
11831   // Unless (and (load x) cst) will match as a zextload already and has
11832   // additional users.
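  // For example, (zext (xor (i8 load p), 0x80) to i32) can become
  // (xor (i32 zextload p), 0x80): the logic op is redone in the wide type on
  // a zero-extended load, with the constant zero-extended to match.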
11833   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11834        N0.getOpcode() == ISD::XOR) &&
11835       isa<LoadSDNode>(N0.getOperand(0)) &&
11836       N0.getOperand(1).getOpcode() == ISD::Constant &&
11837       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11838     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11839     EVT MemVT = LN00->getMemoryVT();
11840     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11841         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11842       bool DoXform = true;
11843       SmallVector<SDNode*, 4> SetCCs;
11844       if (!N0.hasOneUse()) {
11845         if (N0.getOpcode() == ISD::AND) {
11846           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11847           EVT LoadResultTy = AndC->getValueType(0);
11848           EVT ExtVT;
11849           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11850             DoXform = false;
11851         }
11852       }
11853       if (DoXform)
11854         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11855                                           ISD::ZERO_EXTEND, SetCCs, TLI);
11856       if (DoXform) {
11857         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11858                                          LN00->getChain(), LN00->getBasePtr(),
11859                                          LN00->getMemoryVT(),
11860                                          LN00->getMemOperand());
11861         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11862         SDLoc DL(N);
11863         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11864                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11865         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11866         bool NoReplaceTruncAnd = !N0.hasOneUse();
11867         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11868         CombineTo(N, And);
11869         // If N0 has multiple uses, change other uses as well.
11870         if (NoReplaceTruncAnd) {
11871           SDValue TruncAnd =
11872               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11873           CombineTo(N0.getNode(), TruncAnd);
11874         }
11875         if (NoReplaceTrunc) {
11876           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11877         } else {
11878           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11879                                       LN00->getValueType(0), ExtLoad);
11880           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11881         }
11882         return SDValue(N,0); // Return N so it doesn't get rechecked!
11883       }
11884     }
11885   }
11886 
11887   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11888   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11889   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11890     return ZExtLoad;
11891 
11892   // Try to simplify (zext (zextload x)).
11893   if (SDValue foldedExt = tryToFoldExtOfExtload(
11894           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11895     return foldedExt;
11896 
11897   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11898     return V;
11899 
11900   if (N0.getOpcode() == ISD::SETCC) {
11901     // Only do this before legalize for now.
11902     if (!LegalOperations && VT.isVector() &&
11903         N0.getValueType().getVectorElementType() == MVT::i1) {
11904       EVT N00VT = N0.getOperand(0).getValueType();
11905       if (getSetCCResultType(N00VT) == N0.getValueType())
11906         return SDValue();
11907 
11908       // We know that the # elements of the result is the same as the #
11909       // elements of the compare (and the # elements of the compare result
11910       // for that matter). Check to see that they are the same size. If so,
11911       // we know that the element size of the zext'd result matches the
11912       // element size of the compare operands.
11913       SDLoc DL(N);
11914       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11915         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11916         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11917                                      N0.getOperand(1), N0.getOperand(2));
11918         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11919       }
11920 
11921       // If the desired elements are smaller or larger than the source
11922       // elements we can use a matching integer vector type and then
11923       // truncate/any extend followed by zext_in_reg.
11924       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11925       SDValue VsetCC =
11926           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11927                       N0.getOperand(1), N0.getOperand(2));
11928       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11929                                     N0.getValueType());
11930     }
11931 
11932     // zext(setcc x,y,cc) -> zext(select_cc x, y, true, false, cc)
11933     SDLoc DL(N);
11934     EVT N0VT = N0.getValueType();
11935     EVT N00VT = N0.getOperand(0).getValueType();
11936     if (SDValue SCC = SimplifySelectCC(
11937             DL, N0.getOperand(0), N0.getOperand(1),
11938             DAG.getBoolConstant(true, DL, N0VT, N00VT),
11939             DAG.getBoolConstant(false, DL, N0VT, N00VT),
11940             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11941       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11942   }
11943 
11944   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
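  // Editorial illustration: (zext (shl (zext x:i16 to i32), 5) to i64) can
  // instead shift the i64 zext of x, because a 5-bit shift of a 16-bit value
  // still fits in 32 bits; the check below rejects shifts that could drop
  // set bits.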
11945   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11946       isa<ConstantSDNode>(N0.getOperand(1)) &&
11947       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11948       N0.hasOneUse()) {
11949     SDValue ShAmt = N0.getOperand(1);
11950     if (N0.getOpcode() == ISD::SHL) {
11951       SDValue InnerZExt = N0.getOperand(0);
11952       // If the original shl may be shifting out bits, do not perform this
11953       // transformation.
11954       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11955         InnerZExt.getOperand(0).getValueSizeInBits();
11956       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11957         return SDValue();
11958     }
11959 
11960     SDLoc DL(N);
11961 
11962     // Ensure that the shift amount is wide enough for the shifted value.
11963     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11964       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11965 
11966     return DAG.getNode(N0.getOpcode(), DL, VT,
11967                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11968                        ShAmt);
11969   }
11970 
11971   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11972     return NewVSel;
11973 
11974   if (SDValue NewCtPop = widenCtPop(N, DAG))
11975     return NewCtPop;
11976 
11977   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11978     return Res;
11979 
11980   return SDValue();
11981 }
11982 
11983 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11984   SDValue N0 = N->getOperand(0);
11985   EVT VT = N->getValueType(0);
11986 
11987   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11988     return Res;
11989 
11990   // fold (aext (aext x)) -> (aext x)
11991   // fold (aext (zext x)) -> (zext x)
11992   // fold (aext (sext x)) -> (sext x)
11993   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
11994       N0.getOpcode() == ISD::ZERO_EXTEND ||
11995       N0.getOpcode() == ISD::SIGN_EXTEND)
11996     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11997 
11998   // fold (aext (truncate (load x))) -> (aext (smaller load x))
11999   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12000   if (N0.getOpcode() == ISD::TRUNCATE) {
12001     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12002       SDNode *oye = N0.getOperand(0).getNode();
12003       if (NarrowLoad.getNode() != N0.getNode()) {
12004         CombineTo(N0.getNode(), NarrowLoad);
12005         // CombineTo deleted the truncate, if needed, but not what's under it.
12006         AddToWorklist(oye);
12007       }
12008       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12009     }
12010   }
12011 
12012   // fold (aext (truncate x))
12013   if (N0.getOpcode() == ISD::TRUNCATE)
12014     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12015 
12016   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12017   // if the trunc is not free.
12018   if (N0.getOpcode() == ISD::AND &&
12019       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12020       N0.getOperand(1).getOpcode() == ISD::Constant &&
12021       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12022                           N0.getValueType())) {
12023     SDLoc DL(N);
12024     SDValue X = N0.getOperand(0).getOperand(0);
12025     X = DAG.getAnyExtOrTrunc(X, DL, VT);
12026     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12027     return DAG.getNode(ISD::AND, DL, VT,
12028                        X, DAG.getConstant(Mask, DL, VT));
12029   }
12030 
12031   // fold (aext (load x)) -> (aext (truncate (extload x)))
12032   // None of the supported targets knows how to perform load and any_ext
12033   // on vectors in one instruction, so attempt to fold to zext instead.
12034   if (VT.isVector()) {
12035     // Try to simplify (zext (load x)).
12036     if (SDValue foldedExt =
12037             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12038                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12039       return foldedExt;
12040   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
12041              ISD::isUNINDEXEDLoad(N0.getNode()) &&
12042              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12043     bool DoXform = true;
12044     SmallVector<SDNode *, 4> SetCCs;
12045     if (!N0.hasOneUse())
12046       DoXform =
12047           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
12048     if (DoXform) {
12049       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12050       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12051                                        LN0->getChain(), LN0->getBasePtr(),
12052                                        N0.getValueType(), LN0->getMemOperand());
12053       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
12054       // If the load value is used only by N, replace it via CombineTo N.
12055       bool NoReplaceTrunc = N0.hasOneUse();
12056       CombineTo(N, ExtLoad);
12057       if (NoReplaceTrunc) {
12058         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12059         recursivelyDeleteUnusedNodes(LN0);
12060       } else {
12061         SDValue Trunc =
12062             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12063         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12064       }
12065       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12066     }
12067   }
12068 
12069   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12070   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12071   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
12072   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12073       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
12074     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12075     ISD::LoadExtType ExtType = LN0->getExtensionType();
12076     EVT MemVT = LN0->getMemoryVT();
12077     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
12078       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12079                                        VT, LN0->getChain(), LN0->getBasePtr(),
12080                                        MemVT, LN0->getMemOperand());
12081       CombineTo(N, ExtLoad);
12082       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12083       recursivelyDeleteUnusedNodes(LN0);
12084       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12085     }
12086   }
12087 
12088   if (N0.getOpcode() == ISD::SETCC) {
12089     // For vectors:
12090     // aext(setcc) -> vsetcc
12091     // aext(setcc) -> truncate(vsetcc)
12092     // aext(setcc) -> aext(vsetcc)
12093     // Only do this before legalize for now.
12094     if (VT.isVector() && !LegalOperations) {
12095       EVT N00VT = N0.getOperand(0).getValueType();
12096       if (getSetCCResultType(N00VT) == N0.getValueType())
12097         return SDValue();
12098 
12099       // We know that the # elements of the result is the same as the
12100       // # elements of the compare (and the # elements of the compare result
12101       // for that matter).  Check to see that they are the same size.  If so,
12102       // we know that the element size of the extended result matches the
12103       // element size of the compare operands.
12104       if (VT.getSizeInBits() == N00VT.getSizeInBits())
12105         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12106                              N0.getOperand(1),
12107                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
12108 
12109       // If the desired elements are smaller or larger than the source
12110       // elements we can use a matching integer vector type and then
12111       // truncate/any extend
12112       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12113       SDValue VsetCC =
12114         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
12115                       N0.getOperand(1),
12116                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
12117       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12118     }
12119 
12120     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12121     SDLoc DL(N);
12122     if (SDValue SCC = SimplifySelectCC(
12123             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12124             DAG.getConstant(0, DL, VT),
12125             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12126       return SCC;
12127   }
12128 
12129   if (SDValue NewCtPop = widenCtPop(N, DAG))
12130     return NewCtPop;
12131 
12132   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12133     return Res;
12134 
12135   return SDValue();
12136 }
12137 
12138 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12139   unsigned Opcode = N->getOpcode();
12140   SDValue N0 = N->getOperand(0);
12141   SDValue N1 = N->getOperand(1);
12142   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12143 
12144   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
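  // For example, (AssertZext (AssertZext x, i8), i8) repeats the same fact,
  // so the outer assert can simply be dropped.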
12145   if (N0.getOpcode() == Opcode &&
12146       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12147     return N0;
12148 
12149   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12150       N0.getOperand(0).getOpcode() == Opcode) {
12151     // We have an assert, truncate, assert sandwich. Make one stronger assert
12152     // by asserting the smallest asserted type on the larger source type.
12153     // This eliminates the later assert:
12154     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12155     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12156     SDValue BigA = N0.getOperand(0);
12157     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12158     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12159            "Asserting zero/sign-extended bits to a type larger than the "
12160            "truncated destination does not provide information");
12161 
12162     SDLoc DL(N);
12163     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12164     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12165     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12166                                     BigA.getOperand(0), MinAssertVTVal);
12167     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12168   }
12169 
12170   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12171   // than X, just move the AssertZext in front of the truncate and drop the
12172   // AssertSext.
12173   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12174       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12175       Opcode == ISD::AssertZext) {
12176     SDValue BigA = N0.getOperand(0);
12177     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12178     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12179            "Asserting zero/sign-extended bits to a type larger than the "
12180            "truncated destination does not provide information");
12181 
12182     if (AssertVT.bitsLT(BigA_AssertVT)) {
12183       SDLoc DL(N);
12184       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12185                                       BigA.getOperand(0), N1);
12186       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12187     }
12188   }
12189 
12190   return SDValue();
12191 }
12192 
12193 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12194   SDLoc DL(N);
12195 
12196   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12197   SDValue N0 = N->getOperand(0);
12198 
12199   // Fold (assertalign (assertalign x, AL0), AL1) ->
12200   // (assertalign x, max(AL0, AL1))
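  // For example, asserting align 4 on a value already asserted to be align 8
  // adds no information; a single assert with the larger alignment suffices.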
12201   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12202     return DAG.getAssertAlign(DL, N0.getOperand(0),
12203                               std::max(AL, AAN->getAlign()));
12204 
12205   // In rare cases, there are trivial arithmetic ops in source operands. Sink
12206   // this assert down to source operands so that those arithmetic ops can be
12207   // exposed to DAG combining.
12208   switch (N0.getOpcode()) {
12209   default:
12210     break;
12211   case ISD::ADD:
12212   case ISD::SUB: {
12213     unsigned AlignShift = Log2(AL);
12214     SDValue LHS = N0.getOperand(0);
12215     SDValue RHS = N0.getOperand(1);
12216     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12217     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12218     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12219       if (LHSAlignShift < AlignShift)
12220         LHS = DAG.getAssertAlign(DL, LHS, AL);
12221       if (RHSAlignShift < AlignShift)
12222         RHS = DAG.getAssertAlign(DL, RHS, AL);
12223       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12224     }
12225     break;
12226   }
12227   }
12228 
12229   return SDValue();
12230 }
12231 
12232 /// If the result of a load is shifted/masked/truncated to an effectively
12233 /// narrower type, try to transform the load to a narrower type and/or
12234 /// use an extending load.
12235 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12236   unsigned Opc = N->getOpcode();
12237 
12238   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12239   SDValue N0 = N->getOperand(0);
12240   EVT VT = N->getValueType(0);
12241   EVT ExtVT = VT;
12242 
12243   // This transformation isn't valid for vector loads.
12244   if (VT.isVector())
12245     return SDValue();
12246 
12247   // The ShAmt variable is used to indicate that we've consumed a right
12248   // shift, i.e. we want to narrow the width of the load by not loading the
12249   // ShAmt least significant bits.
12250   unsigned ShAmt = 0;
12251   // A special case is when the least significant bits from the load are masked
12252   // away, but using an AND rather than a right shift. HasShiftedOffset is used
12253   // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12254   // the result.
12255   bool HasShiftedOffset = false;
12256   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12257   // extended to VT.
12258   if (Opc == ISD::SIGN_EXTEND_INREG) {
12259     ExtType = ISD::SEXTLOAD;
12260     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12261   } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12262     // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12263     // value, or it may be shifting a higher subword, half or byte into the
12264     // lowest bits.
12265 
12266     // Only handle shift with constant shift amount, and the shiftee must be a
12267     // load.
12268     auto *LN = dyn_cast<LoadSDNode>(N0);
12269     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12270     if (!N1C || !LN)
12271       return SDValue();
12272     // If the shift amount is larger than the memory type then we're not
12273     // accessing any of the loaded bytes.
12274     ShAmt = N1C->getZExtValue();
12275     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12276     if (MemoryWidth <= ShAmt)
12277       return SDValue();
12278     // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12279     ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12280     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12281     // If the original load is a SEXTLOAD then we can't simply replace it by a
12282     // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
12283     // followed by a ZEXT, but that is not handled at the moment). Similarly if
12284     // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12285     if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12286          LN->getExtensionType() == ISD::ZEXTLOAD) &&
12287         LN->getExtensionType() != ExtType)
12288       return SDValue();
12289   } else if (Opc == ISD::AND) {
12290     // An AND with a constant mask is the same as a truncate + zero-extend.
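    // For example, (and (i32 load p), 0xFFFF) behaves like a truncate to i16
    // followed by a zero extension, so it may be narrowed to an i16 ZEXTLOAD
    // of the low half.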
12291     auto *AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12292     if (!AndC)
12293       return SDValue();
12294 
12295     const APInt &Mask = AndC->getAPIntValue();
12296     unsigned ActiveBits = 0;
12297     if (Mask.isMask()) {
12298       ActiveBits = Mask.countTrailingOnes();
12299     } else if (Mask.isShiftedMask()) {
12300       ShAmt = Mask.countTrailingZeros();
12301       APInt ShiftedMask = Mask.lshr(ShAmt);
12302       ActiveBits = ShiftedMask.countTrailingOnes();
12303       HasShiftedOffset = true;
12304     } else
12305       return SDValue();
12306 
12307     ExtType = ISD::ZEXTLOAD;
12308     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12309   }
12310 
12311   // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12312   // a right shift. Here we redo some of those checks, to possibly adjust the
12313   // ExtVT even further based on "a masking AND". We could also end up here for
12314   // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12315   // need to be done here as well.
12316   if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12317     SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12318     // Bail out when the SRL has more than one use. This is done for historical
12319     // (undocumented) reasons. Maybe the intent was to guard the AND-masking
12320     // check below? And maybe it could be unprofitable to do the transform when
12321     // the SRL has multiple uses and we get here with Opc != ISD::SRL?
12322     // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
12323     if (!SRL.hasOneUse())
12324       return SDValue();
12325 
12326     // Only handle shift with constant shift amount, and the shiftee must be a
12327     // load.
12328     auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12329     auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12330     if (!SRL1C || !LN)
12331       return SDValue();
12332 
12333     // If the shift amount is larger than the input type then we're not
12334     // accessing any of the loaded bytes.  If the load was a zextload/extload
12335     // then the result of the shift+trunc is zero/undef (handled elsewhere).
12336     ShAmt = SRL1C->getZExtValue();
12337     uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12338     if (ShAmt >= MemoryWidth)
12339       return SDValue();
12340 
12341     // Because a SRL must be assumed to *need* to zero-extend the high bits
12342     // (as opposed to anyext the high bits), we can't combine the zextload
12343     // lowering of SRL and an sextload.
12344     if (LN->getExtensionType() == ISD::SEXTLOAD)
12345       return SDValue();
12346 
12347     // Avoid reading outside the memory accessed by the original load (which
12348     // could happen if we only adjust the load base pointer by ShAmt). Instead we
12349     // try to narrow the load even further. The typical scenario here is:
12350     //   (i64 (truncate (i96 (srl (load x), 64)))) ->
12351     //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12352     if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12353       // Don't replace sextload by zextload.
12354       if (ExtType == ISD::SEXTLOAD)
12355         return SDValue();
12356       // Narrow the load.
12357       ExtType = ISD::ZEXTLOAD;
12358       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12359     }
12360 
12361     // If the SRL is only used by a masking AND, we may be able to adjust
12362     // the ExtVT to make the AND redundant.
12363     SDNode *Mask = *(SRL->use_begin());
12364     if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12365         isa<ConstantSDNode>(Mask->getOperand(1))) {
12366       const APInt &ShiftMask = Mask->getConstantOperandAPInt(1);
12367       if (ShiftMask.isMask()) {
12368         EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12369                                          ShiftMask.countTrailingOnes());
12370         // If the mask is smaller, recompute the type.
12371         if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12372             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12373           ExtVT = MaskedVT;
12374       }
12375     }
12376 
12377     N0 = SRL.getOperand(0);
12378   }
12379 
12380   // If the load is shifted left (and the result isn't shifted back right), we
12381   // can fold a truncate through the shift. The typical scenario is that N
12382   // points at a TRUNCATE here so the attempted fold is:
12383   //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
12384   // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12385   unsigned ShLeftAmt = 0;
12386   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12387       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12388     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12389       ShLeftAmt = N01->getZExtValue();
12390       N0 = N0.getOperand(0);
12391     }
12392   }
12393 
12394   // If we haven't found a load, we can't narrow it.
12395   if (!isa<LoadSDNode>(N0))
12396     return SDValue();
12397 
12398   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12399   // Reducing the width of a volatile load is illegal.  For atomics, we may be
12400   // able to reduce the width provided we never widen again. (see D66309)
12401   if (!LN0->isSimple() ||
12402       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12403     return SDValue();
12404 
12405   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12406     unsigned LVTStoreBits =
12407         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12408     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12409     return LVTStoreBits - EVTStoreBits - ShAmt;
12410   };
12411 
12412   // We need to adjust the pointer to the load by ShAmt bits in order to load
12413   // the correct bytes.
12414   unsigned PtrAdjustmentInBits =
12415       DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12416 
12417   uint64_t PtrOff = PtrAdjustmentInBits / 8;
12418   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12419   SDLoc DL(LN0);
12420   // The original load itself didn't wrap, so an offset within it doesn't.
12421   SDNodeFlags Flags;
12422   Flags.setNoUnsignedWrap(true);
12423   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12424                                             TypeSize::Fixed(PtrOff), DL, Flags);
12425   AddToWorklist(NewPtr.getNode());
12426 
12427   SDValue Load;
12428   if (ExtType == ISD::NON_EXTLOAD)
12429     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12430                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12431                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12432   else
12433     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12434                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12435                           NewAlign, LN0->getMemOperand()->getFlags(),
12436                           LN0->getAAInfo());
12437 
12438   // Replace the old load's chain with the new load's chain.
12439   WorklistRemover DeadNodes(*this);
12440   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12441 
12442   // Shift the result left, if we've swallowed a left shift.
12443   SDValue Result = Load;
12444   if (ShLeftAmt != 0) {
12445     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12446     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12447       ShImmTy = VT;
12448     // If the shift amount is as large as the result size (but, presumably,
12449     // no larger than the source) then the useful bits of the result are
12450     // zero; we can't simply return the shortened shift, because the result
12451     // of that operation is undefined.
12452     if (ShLeftAmt >= VT.getScalarSizeInBits())
12453       Result = DAG.getConstant(0, DL, VT);
12454     else
12455       Result = DAG.getNode(ISD::SHL, DL, VT,
12456                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12457   }
12458 
12459   if (HasShiftedOffset) {
12460     // We're using a shifted mask, so the load now has an offset. This means
12461     // that the data has been loaded into lower bytes than it would have been
12462     // before, so we need to shl the loaded data into the correct position
12463     // in the register.
12464     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12465     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12466     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12467   }
12468 
12469   // Return the new loaded value.
12470   return Result;
12471 }
12472 
12473 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12474   SDValue N0 = N->getOperand(0);
12475   SDValue N1 = N->getOperand(1);
12476   EVT VT = N->getValueType(0);
12477   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12478   unsigned VTBits = VT.getScalarSizeInBits();
12479   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12480 
12481   // sext_in_reg(undef) = 0 because the top bits will all be the same.
12482   if (N0.isUndef())
12483     return DAG.getConstant(0, SDLoc(N), VT);
12484 
12485   // fold (sext_in_reg c1) -> c1
12486   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12487     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12488 
12489   // If the input is already sign extended, just drop the extension.
12490   if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12491     return N0;
12492 
12493   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12494   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12495       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12496     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12497                        N1);
12498 
12499   // fold (sext_in_reg (sext x)) -> (sext x)
12500   // fold (sext_in_reg (aext x)) -> (sext x)
12501   // if x is small enough or if we know that x has more than 1 sign bit and the
12502   // sign_extend_inreg is extending from one of them.
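  // For example, (sext_in_reg (aext x:i8 to i32), i8) -> (sext x to i32):
  // extending in-reg from bit 7 recreates exactly what a plain sext of the
  // i8 value produces.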
12503   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12504     SDValue N00 = N0.getOperand(0);
12505     unsigned N00Bits = N00.getScalarValueSizeInBits();
12506     if ((N00Bits <= ExtVTBits ||
12507          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12508         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12509       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12510   }
12511 
12512   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12513   // if x is small enough or if we know that x has more than 1 sign bit and the
12514   // sign_extend_inreg is extending from one of them.
12515   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12516       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12517       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12518     SDValue N00 = N0.getOperand(0);
12519     unsigned N00Bits = N00.getScalarValueSizeInBits();
12520     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12521     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12522     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12523     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12524     if ((N00Bits == ExtVTBits ||
12525          (!IsZext && (N00Bits < ExtVTBits ||
12526                       DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12527         (!LegalOperations ||
12528          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12529       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12530   }
12531 
12532   // fold (sext_in_reg (zext x)) -> (sext x)
12533   // iff we are extending the source sign bit.
12534   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12535     SDValue N00 = N0.getOperand(0);
12536     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12537         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12538       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12539   }
12540 
12541   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
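  // For example, when extending from i8 and bit 7 of x is known zero, the
  // replicated sign bits are all zero, which is exactly what zext_in_reg
  // produces.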
12542   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12543     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12544 
12545   // fold operands of sext_in_reg based on knowledge that the top bits are not
12546   // demanded.
12547   if (SimplifyDemandedBits(SDValue(N, 0)))
12548     return SDValue(N, 0);
12549 
12550   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12551   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12552   if (SDValue NarrowLoad = reduceLoadWidth(N))
12553     return NarrowLoad;
12554 
12555   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12556   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12557   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12558   if (N0.getOpcode() == ISD::SRL) {
12559     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12560       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12561         // We can turn this into an SRA iff the input to the SRL is already sign
12562         // extended enough.
12563         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12564         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12565           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12566                              N0.getOperand(1));
12567       }
12568   }
12569 
12570   // fold (sext_inreg (extload x)) -> (sextload x)
12571   // If sextload is not supported by target, we can only do the combine when
12572   // load has one use. Doing otherwise can block folding the extload with other
12573   // extends that the target does support.
12574   if (ISD::isEXTLoad(N0.getNode()) &&
12575       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12576       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12577       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12578         N0.hasOneUse()) ||
12579        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12580     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12581     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12582                                      LN0->getChain(),
12583                                      LN0->getBasePtr(), ExtVT,
12584                                      LN0->getMemOperand());
12585     CombineTo(N, ExtLoad);
12586     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12587     AddToWorklist(ExtLoad.getNode());
12588     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12589   }
12590 
12591   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12592   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12593       N0.hasOneUse() &&
12594       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12595       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12596        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12597     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12598     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12599                                      LN0->getChain(),
12600                                      LN0->getBasePtr(), ExtVT,
12601                                      LN0->getMemOperand());
12602     CombineTo(N, ExtLoad);
12603     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12604     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12605   }
12606 
12607   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12608   // ignore it if the masked load is already sign extended
12609   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12610     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12611         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12612         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12613       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12614           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12615           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12616           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12617       CombineTo(N, ExtMaskedLoad);
12618       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12619       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12620     }
12621   }
12622 
12623   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12624   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12625     if (SDValue(GN0, 0).hasOneUse() &&
12626         ExtVT == GN0->getMemoryVT() &&
12627         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
12628       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
12629                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
12630 
12631       SDValue ExtLoad = DAG.getMaskedGather(
12632           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12633           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12634 
12635       CombineTo(N, ExtLoad);
12636       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12637       AddToWorklist(ExtLoad.getNode());
12638       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12639     }
12640   }
12641 
12642   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12643   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12644     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12645                                            N0.getOperand(1), false))
12646       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12647   }
12648 
12649   return SDValue();
12650 }
12651 
12652 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12653   SDValue N0 = N->getOperand(0);
12654   EVT VT = N->getValueType(0);
12655 
12656   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12657   if (N0.isUndef())
12658     return DAG.getConstant(0, SDLoc(N), VT);
12659 
12660   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12661     return Res;
12662 
12663   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12664     return SDValue(N, 0);
12665 
12666   return SDValue();
12667 }
12668 
12669 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12670   SDValue N0 = N->getOperand(0);
12671   EVT VT = N->getValueType(0);
12672   EVT SrcVT = N0.getValueType();
12673   bool isLE = DAG.getDataLayout().isLittleEndian();
12674 
12675   // noop truncate
12676   if (SrcVT == VT)
12677     return N0;
12678 
12679   // fold (truncate (truncate x)) -> (truncate x)
12680   if (N0.getOpcode() == ISD::TRUNCATE)
12681     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12682 
12683   // fold (truncate c1) -> c1
12684   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12685     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12686     if (C.getNode() != N)
12687       return C;
12688   }
12689 
12690   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12691   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12692       N0.getOpcode() == ISD::SIGN_EXTEND ||
12693       N0.getOpcode() == ISD::ANY_EXTEND) {
12694     // if the source is smaller than the dest, we still need an extend.
12695     if (N0.getOperand(0).getValueType().bitsLT(VT))
12696       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12697     // if the source is larger than the dest, then we just need the truncate.
12698     if (N0.getOperand(0).getValueType().bitsGT(VT))
12699       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12700     // if the source and dest are the same type, we can drop both the extend
12701     // and the truncate.
12702     return N0.getOperand(0);
12703   }
12704 
12705   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12706   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12707     return SDValue();
12708 
12709   // Fold extract-and-trunc into a narrow extract. For example:
12710   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12711   //   i32 y = TRUNCATE(i64 x)
12712   //        -- becomes --
12713   //   v16i8 b = BITCAST (v2i64 val)
12714   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12715   //
12716   // Note: We only run this optimization after type legalization (which often
12717   // creates this pattern) and before operation legalization, after which we
12718   // need to be more careful about the vector instructions that we generate.
12719   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12720       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12721     EVT VecTy = N0.getOperand(0).getValueType();
12722     EVT ExTy = N0.getValueType();
12723     EVT TrTy = N->getValueType(0);
12724 
12725     auto EltCnt = VecTy.getVectorElementCount();
12726     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12727     auto NewEltCnt = EltCnt * SizeRatio;
12728 
12729     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12730     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12731 
12732     SDValue EltNo = N0->getOperand(1);
12733     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12734       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12735       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12736 
12737       SDLoc DL(N);
12738       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12739                          DAG.getBitcast(NVT, N0.getOperand(0)),
12740                          DAG.getVectorIdxConstant(Index, DL));
12741     }
12742   }
12743 
12744   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
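  // For example, (trunc (select c, x:i64, y:i64) to i32) can become
  // (select c, (i32 trunc x), (i32 trunc y)) when i64-to-i32 truncation is
  // free, letting the narrower select combine further.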
12745   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12746     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12747         TLI.isTruncateFree(SrcVT, VT)) {
12748       SDLoc SL(N0);
12749       SDValue Cond = N0.getOperand(0);
12750       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12751       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12752       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12753     }
12754   }
12755 
12756   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
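  // For example, (trunc (shl x:i64, 7) to i32) -> (shl (i32 trunc x), 7):
  // the low 32 bits of the shifted value depend only on the low 32 bits of x
  // whenever the shift amount is known to be less than 32.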
12757   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12758       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12759       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12760     SDValue Amt = N0.getOperand(1);
12761     KnownBits Known = DAG.computeKnownBits(Amt);
12762     unsigned Size = VT.getScalarSizeInBits();
12763     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12764       SDLoc SL(N);
12765       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12766 
12767       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12768       if (AmtVT != Amt.getValueType()) {
12769         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12770         AddToWorklist(Amt.getNode());
12771       }
12772       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12773     }
12774   }
12775 
12776   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12777     return V;
12778 
12779   // Attempt to pre-truncate BUILD_VECTOR sources.
12780   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12781       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12782       // Avoid creating illegal types if running after type legalizer.
12783       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12784     SDLoc DL(N);
12785     EVT SVT = VT.getScalarType();
12786     SmallVector<SDValue, 8> TruncOps;
12787     for (const SDValue &Op : N0->op_values()) {
12788       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12789       TruncOps.push_back(TruncOp);
12790     }
12791     return DAG.getBuildVector(VT, DL, TruncOps);
12792   }
12793 
12794   // Fold a series of buildvector, bitcast, and truncate if possible.
12795   // For example fold
12796   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12797   //   (2xi32 (buildvector x, y)).
12798   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12799       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12800       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12801       N0.getOperand(0).hasOneUse()) {
12802     SDValue BuildVect = N0.getOperand(0);
12803     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12804     EVT TruncVecEltTy = VT.getVectorElementType();
12805 
12806     // Check that the element types match.
12807     if (BuildVectEltTy == TruncVecEltTy) {
12808       // Now we only need to compute the offset of the truncated elements.
12809       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
12810       unsigned TruncVecNumElts = VT.getVectorNumElements();
12811       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12812 
12813       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12814              "Invalid number of elements");
12815 
12816       SmallVector<SDValue, 8> Opnds;
12817       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12818         Opnds.push_back(BuildVect.getOperand(i));
12819 
12820       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12821     }
12822   }
12823 
12824   // See if we can simplify the input to this truncate through knowledge that
12825   // only the low bits are being used.
12826   // For example "trunc (or (shl x, 8), y)" -> trunc y
12827   // Currently we only perform this optimization on scalars because vectors
12828   // may have different active low bits.
12829   if (!VT.isVector()) {
12830     APInt Mask =
12831         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12832     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12833       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12834   }
12835 
12836   // fold (truncate (load x)) -> (smaller load x)
12837   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12838   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12839     if (SDValue Reduced = reduceLoadWidth(N))
12840       return Reduced;
12841 
12842     // Handle the case where the load remains an extending load even
12843     // after truncation.
12844     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12845       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12846       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12847         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12848                                          VT, LN0->getChain(), LN0->getBasePtr(),
12849                                          LN0->getMemoryVT(),
12850                                          LN0->getMemOperand());
12851         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12852         return NewLoad;
12853       }
12854     }
12855   }
12856 
12857   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12858   // where ... are all 'undef'.
12859   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12860     SmallVector<EVT, 8> VTs;
12861     SDValue V;
12862     unsigned Idx = 0;
12863     unsigned NumDefs = 0;
12864 
12865     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12866       SDValue X = N0.getOperand(i);
12867       if (!X.isUndef()) {
12868         V = X;
12869         Idx = i;
12870         NumDefs++;
12871       }
12872       // Stop if more than one member is non-undef.
12873       if (NumDefs > 1)
12874         break;
12875 
12876       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12877                                      VT.getVectorElementType(),
12878                                      X.getValueType().getVectorElementCount()));
12879     }
12880 
12881     if (NumDefs == 0)
12882       return DAG.getUNDEF(VT);
12883 
12884     if (NumDefs == 1) {
12885       assert(V.getNode() && "The single defined operand is empty!");
12886       SmallVector<SDValue, 8> Opnds;
12887       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12888         if (i != Idx) {
12889           Opnds.push_back(DAG.getUNDEF(VTs[i]));
12890           continue;
12891         }
12892         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12893         AddToWorklist(NV.getNode());
12894         Opnds.push_back(NV);
12895       }
12896       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12897     }
12898   }
12899 
12900   // Fold truncate of a bitcast of a vector to an extract of the low vector
12901   // element.
12902   //
12903   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12904   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12905     SDValue VecSrc = N0.getOperand(0);
12906     EVT VecSrcVT = VecSrc.getValueType();
12907     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12908         (!LegalOperations ||
12909          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12910       SDLoc SL(N);
12911 
12912       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12913       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12914                          DAG.getVectorIdxConstant(Idx, SL));
12915     }
12916   }
12917 
12918   // Simplify the operands using demanded-bits information.
12919   if (SimplifyDemandedBits(SDValue(N, 0)))
12920     return SDValue(N, 0);
12921 
12922   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12923   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12924   // When the adde's carry is not used.
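        // E.g. (i32 (trunc (i64 (addcarry x, y, c)))) with an unused carry out
        // becomes (i32 (addcarry (trunc x), (trunc y), c)); the carry result
        // keeps its original type.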
12925   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12926       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12927       // We only do this for ADDCARRY before operation legalization.
12928       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12929        TLI.isOperationLegal(N0.getOpcode(), VT))) {
12930     SDLoc SL(N);
12931     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12932     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12933     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12934     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12935   }
12936 
12937   // fold (truncate (extract_subvector(ext x))) ->
12938   //      (extract_subvector x)
12939   // TODO: This can be generalized to cover cases where the truncate and extract
12940   // do not fully cancel each other out.
12941   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12942     SDValue N00 = N0.getOperand(0);
12943     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12944         N00.getOpcode() == ISD::ZERO_EXTEND ||
12945         N00.getOpcode() == ISD::ANY_EXTEND) {
12946       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12947           VT.getVectorElementType())
12948         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12949                            N00.getOperand(0), N0.getOperand(1));
12950     }
12951   }
12952 
12953   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12954     return NewVSel;
12955 
12956   // Narrow a suitable binary operation with a non-opaque constant operand by
12957   // moving it ahead of the truncate. This is limited to pre-legalization
12958   // because targets may prefer a wider type during later combines and invert
12959   // this transform.
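        // E.g. (i16 (trunc (add i32 x, 42))) -> (add i16 (trunc x), 42), with
        // the constant narrowed to i16 by constant folding.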
12960   switch (N0.getOpcode()) {
12961   case ISD::ADD:
12962   case ISD::SUB:
12963   case ISD::MUL:
12964   case ISD::AND:
12965   case ISD::OR:
12966   case ISD::XOR:
12967     if (!LegalOperations && N0.hasOneUse() &&
12968         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12969          isConstantOrConstantVector(N0.getOperand(1), true))) {
12970       // TODO: We already restricted this to pre-legalization, but for vectors
12971       // we are extra cautious to not create an unsupported operation.
12972       // Target-specific changes are likely needed to avoid regressions here.
12973       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12974         SDLoc DL(N);
12975         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12976         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12977         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12978       }
12979     }
12980     break;
12981   case ISD::USUBSAT:
12982     // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12983     // enough to know that the upper bits are zero, we must also ensure that
12984     // we don't introduce an extra truncate.
12985     if (!LegalOperations && N0.hasOneUse() &&
12986         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12987         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12988             VT.getScalarSizeInBits() &&
12989         hasOperation(N0.getOpcode(), VT)) {
12990       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12991                                  DAG, SDLoc(N));
12992     }
12993     break;
12994   }
12995 
12996   return SDValue();
12997 }
12998 
12999 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13000   SDValue Elt = N->getOperand(i);
13001   if (Elt.getOpcode() != ISD::MERGE_VALUES)
13002     return Elt.getNode();
13003   return Elt.getOperand(Elt.getResNo()).getNode();
13004 }
13005 
13006 /// build_pair (load, load) -> load
13007 /// if load locations are consecutive.
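      /// E.g. (i64 (build_pair (i32 load [p]), (i32 load [p+4]))) can become a
      /// single (i64 load [p]) on a little-endian target, provided the wide
      /// load is legal and fast.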
13008 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13009   assert(N->getOpcode() == ISD::BUILD_PAIR);
13010 
13011   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
13012   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
13013 
13014   // A BUILD_PAIR always has the least significant part in elt 0 and the
13015   // most significant part in elt 1, so when combining into one large load we
13016   // need to consider the endianness.
13017   if (DAG.getDataLayout().isBigEndian())
13018     std::swap(LD1, LD2);
13019 
13020   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13021       !LD1->hasOneUse() || !LD2->hasOneUse() ||
13022       LD1->getAddressSpace() != LD2->getAddressSpace())
13023     return SDValue();
13024 
13025   bool LD1Fast = false;
13026   EVT LD1VT = LD1->getValueType(0);
13027   unsigned LD1Bytes = LD1VT.getStoreSize();
13028   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13029       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
13030       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13031                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13032     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13033                        LD1->getPointerInfo(), LD1->getAlign());
13034 
13035   return SDValue();
13036 }
13037 
13038 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13039   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13040   // and Lo parts; on big-endian machines it doesn't.
13041   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13042 }
13043 
13044 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
13045                                     const TargetLowering &TLI) {
13046   // If this is not a bitcast to an FP type or if the target doesn't have
13047   // IEEE754-compliant FP logic, we're done.
13048   EVT VT = N->getValueType(0);
13049   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13050     return SDValue();
13051 
13052   // TODO: Handle cases where the integer constant is a different scalar
13053   // bitwidth than the FP type.
13054   SDValue N0 = N->getOperand(0);
13055   EVT SourceVT = N0.getValueType();
13056   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13057     return SDValue();
13058 
13059   unsigned FPOpcode;
13060   APInt SignMask;
13061   switch (N0.getOpcode()) {
13062   case ISD::AND:
13063     FPOpcode = ISD::FABS;
13064     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13065     break;
13066   case ISD::XOR:
13067     FPOpcode = ISD::FNEG;
13068     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13069     break;
13070   case ISD::OR:
13071     FPOpcode = ISD::FABS;
13072     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13073     break;
13074   default:
13075     return SDValue();
13076   }
13077 
13078   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13079   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13080   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13081   //   fneg (fabs X)
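        // E.g. for f32, the expected constant is 0x7fffffff in the AND case and
        // 0x80000000 in the XOR and OR cases.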
13082   SDValue LogicOp0 = N0.getOperand(0);
13083   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
13084   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13085       LogicOp0.getOpcode() == ISD::BITCAST &&
13086       LogicOp0.getOperand(0).getValueType() == VT) {
13087     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13088     NumFPLogicOpsConv++;
13089     if (N0.getOpcode() == ISD::OR)
13090       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13091     return FPOp;
13092   }
13093 
13094   return SDValue();
13095 }
13096 
13097 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13098   SDValue N0 = N->getOperand(0);
13099   EVT VT = N->getValueType(0);
13100 
13101   if (N0.isUndef())
13102     return DAG.getUNDEF(VT);
13103 
13104   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13105   // Only do this before legalize types, unless both types are integer and the
13106   // scalar type is legal. Only do this before legalize ops, since the target
13107   // may be depending on the bitcast.
13108   // First check to see if this is all constant.
13109   // TODO: Support FP bitcasts after legalize types.
13110   if (VT.isVector() &&
13111       (!LegalTypes ||
13112        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13113         TLI.isTypeLegal(VT.getVectorElementType()))) &&
13114       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
13115       cast<BuildVectorSDNode>(N0)->isConstant())
13116     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13117                                              VT.getVectorElementType());
13118 
13119   // If the input is a constant, let getNode fold it.
13120   if (isIntOrFPConstant(N0)) {
13121     // If we can't allow illegal operations, we need to check that this is just
13122     // an fp -> int or int -> fp conversion and that the resulting operation will
13123     // be legal.
13124     if (!LegalOperations ||
13125         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13126          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13127         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13128          TLI.isOperationLegal(ISD::Constant, VT))) {
13129       SDValue C = DAG.getBitcast(VT, N0);
13130       if (C.getNode() != N)
13131         return C;
13132     }
13133   }
13134 
13135   // (conv (conv x, t1), t2) -> (conv x, t2)
13136   if (N0.getOpcode() == ISD::BITCAST)
13137     return DAG.getBitcast(VT, N0.getOperand(0));
13138 
13139   // fold (conv (load x)) -> (load (conv*)x)
13140   // This is only valid if the resultant load doesn't need a higher alignment than the original.
13141   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13142       // Do not remove the cast if the types differ in endian layout.
13143       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13144           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13145       // If the load is volatile, we only want to change the load type if the
13146       // resulting load is legal. Otherwise we might increase the number of
13147       // memory accesses. We don't care if the original type was legal or not
13148       // as we assume software couldn't rely on the number of accesses of an
13149       // illegal type.
13150       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13151        TLI.isOperationLegal(ISD::LOAD, VT))) {
13152     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13153 
13154     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13155                                     *LN0->getMemOperand())) {
13156       SDValue Load =
13157           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13158                       LN0->getPointerInfo(), LN0->getAlign(),
13159                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13160       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13161       return Load;
13162     }
13163   }
13164 
13165   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13166     return V;
13167 
13168   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13169   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13170   //
13171   // For ppc_fp128:
13172   // fold (bitcast (fneg x)) ->
13173   //     flipbit = signbit
13174   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13175   //
13176   // fold (bitcast (fabs x)) ->
13177   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
13178   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13179   // This often reduces constant pool loads.
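        // E.g. in the scalar case, (i32 (bitcast (fneg f32:x))) becomes
        // (xor (i32 (bitcast x)), 0x80000000).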
13180   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13181        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13182       N0.getNode()->hasOneUse() && VT.isInteger() &&
13183       !VT.isVector() && !N0.getValueType().isVector()) {
13184     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13185     AddToWorklist(NewConv.getNode());
13186 
13187     SDLoc DL(N);
13188     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13189       assert(VT.getSizeInBits() == 128);
13190       SDValue SignBit = DAG.getConstant(
13191           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13192       SDValue FlipBit;
13193       if (N0.getOpcode() == ISD::FNEG) {
13194         FlipBit = SignBit;
13195         AddToWorklist(FlipBit.getNode());
13196       } else {
13197         assert(N0.getOpcode() == ISD::FABS);
13198         SDValue Hi =
13199             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13200                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13201                                               SDLoc(NewConv)));
13202         AddToWorklist(Hi.getNode());
13203         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13204         AddToWorklist(FlipBit.getNode());
13205       }
13206       SDValue FlipBits =
13207           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13208       AddToWorklist(FlipBits.getNode());
13209       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13210     }
13211     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13212     if (N0.getOpcode() == ISD::FNEG)
13213       return DAG.getNode(ISD::XOR, DL, VT,
13214                          NewConv, DAG.getConstant(SignBit, DL, VT));
13215     assert(N0.getOpcode() == ISD::FABS);
13216     return DAG.getNode(ISD::AND, DL, VT,
13217                        NewConv, DAG.getConstant(~SignBit, DL, VT));
13218   }
13219 
13220   // fold (bitconvert (fcopysign cst, x)) ->
13221   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
13222   // Note that we don't handle (copysign x, cst) because this can always be
13223   // folded to an fneg or fabs.
13224   //
13225   // For ppc_fp128:
13226   // fold (bitcast (fcopysign cst, x)) ->
13227   //     flipbit = (and (extract_element
13228   //                     (xor (bitcast cst), (bitcast x)), 0),
13229   //                    signbit)
13230   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
13231   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
13232       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
13233       VT.isInteger() && !VT.isVector()) {
13234     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13235     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13236     if (isTypeLegal(IntXVT)) {
13237       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13238       AddToWorklist(X.getNode());
13239 
13240       // If X has a different width than the result/lhs, sext it or truncate it.
13241       unsigned VTWidth = VT.getSizeInBits();
13242       if (OrigXWidth < VTWidth) {
13243         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13244         AddToWorklist(X.getNode());
13245       } else if (OrigXWidth > VTWidth) {
13246         // To get the sign bit in the right place, we have to shift it right
13247         // before truncating.
13248         SDLoc DL(X);
13249         X = DAG.getNode(ISD::SRL, DL,
13250                         X.getValueType(), X,
13251                         DAG.getConstant(OrigXWidth-VTWidth, DL,
13252                                         X.getValueType()));
13253         AddToWorklist(X.getNode());
13254         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13255         AddToWorklist(X.getNode());
13256       }
13257 
13258       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13259         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13260         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13261         AddToWorklist(Cst.getNode());
13262         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13263         AddToWorklist(X.getNode());
13264         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13265         AddToWorklist(XorResult.getNode());
13266         SDValue XorResult64 = DAG.getNode(
13267             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13268             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13269                                   SDLoc(XorResult)));
13270         AddToWorklist(XorResult64.getNode());
13271         SDValue FlipBit =
13272             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13273                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13274         AddToWorklist(FlipBit.getNode());
13275         SDValue FlipBits =
13276             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13277         AddToWorklist(FlipBits.getNode());
13278         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13279       }
13280       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13281       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13282                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
13283       AddToWorklist(X.getNode());
13284 
13285       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13286       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13287                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13288       AddToWorklist(Cst.getNode());
13289 
13290       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13291     }
13292   }
13293 
13294   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13295   if (N0.getOpcode() == ISD::BUILD_PAIR)
13296     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13297       return CombineLD;
13298 
13299   // Remove double bitcasts from shuffles - this is often a legacy of
13300   // XformToShuffleWithZero being used to combine bitmaskings (of
13301   // float vectors bitcast to integer vectors) into shuffles.
13302   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13303   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13304       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13305       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13306       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13307     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13308 
13309     // If an operand is a bitcast, peek through if it casts the original VT.
13310     // If an operand is a constant, just bitcast back to the original VT.
13311     auto PeekThroughBitcast = [&](SDValue Op) {
13312       if (Op.getOpcode() == ISD::BITCAST &&
13313           Op.getOperand(0).getValueType() == VT)
13314         return SDValue(Op.getOperand(0));
13315       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13316           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
13317         return DAG.getBitcast(VT, Op);
13318       return SDValue();
13319     };
13320 
13321     // FIXME: If either input vector is bitcast, try to convert the shuffle to
13322     // the result type of this bitcast. This would eliminate at least one
13323     // bitcast. See the transform in InstCombine.
13324     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13325     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13326     if (!(SV0 && SV1))
13327       return SDValue();
13328 
13329     int MaskScale =
13330         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13331     SmallVector<int, 8> NewMask;
13332     for (int M : SVN->getMask())
13333       for (int i = 0; i != MaskScale; ++i)
13334         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
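          // E.g. scaling a v2i64 mask <1, 0> to v4i32 (MaskScale == 2) yields
          // <2, 3, 0, 1>.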
13335 
13336     SDValue LegalShuffle =
13337         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13338     if (LegalShuffle)
13339       return LegalShuffle;
13340   }
13341 
13342   return SDValue();
13343 }
13344 
13345 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13346   EVT VT = N->getValueType(0);
13347   return CombineConsecutiveLoads(N, VT);
13348 }
13349 
13350 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13351   SDValue N0 = N->getOperand(0);
13352 
13353   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13354     return N0;
13355 
13356   return SDValue();
13357 }
13358 
13359 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13360 /// operands. DstEltVT indicates the destination element value type.
13361 SDValue DAGCombiner::
13362 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13363   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13364 
13365   // If this is already the right type, we're done.
13366   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13367 
13368   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13369   unsigned DstBitSize = DstEltVT.getSizeInBits();
13370 
13371   // If this is a conversion of N elements of one type to N elements of another
13372   // type, convert each element.  This handles FP<->INT cases.
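        // E.g. bitcasting (build_vector f32:a, f32:b) to v2i32 yields
        // (build_vector (i32 (bitcast a)), (i32 (bitcast b))).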
13373   if (SrcBitSize == DstBitSize) {
13374     SmallVector<SDValue, 8> Ops;
13375     for (SDValue Op : BV->op_values()) {
13376       // If the vector element type is not legal, the BUILD_VECTOR operands
13377       // are promoted and implicitly truncated.  Make that explicit here.
13378       if (Op.getValueType() != SrcEltVT)
13379         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13380       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13381       AddToWorklist(Ops.back().getNode());
13382     }
13383     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13384                               BV->getValueType(0).getVectorNumElements());
13385     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13386   }
13387 
13388   // Otherwise, we're growing or shrinking the elements.  To avoid having to
13389   // handle annoying details of growing/shrinking FP values, we convert them to
13390   // int first.
13391   if (SrcEltVT.isFloatingPoint()) {
13392     // Convert the input float vector to an int vector where the elements are
13393     // the same size.
13394     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13395     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13396     SrcEltVT = IntVT;
13397   }
13398 
13399   // Now we know the input is an integer vector.  If the output is a FP type,
13400   // convert to integer first, then to FP of the right size.
13401   if (DstEltVT.isFloatingPoint()) {
13402     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13403     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13404 
13405     // Next, convert to FP elements of the same size.
13406     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13407   }
13408 
13409   // Okay, we know the src/dst types are both integers of differing widths.
13410   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
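        // E.g. a constant v2i32 bitcast to v4i16 splits each 32-bit element
        // into two 16-bit halves in endian order; getConstantRawBits below
        // handles both the splitting and merging directions.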
13411 
13412   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13413   // BuildVectorSDNode?
13414   auto *BVN = cast<BuildVectorSDNode>(BV);
13415 
13416   // Extract the constant raw bit data.
13417   BitVector UndefElements;
13418   SmallVector<APInt> RawBits;
13419   bool IsLE = DAG.getDataLayout().isLittleEndian();
13420   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13421     return SDValue();
13422 
13423   SDLoc DL(BV);
13424   SmallVector<SDValue, 8> Ops;
13425   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13426     if (UndefElements[I])
13427       Ops.push_back(DAG.getUNDEF(DstEltVT));
13428     else
13429       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13430   }
13431 
13432   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13433   return DAG.getBuildVector(VT, DL, Ops);
13434 }
13435 
13436 // Returns true if floating-point contraction is allowed on the FMUL-SDValue
13437 // `N`.
13438 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13439   assert(N.getOpcode() == ISD::FMUL);
13440 
13441   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13442          N->getFlags().hasAllowContract();
13443 }
13444 
13445 // Returns true if `N` can assume no infinities involved in its computation.
13446 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13447   return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13448 }
13449 
13450 /// Try to perform FMA combining on a given FADD node.
13451 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13452   SDValue N0 = N->getOperand(0);
13453   SDValue N1 = N->getOperand(1);
13454   EVT VT = N->getValueType(0);
13455   SDLoc SL(N);
13456 
13457   const TargetOptions &Options = DAG.getTarget().Options;
13458 
13459   // Floating-point multiply-add with intermediate rounding.
13460   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13461 
13462   // Floating-point multiply-add without intermediate rounding.
13463   bool HasFMA =
13464       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13465       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13466 
13467   // No valid opcode, do not combine.
13468   if (!HasFMAD && !HasFMA)
13469     return SDValue();
13470 
13471   bool CanReassociate =
13472       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13473   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13474                               Options.UnsafeFPMath || HasFMAD);
13475   // If the addition is not contractable, do not combine.
13476   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13477     return SDValue();
13478 
13479   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13480     return SDValue();
13481 
13482   // Always prefer FMAD to FMA for precision.
13483   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13484   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13485 
13486   auto isFusedOp = [&](SDValue N) {
13487     unsigned Opcode = N.getOpcode();
13488     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13489   };
13490 
13491   // Check whether the node is an FMUL and contractable, either due to global
13492   // flags or SDNodeFlags.
13493   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13494     if (N.getOpcode() != ISD::FMUL)
13495       return false;
13496     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13497   };
13498   // If we have two choices when trying to fold (fadd (fmul u, v), (fmul x, y)),
13499   // prefer to fold the multiply with fewer uses.
13500   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13501     if (N0.getNode()->use_size() > N1.getNode()->use_size())
13502       std::swap(N0, N1);
13503   }
13504 
13505   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13506   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13507     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13508                        N0.getOperand(1), N1);
13509   }
13510 
13511   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13512   // Note: Commutes FADD operands.
13513   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13514     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13515                        N1.getOperand(1), N0);
13516   }
13517 
13518   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13519   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13520   // This requires reassociation because it changes the order of operations.
13521   SDValue FMA, E;
13522   if (CanReassociate && isFusedOp(N0) &&
13523       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13524       N0.getOperand(2).hasOneUse()) {
13525     FMA = N0;
13526     E = N1;
13527   } else if (CanReassociate && isFusedOp(N1) &&
13528              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13529              N1.getOperand(2).hasOneUse()) {
13530     FMA = N1;
13531     E = N0;
13532   }
13533   if (FMA && E) {
13534     SDValue A = FMA.getOperand(0);
13535     SDValue B = FMA.getOperand(1);
13536     SDValue C = FMA.getOperand(2).getOperand(0);
13537     SDValue D = FMA.getOperand(2).getOperand(1);
13538     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13539     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13540   }
13541 
13542   // Look through FP_EXTEND nodes to do more combining.
13543 
13544   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13545   if (N0.getOpcode() == ISD::FP_EXTEND) {
13546     SDValue N00 = N0.getOperand(0);
13547     if (isContractableFMUL(N00) &&
13548         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13549                             N00.getValueType())) {
13550       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13551                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13552                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13553                          N1);
13554     }
13555   }
13556 
13557   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13558   // Note: Commutes FADD operands.
13559   if (N1.getOpcode() == ISD::FP_EXTEND) {
13560     SDValue N10 = N1.getOperand(0);
13561     if (isContractableFMUL(N10) &&
13562         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13563                             N10.getValueType())) {
13564       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13565                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13566                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13567                          N0);
13568     }
13569   }
13570 
13571   // More folding opportunities when target permits.
13572   if (Aggressive) {
13573     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13574     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
13575     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13576                                     SDValue Z) {
13577       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13578                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13579                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13580                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13581                                      Z));
13582     };
13583     if (isFusedOp(N0)) {
13584       SDValue N02 = N0.getOperand(2);
13585       if (N02.getOpcode() == ISD::FP_EXTEND) {
13586         SDValue N020 = N02.getOperand(0);
13587         if (isContractableFMUL(N020) &&
13588             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13589                                 N020.getValueType())) {
13590           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13591                                       N020.getOperand(0), N020.getOperand(1),
13592                                       N1);
13593         }
13594       }
13595     }
13596 
13597     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13598     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13599     // FIXME: This turns two single-precision and one double-precision
13600     // operation into two double-precision operations, which might not be
13601     // interesting for all targets, especially GPUs.
13602     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13603                                     SDValue Z) {
13604       return DAG.getNode(
13605           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13606           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13607           DAG.getNode(PreferredFusedOpcode, SL, VT,
13608                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13609                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13610     };
13611     if (N0.getOpcode() == ISD::FP_EXTEND) {
13612       SDValue N00 = N0.getOperand(0);
13613       if (isFusedOp(N00)) {
13614         SDValue N002 = N00.getOperand(2);
13615         if (isContractableFMUL(N002) &&
13616             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13617                                 N00.getValueType())) {
13618           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13619                                       N002.getOperand(0), N002.getOperand(1),
13620                                       N1);
13621         }
13622       }
13623     }
13624 
13625     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13626     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13627     if (isFusedOp(N1)) {
13628       SDValue N12 = N1.getOperand(2);
13629       if (N12.getOpcode() == ISD::FP_EXTEND) {
13630         SDValue N120 = N12.getOperand(0);
13631         if (isContractableFMUL(N120) &&
13632             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13633                                 N120.getValueType())) {
13634           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13635                                       N120.getOperand(0), N120.getOperand(1),
13636                                       N0);
13637         }
13638       }
13639     }
13640 
13641     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13642     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13643     // FIXME: This turns two single-precision and one double-precision
13644     // operation into two double-precision operations, which might not be
13645     // interesting for all targets, especially GPUs.
13646     if (N1.getOpcode() == ISD::FP_EXTEND) {
13647       SDValue N10 = N1.getOperand(0);
13648       if (isFusedOp(N10)) {
13649         SDValue N102 = N10.getOperand(2);
13650         if (isContractableFMUL(N102) &&
13651             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13652                                 N10.getValueType())) {
13653           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13654                                       N102.getOperand(0), N102.getOperand(1),
13655                                       N0);
13656         }
13657       }
13658     }
13659   }
13660 
13661   return SDValue();
13662 }
13663 
13664 /// Try to perform FMA combining on a given FSUB node.
13665 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13666   SDValue N0 = N->getOperand(0);
13667   SDValue N1 = N->getOperand(1);
13668   EVT VT = N->getValueType(0);
13669   SDLoc SL(N);
13670 
13671   const TargetOptions &Options = DAG.getTarget().Options;
13672   // Floating-point multiply-add with intermediate rounding.
13673   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13674 
13675   // Floating-point multiply-add without intermediate rounding.
13676   bool HasFMA =
13677       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13678       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13679 
13680   // No valid opcode, do not combine.
13681   if (!HasFMAD && !HasFMA)
13682     return SDValue();
13683 
13684   const SDNodeFlags Flags = N->getFlags();
13685   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13686                               Options.UnsafeFPMath || HasFMAD);
13687 
13688   // If the subtraction is not contractable, do not combine.
13689   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13690     return SDValue();
13691 
13692   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13693     return SDValue();
13694 
13695   // Always prefer FMAD to FMA for precision.
13696   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13697   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13698   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13699 
13700   // Check whether the node is an FMUL and contractable, either due to global
13701   // flags or SDNodeFlags.
13702   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13703     if (N.getOpcode() != ISD::FMUL)
13704       return false;
13705     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13706   };
13707 
13708   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13709   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13710     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13711       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13712                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13713     }
13714     return SDValue();
13715   };
13716 
13717   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13718   // Note: Commutes FSUB operands.
13719   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13720     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13721       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13722                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13723                          YZ.getOperand(1), X);
13724     }
13725     return SDValue();
13726   };
13727 
13728   // If we have two choices when trying to fold (fsub (fmul u, v), (fmul x, y)),
13729   // prefer to fold the multiply with fewer uses.
13730   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13731       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13732     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13733     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13734       return V;
13735     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13736     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13737       return V;
13738   } else {
13739     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13740     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13741       return V;
13742     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13743     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13744       return V;
13745   }
13746 
13747   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13748   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13749       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13750     SDValue N00 = N0.getOperand(0).getOperand(0);
13751     SDValue N01 = N0.getOperand(0).getOperand(1);
13752     return DAG.getNode(PreferredFusedOpcode, SL, VT,
13753                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13754                        DAG.getNode(ISD::FNEG, SL, VT, N1));
13755   }
13756 
13757   // Look through FP_EXTEND nodes to do more combining.
13758 
13759   // fold (fsub (fpext (fmul x, y)), z)
13760   //   -> (fma (fpext x), (fpext y), (fneg z))
13761   if (N0.getOpcode() == ISD::FP_EXTEND) {
13762     SDValue N00 = N0.getOperand(0);
13763     if (isContractableFMUL(N00) &&
13764         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13765                             N00.getValueType())) {
13766       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13767                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13768                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13769                          DAG.getNode(ISD::FNEG, SL, VT, N1));
13770     }
13771   }
13772 
13773   // fold (fsub x, (fpext (fmul y, z)))
13774   //   -> (fma (fneg (fpext y)), (fpext z), x)
13775   // Note: Commutes FSUB operands.
13776   if (N1.getOpcode() == ISD::FP_EXTEND) {
13777     SDValue N10 = N1.getOperand(0);
13778     if (isContractableFMUL(N10) &&
13779         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13780                             N10.getValueType())) {
13781       return DAG.getNode(
13782           PreferredFusedOpcode, SL, VT,
13783           DAG.getNode(ISD::FNEG, SL, VT,
13784                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13785           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13786     }
13787   }
13788 
13789   // fold (fsub (fpext (fneg (fmul x, y))), z)
13790   //   -> (fneg (fma (fpext x), (fpext y), z))
13791   // Note: This could be removed with appropriate canonicalization of the
13792   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13793   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13794   // from implementing the canonicalization in visitFSUB.
13795   if (N0.getOpcode() == ISD::FP_EXTEND) {
13796     SDValue N00 = N0.getOperand(0);
13797     if (N00.getOpcode() == ISD::FNEG) {
13798       SDValue N000 = N00.getOperand(0);
13799       if (isContractableFMUL(N000) &&
13800           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13801                               N00.getValueType())) {
13802         return DAG.getNode(
13803             ISD::FNEG, SL, VT,
13804             DAG.getNode(PreferredFusedOpcode, SL, VT,
13805                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13806                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13807                         N1));
13808       }
13809     }
13810   }
13811 
13812   // fold (fsub (fneg (fpext (fmul x, y))), z)
13813   //   -> (fneg (fma (fpext x), (fpext y), z))
13814   // Note: This could be removed with appropriate canonicalization of the
13815   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13816   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13817   // from implementing the canonicalization in visitFSUB.
13818   if (N0.getOpcode() == ISD::FNEG) {
13819     SDValue N00 = N0.getOperand(0);
13820     if (N00.getOpcode() == ISD::FP_EXTEND) {
13821       SDValue N000 = N00.getOperand(0);
13822       if (isContractableFMUL(N000) &&
13823           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13824                               N000.getValueType())) {
13825         return DAG.getNode(
13826             ISD::FNEG, SL, VT,
13827             DAG.getNode(PreferredFusedOpcode, SL, VT,
13828                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13829                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13830                         N1));
13831       }
13832     }
13833   }
13834 
13835   auto isReassociable = [Options](SDNode *N) {
13836     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13837   };
13838 
13839   auto isContractableAndReassociableFMUL = [isContractableFMUL,
13840                                             isReassociable](SDValue N) {
13841     return isContractableFMUL(N) && isReassociable(N.getNode());
13842   };
13843 
13844   auto isFusedOp = [&](SDValue N) {
13845     unsigned Opcode = N.getOpcode();
13846     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13847   };
13848 
13849   // More folding opportunities when target permits.
13850   if (Aggressive && isReassociable(N)) {
13851     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13852     // fold (fsub (fma x, y, (fmul u, v)), z)
13853     //   -> (fma x, y, (fma u, v, (fneg z)))
13854     if (CanFuse && isFusedOp(N0) &&
13855         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13856         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13857       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13858                          N0.getOperand(1),
13859                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13860                                      N0.getOperand(2).getOperand(0),
13861                                      N0.getOperand(2).getOperand(1),
13862                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
13863     }
13864 
13865     // fold (fsub x, (fma y, z, (fmul u, v)))
13866     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
13867     if (CanFuse && isFusedOp(N1) &&
13868         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13869         N1->hasOneUse() && NoSignedZero) {
13870       SDValue N20 = N1.getOperand(2).getOperand(0);
13871       SDValue N21 = N1.getOperand(2).getOperand(1);
13872       return DAG.getNode(
13873           PreferredFusedOpcode, SL, VT,
13874           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13875           DAG.getNode(PreferredFusedOpcode, SL, VT,
13876                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13877     }
13878 
13879     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13880     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13881     if (isFusedOp(N0) && N0->hasOneUse()) {
13882       SDValue N02 = N0.getOperand(2);
13883       if (N02.getOpcode() == ISD::FP_EXTEND) {
13884         SDValue N020 = N02.getOperand(0);
13885         if (isContractableAndReassociableFMUL(N020) &&
13886             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13887                                 N020.getValueType())) {
13888           return DAG.getNode(
13889               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13890               DAG.getNode(
13891                   PreferredFusedOpcode, SL, VT,
13892                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13893                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13894                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13895         }
13896       }
13897     }
13898 
13899     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13900     //   -> (fma (fpext x), (fpext y),
13901     //           (fma (fpext u), (fpext v), (fneg z)))
13902     // FIXME: This turns two single-precision and one double-precision
13903     // operation into two double-precision operations, which might not be
13904     // interesting for all targets, especially GPUs.
13905     if (N0.getOpcode() == ISD::FP_EXTEND) {
13906       SDValue N00 = N0.getOperand(0);
13907       if (isFusedOp(N00)) {
13908         SDValue N002 = N00.getOperand(2);
13909         if (isContractableAndReassociableFMUL(N002) &&
13910             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13911                                 N00.getValueType())) {
13912           return DAG.getNode(
13913               PreferredFusedOpcode, SL, VT,
13914               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13915               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13916               DAG.getNode(
13917                   PreferredFusedOpcode, SL, VT,
13918                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13919                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13920                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13921         }
13922       }
13923     }
13924 
13925     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13926     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13927     if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13928         N1->hasOneUse()) {
13929       SDValue N120 = N1.getOperand(2).getOperand(0);
13930       if (isContractableAndReassociableFMUL(N120) &&
13931           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13932                               N120.getValueType())) {
13933         SDValue N1200 = N120.getOperand(0);
13934         SDValue N1201 = N120.getOperand(1);
13935         return DAG.getNode(
13936             PreferredFusedOpcode, SL, VT,
13937             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13938             DAG.getNode(PreferredFusedOpcode, SL, VT,
13939                         DAG.getNode(ISD::FNEG, SL, VT,
13940                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13941                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13942       }
13943     }
13944 
13945     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13946     //   -> (fma (fneg (fpext y)), (fpext z),
13947     //           (fma (fneg (fpext u)), (fpext v), x))
13948     // FIXME: This turns two single-precision and one double-precision
13949     // operation into two double-precision operations, which might not be
13950     // interesting for all targets, especially GPUs.
13951     if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13952       SDValue CvtSrc = N1.getOperand(0);
13953       SDValue N100 = CvtSrc.getOperand(0);
13954       SDValue N101 = CvtSrc.getOperand(1);
13955       SDValue N102 = CvtSrc.getOperand(2);
13956       if (isContractableAndReassociableFMUL(N102) &&
13957           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13958                               CvtSrc.getValueType())) {
13959         SDValue N1020 = N102.getOperand(0);
13960         SDValue N1021 = N102.getOperand(1);
13961         return DAG.getNode(
13962             PreferredFusedOpcode, SL, VT,
13963             DAG.getNode(ISD::FNEG, SL, VT,
13964                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13965             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13966             DAG.getNode(PreferredFusedOpcode, SL, VT,
13967                         DAG.getNode(ISD::FNEG, SL, VT,
13968                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13969                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13970       }
13971     }
13972   }
13973 
13974   return SDValue();
13975 }
13976 
13977 /// Try to perform FMA combining on a given FMUL node based on the distributive
13978 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13979 /// subtraction instead of addition).
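      /// E.g. (fmul (fadd x, 1.0), y) becomes (fma x, y, y), and
      /// (fmul (fsub x, 1.0), y) becomes (fma x, y, (fneg y)).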
13980 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13981   SDValue N0 = N->getOperand(0);
13982   SDValue N1 = N->getOperand(1);
13983   EVT VT = N->getValueType(0);
13984   SDLoc SL(N);
13985 
13986   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13987 
13988   const TargetOptions &Options = DAG.getTarget().Options;
13989 
13990   // The transforms below are incorrect when x == 0 and y == inf, because the
13991   // intermediate multiplication produces a nan.
13992   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13993   if (!hasNoInfs(Options, FAdd))
13994     return SDValue();
13995 
13996   // Floating-point multiply-add without intermediate rounding.
13997   bool HasFMA =
13998       isContractableFMUL(Options, SDValue(N, 0)) &&
13999       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14000       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14001 
14002   // Floating-point multiply-add with intermediate rounding. This can result
14003   // in a less precise result due to the changed rounding order.
14004   bool HasFMAD = Options.UnsafeFPMath &&
14005                  (LegalOperations && TLI.isFMADLegal(DAG, N));
14006 
14007   // No valid opcode, do not combine.
14008   if (!HasFMAD && !HasFMA)
14009     return SDValue();
14010 
14011   // Always prefer FMAD to FMA for precision.
14012   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14013   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14014 
14015   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14016   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
14017   auto FuseFADD = [&](SDValue X, SDValue Y) {
14018     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
14019       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14020         if (C->isExactlyValue(+1.0))
14021           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14022                              Y);
14023         if (C->isExactlyValue(-1.0))
14024           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14025                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14026       }
14027     }
14028     return SDValue();
14029   };
14030 
14031   if (SDValue FMA = FuseFADD(N0, N1))
14032     return FMA;
14033   if (SDValue FMA = FuseFADD(N1, N0))
14034     return FMA;
14035 
14036   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14037   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14038   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14039   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
14040   auto FuseFSUB = [&](SDValue X, SDValue Y) {
14041     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14042       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14043         if (C0->isExactlyValue(+1.0))
14044           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14045                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14046                              Y);
14047         if (C0->isExactlyValue(-1.0))
14048           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14049                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14050                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14051       }
14052       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14053         if (C1->isExactlyValue(+1.0))
14054           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14055                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14056         if (C1->isExactlyValue(-1.0))
14057           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14058                              Y);
14059       }
14060     }
14061     return SDValue();
14062   };
14063 
14064   if (SDValue FMA = FuseFSUB(N0, N1))
14065     return FMA;
14066   if (SDValue FMA = FuseFSUB(N1, N0))
14067     return FMA;
14068 
14069   return SDValue();
14070 }
14071 
14072 SDValue DAGCombiner::visitFADD(SDNode *N) {
14073   SDValue N0 = N->getOperand(0);
14074   SDValue N1 = N->getOperand(1);
14075   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14076   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14077   EVT VT = N->getValueType(0);
14078   SDLoc DL(N);
14079   const TargetOptions &Options = DAG.getTarget().Options;
14080   SDNodeFlags Flags = N->getFlags();
14081   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14082 
14083   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14084     return R;
14085 
14086   // fold (fadd c1, c2) -> c1 + c2
14087   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14088     return C;
14089 
14090   // canonicalize constant to RHS
14091   if (N0CFP && !N1CFP)
14092     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14093 
14094   // fold vector ops
14095   if (VT.isVector())
14096     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14097       return FoldedVOp;
14098 
14099   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14100   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
14101   if (N1C && N1C->isZero())
14102     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
14103       return N0;
14104 
14105   if (SDValue NewSel = foldBinOpIntoSelect(N))
14106     return NewSel;
14107 
14108   // fold (fadd A, (fneg B)) -> (fsub A, B)
14109   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14110     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14111             N1, DAG, LegalOperations, ForCodeSize))
14112       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14113 
14114   // fold (fadd (fneg A), B) -> (fsub B, A)
14115   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14116     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14117             N0, DAG, LegalOperations, ForCodeSize))
14118       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
14119 
14120   auto isFMulNegTwo = [](SDValue FMul) {
14121     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14122       return false;
14123     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14124     return C && C->isExactlyValue(-2.0);
14125   };
14126 
14127   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14128   if (isFMulNegTwo(N0)) {
14129     SDValue B = N0.getOperand(0);
14130     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14131     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14132   }
14133   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14134   if (isFMulNegTwo(N1)) {
14135     SDValue B = N1.getOperand(0);
14136     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14137     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14138   }
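  // Worked example for the -2.0 folds above: with A = 10.0, B = 3.0,
  //   fadd A, (fmul B, -2.0) = 10.0 + -6.0 = 4.0
  //   fsub A, (fadd B, B)    = 10.0 -  6.0 = 4.0
  // Doubling and negation are exact in binary FP, so both forms round
  // identically and no fast-math flags are needed; only the one-use check
  // on the fmul gates the transform.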
14139 
14140   // No FP constant should be created after legalization as the Instruction
14141   // Selection pass has a hard time dealing with FP constants.
14142   bool AllowNewConst = (Level < AfterLegalizeDAG);
14143 
14144   // If nnan is enabled, fold lots of things.
14145   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14146     // If allowed, fold (fadd (fneg x), x) -> 0.0
14147     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14148       return DAG.getConstantFP(0.0, DL, VT);
14149 
14150     // If allowed, fold (fadd x, (fneg x)) -> 0.0
14151     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14152       return DAG.getConstantFP(0.0, DL, VT);
14153   }
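  // The nnan guard above matters: with x = NaN, (fadd (fneg x), x) is
  // NaN + NaN = NaN rather than 0.0, and with x = +Inf it is
  // -Inf + +Inf = NaN, so folding to 0.0 is only sound when NaNs are
  // assumed absent.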
14154 
14155   // If 'unsafe math' is on, or reassoc and nsz are set, fold lots of things.
14156   // TODO: break out portions of the transformations below for which Unsafe is
14157   //       considered and which do not require both nsz and reassoc
14158   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14159        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14160       AllowNewConst) {
14161     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14162     if (N1CFP && N0.getOpcode() == ISD::FADD &&
14163         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14164       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14165       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14166     }
14167 
14168     // We can fold chains of FADDs of the same value into multiplications.
14169     // This transform is not safe in general because we are reducing the number
14170     // of rounding steps.
14171     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14172       if (N0.getOpcode() == ISD::FMUL) {
14173         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14174         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14175 
14176         // (fadd (fmul x, c), x) -> (fmul x, c+1)
14177         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14178           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14179                                        DAG.getConstantFP(1.0, DL, VT));
14180           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14181         }
14182 
14183         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14184         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14185             N1.getOperand(0) == N1.getOperand(1) &&
14186             N0.getOperand(0) == N1.getOperand(0)) {
14187           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14188                                        DAG.getConstantFP(2.0, DL, VT));
14189           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14190         }
14191       }
14192 
14193       if (N1.getOpcode() == ISD::FMUL) {
14194         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14195         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14196 
14197         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14198         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14199           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14200                                        DAG.getConstantFP(1.0, DL, VT));
14201           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14202         }
14203 
14204         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14205         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14206             N0.getOperand(0) == N0.getOperand(1) &&
14207             N1.getOperand(0) == N0.getOperand(0)) {
14208           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14209                                        DAG.getConstantFP(2.0, DL, VT));
14210           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14211         }
14212       }
14213 
14214       if (N0.getOpcode() == ISD::FADD) {
14215         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14216         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14217         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14218             (N0.getOperand(0) == N1)) {
14219           return DAG.getNode(ISD::FMUL, DL, VT, N1,
14220                              DAG.getConstantFP(3.0, DL, VT));
14221         }
14222       }
14223 
14224       if (N1.getOpcode() == ISD::FADD) {
14225         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14226         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14227         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14228             N1.getOperand(0) == N0) {
14229           return DAG.getNode(ISD::FMUL, DL, VT, N0,
14230                              DAG.getConstantFP(3.0, DL, VT));
14231         }
14232       }
14233 
14234       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14235       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14236           N0.getOperand(0) == N0.getOperand(1) &&
14237           N1.getOperand(0) == N1.getOperand(1) &&
14238           N0.getOperand(0) == N1.getOperand(0)) {
14239         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14240                            DAG.getConstantFP(4.0, DL, VT));
14241       }
14242     }
14243   } // enable-unsafe-fp-math
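  // Worked example for the c+1 fold above, written as IR (illustrative):
  //   %m = fmul reassoc nsz float %x, 3.0
  //   %r = fadd reassoc nsz float %m, %x
  // becomes (fmul x, 4.0): one operation instead of two. Since x*3.0
  // rounds before the add, the two forms can differ in the last ulp, which
  // is why reassoc+nsz (or global unsafe-math) gates this whole block.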
14244 
14245   // FADD -> FMA combines:
14246   if (SDValue Fused = visitFADDForFMACombine(N)) {
14247     AddToWorklist(Fused.getNode());
14248     return Fused;
14249   }
14250   return SDValue();
14251 }
14252 
14253 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14254   SDValue Chain = N->getOperand(0);
14255   SDValue N0 = N->getOperand(1);
14256   SDValue N1 = N->getOperand(2);
14257   EVT VT = N->getValueType(0);
14258   EVT ChainVT = N->getValueType(1);
14259   SDLoc DL(N);
14260   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14261 
14262   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14263   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14264     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14265             N1, DAG, LegalOperations, ForCodeSize)) {
14266       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14267                          {Chain, N0, NegN1});
14268     }
14269 
14270   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14271   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14272     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14273             N0, DAG, LegalOperations, ForCodeSize)) {
14274       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14275                          {Chain, N1, NegN0});
14276     }
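  // A minimal sketch of the node shapes above: operand 0 of a strict node
  // is the incoming chain, and the result list is {VT, Other}, e.g.
  //   (strict_fadd ch, A, (fneg B)) --> (strict_fsub ch, A, B)
  // so the replacement preserves the chain and with it the ordering of
  // FP-exception side effects.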
14277   return SDValue();
14278 }
14279 
14280 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14281   SDValue N0 = N->getOperand(0);
14282   SDValue N1 = N->getOperand(1);
14283   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14284   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14285   EVT VT = N->getValueType(0);
14286   SDLoc DL(N);
14287   const TargetOptions &Options = DAG.getTarget().Options;
14288   const SDNodeFlags Flags = N->getFlags();
14289   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14290 
14291   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14292     return R;
14293 
14294   // fold (fsub c1, c2) -> c1-c2
14295   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14296     return C;
14297 
14298   // fold vector ops
14299   if (VT.isVector())
14300     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14301       return FoldedVOp;
14302 
14303   if (SDValue NewSel = foldBinOpIntoSelect(N))
14304     return NewSel;
14305 
14306   // (fsub A, 0) -> A
14307   if (N1CFP && N1CFP->isZero()) {
14308     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14309         Flags.hasNoSignedZeros()) {
14310       return N0;
14311     }
14312   }
14313 
14314   if (N0 == N1) {
14315     // (fsub x, x) -> 0.0
14316     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14317       return DAG.getConstantFP(0.0f, DL, VT);
14318   }
14319 
14320   // (fsub -0.0, N1) -> -N1
14321   if (N0CFP && N0CFP->isZero()) {
14322     if (N0CFP->isNegative() ||
14323         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14324       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14325       // flushed to zero, unless all users treat denorms as zero (DAZ).
14326       // FIXME: This transform will change the sign of a NaN and the behavior
14327       // of a signaling NaN. It is only valid when a NoNaN flag is present.
14328       DenormalMode DenormMode = DAG.getDenormalMode(VT);
14329       if (DenormMode == DenormalMode::getIEEE()) {
14330         if (SDValue NegN1 =
14331                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14332           return NegN1;
14333         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14334           return DAG.getNode(ISD::FNEG, DL, VT, N1);
14335       }
14336     }
14337   }
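  // Example of the denormal hazard handled above: if denormal outputs are
  // flushed to zero, (fsub -0.0, d) for a denormal d yields -0.0 (the
  // exact result -d is flushed), while (fneg d) is a pure sign-bit flip
  // and yields -d. Hence the fold is limited to DenormalMode::getIEEE().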
14338 
14339   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14340        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14341       N1.getOpcode() == ISD::FADD) {
14342     // X - (X + Y) -> -Y
14343     if (N0 == N1->getOperand(0))
14344       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14345     // X - (Y + X) -> -Y
14346     if (N0 == N1->getOperand(1))
14347       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14348   }
14349 
14350   // fold (fsub A, (fneg B)) -> (fadd A, B)
14351   if (SDValue NegN1 =
14352           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14353     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14354 
14355   // FSUB -> FMA combines:
14356   if (SDValue Fused = visitFSUBForFMACombine(N)) {
14357     AddToWorklist(Fused.getNode());
14358     return Fused;
14359   }
14360 
14361   return SDValue();
14362 }
14363 
14364 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14365   SDValue N0 = N->getOperand(0);
14366   SDValue N1 = N->getOperand(1);
14367   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14368   EVT VT = N->getValueType(0);
14369   SDLoc DL(N);
14370   const TargetOptions &Options = DAG.getTarget().Options;
14371   const SDNodeFlags Flags = N->getFlags();
14372   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14373 
14374   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14375     return R;
14376 
14377   // fold (fmul c1, c2) -> c1*c2
14378   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14379     return C;
14380 
14381   // canonicalize constant to RHS
14382   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14383      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14384     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14385 
14386   // fold vector ops
14387   if (VT.isVector())
14388     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14389       return FoldedVOp;
14390 
14391   if (SDValue NewSel = foldBinOpIntoSelect(N))
14392     return NewSel;
14393 
14394   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14395     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14396     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14397         N0.getOpcode() == ISD::FMUL) {
14398       SDValue N00 = N0.getOperand(0);
14399       SDValue N01 = N0.getOperand(1);
14400       // Avoid an infinite loop by making sure that N00 is not a constant
14401       // (the inner multiply has not been constant folded yet).
14402       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14403           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14404         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14405         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14406       }
14407     }
14408 
14409     // Match a special case: X * 2.0 gets canonicalized to (fadd X, X) below.
14410     // fmul (fadd X, X), C -> fmul X, 2.0 * C
14411     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14412         N0.getOperand(0) == N0.getOperand(1)) {
14413       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14414       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14415       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14416     }
14417   }
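  // For example, with reassoc: (fmul (fmul X, 10.0), 20.0) --> (fmul X, 200.0).
  // The check that N00 is non-constant is what prevents an infinite loop:
  // once both inner operands are constants, ordinary constant folding
  // finishes the job instead.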
14418 
14419   // fold (fmul X, 2.0) -> (fadd X, X)
14420   if (N1CFP && N1CFP->isExactlyValue(+2.0))
14421     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14422 
14423   // fold (fmul X, -1.0) -> (fsub -0.0, X)
14424   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14425     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14426       return DAG.getNode(ISD::FSUB, DL, VT,
14427                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14428     }
14429   }
14430 
14431   // -N0 * -N1 --> N0 * N1
14432   TargetLowering::NegatibleCost CostN0 =
14433       TargetLowering::NegatibleCost::Expensive;
14434   TargetLowering::NegatibleCost CostN1 =
14435       TargetLowering::NegatibleCost::Expensive;
14436   SDValue NegN0 =
14437       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14438   SDValue NegN1 =
14439       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14440   if (NegN0 && NegN1 &&
14441       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14442        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14443     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14444 
14445   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14446   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14447   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14448       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14449       TLI.isOperationLegal(ISD::FABS, VT)) {
14450     SDValue Select = N0, X = N1;
14451     if (Select.getOpcode() != ISD::SELECT)
14452       std::swap(Select, X);
14453 
14454     SDValue Cond = Select.getOperand(0);
14455     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14456     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14457 
14458     if (TrueOpnd && FalseOpnd &&
14459         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14460         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14461         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14462       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14463       switch (CC) {
14464       default: break;
14465       case ISD::SETOLT:
14466       case ISD::SETULT:
14467       case ISD::SETOLE:
14468       case ISD::SETULE:
14469       case ISD::SETLT:
14470       case ISD::SETLE:
14471         std::swap(TrueOpnd, FalseOpnd);
14472         LLVM_FALLTHROUGH;
14473       case ISD::SETOGT:
14474       case ISD::SETUGT:
14475       case ISD::SETOGE:
14476       case ISD::SETUGE:
14477       case ISD::SETGT:
14478       case ISD::SETGE:
14479         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14480             TLI.isOperationLegal(ISD::FNEG, VT))
14481           return DAG.getNode(ISD::FNEG, DL, VT,
14482                    DAG.getNode(ISD::FABS, DL, VT, X));
14483         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14484           return DAG.getNode(ISD::FABS, DL, VT, X);
14485 
14486         break;
14487       }
14488     }
14489   }
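  // Concrete instance of the select pattern above (C-level view, assuming
  // nnan and nsz on the fmul):
  //   X * (X > 0.0 ?  1.0 : -1.0)  ==>  fabs(X)
  //   X * (X > 0.0 ? -1.0 :  1.0)  ==>  -fabs(X)
  // The 'lt/le' cases swap the select arms first so that both operand
  // orders funnel into the 'gt/ge' checks.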
14490 
14491   // FMUL -> FMA combines:
14492   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14493     AddToWorklist(Fused.getNode());
14494     return Fused;
14495   }
14496 
14497   return SDValue();
14498 }
14499 
14500 SDValue DAGCombiner::visitFMA(SDNode *N) {
14501   SDValue N0 = N->getOperand(0);
14502   SDValue N1 = N->getOperand(1);
14503   SDValue N2 = N->getOperand(2);
14504   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14505   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14506   EVT VT = N->getValueType(0);
14507   SDLoc DL(N);
14508   const TargetOptions &Options = DAG.getTarget().Options;
14509   // FMA nodes have flags that propagate to the created nodes.
14510   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14511 
14512   bool UnsafeFPMath =
14513       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14514 
14515   // Constant fold FMA.
14516   if (isa<ConstantFPSDNode>(N0) &&
14517       isa<ConstantFPSDNode>(N1) &&
14518       isa<ConstantFPSDNode>(N2)) {
14519     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14520   }
14521 
14522   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14523   TargetLowering::NegatibleCost CostN0 =
14524       TargetLowering::NegatibleCost::Expensive;
14525   TargetLowering::NegatibleCost CostN1 =
14526       TargetLowering::NegatibleCost::Expensive;
14527   SDValue NegN0 =
14528       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14529   SDValue NegN1 =
14530       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14531   if (NegN0 && NegN1 &&
14532       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14533        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14534     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14535 
14536   if (UnsafeFPMath) {
14537     if (N0CFP && N0CFP->isZero())
14538       return N2;
14539     if (N1CFP && N1CFP->isZero())
14540       return N2;
14541   }
14542 
14543   if (N0CFP && N0CFP->isExactlyValue(1.0))
14544     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14545   if (N1CFP && N1CFP->isExactlyValue(1.0))
14546     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14547 
14548   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14549   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14550      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14551     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14552 
14553   if (UnsafeFPMath) {
14554     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14555     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14556         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14557         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14558       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14559                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14560     }
14561 
14562     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14563     if (N0.getOpcode() == ISD::FMUL &&
14564         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14565         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14566       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14567                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14568                          N2);
14569     }
14570   }
14571 
14572   // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
14573   if (N1CFP) {
14574     if (N1CFP->isExactlyValue(1.0))
14575       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14576 
14577     if (N1CFP->isExactlyValue(-1.0) &&
14578         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14579       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14580       AddToWorklist(RHSNeg.getNode());
14581       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14582     }
14583 
14584     // fma (fneg x), K, y -> fma x, -K, y
14585     if (N0.getOpcode() == ISD::FNEG &&
14586         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14587          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14588                                               ForCodeSize)))) {
14589       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14590                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14591     }
14592   }
14593 
14594   if (UnsafeFPMath) {
14595     // (fma x, c, x) -> (fmul x, (c+1))
14596     if (N1CFP && N0 == N2) {
14597       return DAG.getNode(
14598           ISD::FMUL, DL, VT, N0,
14599           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14600     }
14601 
14602     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14603     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14604       return DAG.getNode(
14605           ISD::FMUL, DL, VT, N0,
14606           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14607     }
14608   }
14609 
14610   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14611   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14612   if (!TLI.isFNegFree(VT))
14613     if (SDValue Neg = TLI.getCheaperNegatedExpression(
14614             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14615       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14616   return SDValue();
14617 }
14618 
14619 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14620 // reciprocal.
14621 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14622 // Notice that this is not always beneficial. One reason is that different
14623 // targets may have different costs for FDIV and FMUL, so the cost of two
14624 // FDIVs may be lower than one FDIV and two FMULs. Another reason is that the
14625 // critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14626 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14627   // TODO: Limit this transform based on optsize/minsize - it always creates at
14628   //       least 1 extra instruction. But the perf win may be substantial enough
14629   //       that only minsize should restrict this.
14630   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14631   const SDNodeFlags Flags = N->getFlags();
14632   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14633     return SDValue();
14634 
14635   // Skip if the current node is a reciprocal or a negated reciprocal.
14636   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14637   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14638   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14639     return SDValue();
14640 
14641   // Exit early if the target does not want this transform or if there can't
14642   // possibly be enough uses of the divisor to make the transform worthwhile.
14643   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14644 
14645   // For splat vectors, scale the number of uses by the splat factor. If we can
14646   // convert the division into a scalar op, that will likely be much faster.
14647   unsigned NumElts = 1;
14648   EVT VT = N->getValueType(0);
14649   if (VT.isVector() && DAG.isSplatValue(N1))
14650     NumElts = VT.getVectorMinNumElements();
14651 
14652   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14653     return SDValue();
14654 
14655   // Find all FDIV users of the same divisor.
14656   // Use a set because duplicates may be present in the user list.
14657   SetVector<SDNode *> Users;
14658   for (auto *U : N1->uses()) {
14659     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14660       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14661       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14662           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14663           U->getFlags().hasAllowReassociation() &&
14664           U->getFlags().hasNoSignedZeros())
14665         continue;
14666 
14667       // This division is eligible for optimization only if global unsafe math
14668       // is enabled or if this division allows reciprocal formation.
14669       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14670         Users.insert(U);
14671     }
14672   }
14673 
14674   // Now that we have the actual number of divisor uses, make sure it meets
14675   // the minimum threshold specified by the target.
14676   if ((Users.size() * NumElts) < MinUses)
14677     return SDValue();
14678 
14679   SDLoc DL(N);
14680   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14681   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14682 
14683   // Dividend / Divisor -> Dividend * Reciprocal
14684   for (auto *U : Users) {
14685     SDValue Dividend = U->getOperand(0);
14686     if (Dividend != FPOne) {
14687       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14688                                     Reciprocal, Flags);
14689       CombineTo(U, NewNode);
14690     } else if (U != Reciprocal.getNode()) {
14691       // In the absence of fast-math-flags, this user node is always the
14692       // same node as Reciprocal, but with FMF they may be different nodes.
14693       CombineTo(U, Reciprocal);
14694     }
14695   }
14696   return SDValue(N, 0);  // N was replaced.
14697 }
14698 
14699 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14700   SDValue N0 = N->getOperand(0);
14701   SDValue N1 = N->getOperand(1);
14702   EVT VT = N->getValueType(0);
14703   SDLoc DL(N);
14704   const TargetOptions &Options = DAG.getTarget().Options;
14705   SDNodeFlags Flags = N->getFlags();
14706   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14707 
14708   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14709     return R;
14710 
14711   // fold (fdiv c1, c2) -> c1/c2
14712   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14713     return C;
14714 
14715   // fold vector ops
14716   if (VT.isVector())
14717     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14718       return FoldedVOp;
14719 
14720   if (SDValue NewSel = foldBinOpIntoSelect(N))
14721     return NewSel;
14722 
14723   if (SDValue V = combineRepeatedFPDivisors(N))
14724     return V;
14725 
14726   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14727     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14728     if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14729       // Compute the reciprocal 1.0 / c2.
14730       const APFloat &N1APF = N1CFP->getValueAPF();
14731       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14732       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14733       // Only do the transform if the reciprocal is a legal fp immediate that
14734       // isn't too nasty (e.g. NaN, denormal, ...).
14735       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14736           (!LegalOperations ||
14737            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14738            // backend)... we should handle this gracefully after Legalize.
14739            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14740            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14741            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14742         return DAG.getNode(ISD::FMUL, DL, VT, N0,
14743                            DAG.getConstantFP(Recip, DL, VT));
14744     }
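    // e.g. X / 4.0 --> X * 0.25 is exact (opOK: 0.25 is a power of two),
    // while X / 3.0 --> X * (1.0/3.0) is opInexact and is allowed only
    // because the caller already opted into reciprocal approximations via
    // 'arcp' or global unsafe-math.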
14745 
14746     // If this FDIV is part of a reciprocal square root, it may be folded
14747     // into a target-specific square root estimate instruction.
14748     if (N1.getOpcode() == ISD::FSQRT) {
14749       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14750         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14751     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14752                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14753       if (SDValue RV =
14754               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14755         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14756         AddToWorklist(RV.getNode());
14757         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14758       }
14759     } else if (N1.getOpcode() == ISD::FP_ROUND &&
14760                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14761       if (SDValue RV =
14762               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14763         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14764         AddToWorklist(RV.getNode());
14765         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14766       }
14767     } else if (N1.getOpcode() == ISD::FMUL) {
14768       // Look through an FMUL. Even though this won't remove the FDIV directly,
14769       // it's still worthwhile to get rid of the FSQRT if possible.
14770       SDValue Sqrt, Y;
14771       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14772         Sqrt = N1.getOperand(0);
14773         Y = N1.getOperand(1);
14774       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14775         Sqrt = N1.getOperand(1);
14776         Y = N1.getOperand(0);
14777       }
14778       if (Sqrt.getNode()) {
14779         // If the other multiply operand is known positive, pull it into the
14780         // sqrt. That will eliminate the division if we convert to an estimate.
14781         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14782             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14783           SDValue A;
14784           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14785             A = Y.getOperand(0);
14786           else if (Y == Sqrt.getOperand(0))
14787             A = Y;
14788           if (A) {
14789             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14790             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14791             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14792             SDValue AAZ =
14793                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14794             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14795               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14796 
14797             // Estimate creation failed. Clean up speculatively created nodes.
14798             recursivelyDeleteUnusedNodes(AAZ.getNode());
14799           }
14800         }
14801 
14802         // We found an FSQRT, so try to make this fold:
14803         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14804         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14805           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14806           AddToWorklist(Div.getNode());
14807           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14808         }
14809       }
14810     }
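    // Combined effect of the sqrt rewrites above, on an example:
    //   X / (fabs(A) * sqrt(Z)) --> X * rsqrt(A*A*Z)    (no FDIV remains)
    //   X / (Y * sqrt(Z))       --> X * (rsqrt(Z) / Y)  (general fallback)
    // The first form is preferred when it applies because it removes the
    // division entirely once the estimate succeeds.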
14811 
14812     // Fold into a reciprocal estimate and multiply instead of a real divide.
14813     if (Options.NoInfsFPMath || Flags.hasNoInfs())
14814       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14815         return RV;
14816   }
14817 
14818   // Fold X/Sqrt(X) -> Sqrt(X)
14819   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14820       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14821     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14822       return N1;
14823 
14824   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14825   TargetLowering::NegatibleCost CostN0 =
14826       TargetLowering::NegatibleCost::Expensive;
14827   TargetLowering::NegatibleCost CostN1 =
14828       TargetLowering::NegatibleCost::Expensive;
14829   SDValue NegN0 =
14830       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14831   SDValue NegN1 =
14832       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14833   if (NegN0 && NegN1 &&
14834       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14835        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14836     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14837 
14838   return SDValue();
14839 }
14840 
14841 SDValue DAGCombiner::visitFREM(SDNode *N) {
14842   SDValue N0 = N->getOperand(0);
14843   SDValue N1 = N->getOperand(1);
14844   EVT VT = N->getValueType(0);
14845   SDNodeFlags Flags = N->getFlags();
14846   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14847 
14848   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14849     return R;
14850 
14851   // fold (frem c1, c2) -> fmod(c1,c2)
14852   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
14853     return C;
14854 
14855   if (SDValue NewSel = foldBinOpIntoSelect(N))
14856     return NewSel;
14857 
14858   return SDValue();
14859 }
14860 
14861 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14862   SDNodeFlags Flags = N->getFlags();
14863   const TargetOptions &Options = DAG.getTarget().Options;
14864 
14865   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14866   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14867   if (!Flags.hasApproximateFuncs() ||
14868       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14869     return SDValue();
14870 
14871   SDValue N0 = N->getOperand(0);
14872   if (TLI.isFsqrtCheap(N0, DAG))
14873     return SDValue();
14874 
14875   // FSQRT nodes have flags that propagate to the created nodes.
14876   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14877   //       transform the fdiv, we may produce a sub-optimal estimate sequence
14878   //       because the reciprocal calculation may not have to filter out a
14879   //       0.0 input.
14880   return buildSqrtEstimate(N0, Flags);
14881 }
14882 
14883 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14884 /// copysign(x, fp_round(y)) -> copysign(x, y)
14885 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14886   SDValue N1 = N->getOperand(1);
14887   if ((N1.getOpcode() == ISD::FP_EXTEND ||
14888        N1.getOpcode() == ISD::FP_ROUND)) {
14889     EVT N1VT = N1->getValueType(0);
14890     EVT N1Op0VT = N1->getOperand(0).getValueType();
14891 
14892     // Always fold no-op FP casts.
14893     if (N1VT == N1Op0VT)
14894       return true;
14895 
14896     // Do not optimize out type conversion of f128 type yet.
14897     // For some targets like x86_64, the configuration keeps one f128 value in
14898     // one SSE register, but instruction selection cannot yet handle
14899     // FCOPYSIGN on SSE registers.
14900     if (N1Op0VT == MVT::f128)
14901       return false;
14902 
14903     // Avoid mismatched vector operand types, for better instruction selection.
14904     if (N1Op0VT.isVector())
14905       return false;
14906 
14907     return true;
14908   }
14909   return false;
14910 }
14911 
14912 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14913   SDValue N0 = N->getOperand(0);
14914   SDValue N1 = N->getOperand(1);
14915   EVT VT = N->getValueType(0);
14916 
14917   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
14918   if (SDValue C =
14919           DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
14920     return C;
14921 
14922   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14923     const APFloat &V = N1C->getValueAPF();
14924     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
14925     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14926     if (!V.isNegative()) {
14927       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14928         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14929     } else {
14930       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14931         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14932                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14933     }
14934   }
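  // e.g. copysign(X, 2.0) --> fabs(X) and copysign(X, -0.5) --> fneg(fabs(X));
  // only the sign of the constant matters, not its magnitude.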
14935 
14936   // copysign(fabs(x), y) -> copysign(x, y)
14937   // copysign(fneg(x), y) -> copysign(x, y)
14938   // copysign(copysign(x,z), y) -> copysign(x, y)
14939   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14940       N0.getOpcode() == ISD::FCOPYSIGN)
14941     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14942 
14943   // copysign(x, abs(y)) -> abs(x)
14944   if (N1.getOpcode() == ISD::FABS)
14945     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14946 
14947   // copysign(x, copysign(y,z)) -> copysign(x, z)
14948   if (N1.getOpcode() == ISD::FCOPYSIGN)
14949     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14950 
14951   // copysign(x, fp_extend(y)) -> copysign(x, y)
14952   // copysign(x, fp_round(y)) -> copysign(x, y)
14953   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14954     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14955 
14956   return SDValue();
14957 }
14958 
14959 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14960   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14961   if (!ExponentC)
14962     return SDValue();
14963   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14964 
14965   // Try to convert x ** (1/3) into cube root.
14966   // TODO: Handle the various flavors of long double.
14967   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14968   //       Some range near 1/3 should be fine.
14969   EVT VT = N->getValueType(0);
14970   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14971       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14972     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14973     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14974     // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
14975     // For regular numbers, rounding may cause the results to differ.
14976     // Therefore, we require { nsz ninf nnan afn } for this transform.
14977     // TODO: We could select out the special cases if we don't have nsz/ninf.
14978     SDNodeFlags Flags = N->getFlags();
14979     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14980         !Flags.hasApproximateFuncs())
14981       return SDValue();
14982 
14983     // Do not create a cbrt() libcall if the target does not have it, and do not
14984     // turn a pow that has lowering support into a cbrt() libcall.
14985     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14986         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14987          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14988       return SDValue();
14989 
14990     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14991   }
14992 
14993   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14994   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14995   // TODO: This could be extended (using a target hook) to handle smaller
14996   // power-of-2 fractional exponents.
14997   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14998   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14999   if (ExponentIs025 || ExponentIs075) {
15000     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15001     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
15002     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15003     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
15004     // For regular numbers, rounding may cause the results to differ.
15005     // Therefore, we require { nsz ninf afn } for this transform.
15006     // TODO: We could select out the special cases if we don't have nsz/ninf.
15007     SDNodeFlags Flags = N->getFlags();
15008 
15009     // We only need no signed zeros for the 0.25 case.
15010     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15011         !Flags.hasApproximateFuncs())
15012       return SDValue();
15013 
15014     // Don't double the number of libcalls. We are trying to inline fast code.
15015     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
15016       return SDValue();
15017 
15018     // Assume that libcalls are the smallest code.
15019     // TODO: This restriction should probably be lifted for vectors.
15020     if (ForCodeSize)
15021       return SDValue();
15022 
15023     // pow(X, 0.25) --> sqrt(sqrt(X))
15024     SDLoc DL(N);
15025     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15026     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15027     if (ExponentIs025)
15028       return SqrtSqrt;
15029     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15030     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15031   }
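  // Numeric sanity check for the 0.75 path: with X = 16.0,
  //   pow(16.0, 0.75) = 8.0 and sqrt(16.0) * sqrt(sqrt(16.0)) = 4.0 * 2.0 = 8.0,
  // i.e. x^(3/4) = x^(1/2) * x^(1/4).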
15032 
15033   return SDValue();
15034 }
15035 
15036 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
15037                                const TargetLowering &TLI) {
15038   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15039   // replacing casts with a libcall. We also must be allowed to ignore -0.0
15040   // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but the
15041   // integer conversions would return +0.0.
15042   // FIXME: We should be able to use node-level FMF here.
15043   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15044   EVT VT = N->getValueType(0);
15045   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15046       !DAG.getTarget().Options.NoSignedZerosFPMath)
15047     return SDValue();
15048 
15049   // fptosi/fptoui round towards zero, so converting from FP to integer and
15050   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
15051   SDValue N0 = N->getOperand(0);
15052   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15053       N0.getOperand(0).getValueType() == VT)
15054     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15055 
15056   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15057       N0.getOperand(0).getValueType() == VT)
15058     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
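  // e.g. (sitofp (fptosi -3.7f)) = sitofp(-3) = -3.0f = ftrunc(-3.7f).
  // The NoSignedZerosFPMath requirement above exists because the round
  // trip of -0.3f yields +0.0f while ftrunc(-0.3f) yields -0.0f.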
15059 
15060   return SDValue();
15061 }
15062 
15063 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15064   SDValue N0 = N->getOperand(0);
15065   EVT VT = N->getValueType(0);
15066   EVT OpVT = N0.getValueType();
15067 
15068   // [us]itofp(undef) = 0, because the result value is bounded.
15069   if (N0.isUndef())
15070     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15071 
15072   // fold (sint_to_fp c1) -> c1fp
15073   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15074       // ...but only if the target supports immediate floating-point values
15075       (!LegalOperations ||
15076        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15077     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15078 
15079   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15080   // but UINT_TO_FP is legal on this target, try to convert.
15081   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15082       hasOperation(ISD::UINT_TO_FP, OpVT)) {
15083     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15084     if (DAG.SignBitIsZero(N0))
15085       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15086   }
15087 
15088   // The next optimizations are desirable only if SELECT_CC can be lowered.
15089   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15090   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15091       !VT.isVector() &&
15092       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15093     SDLoc DL(N);
15094     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15095                          DAG.getConstantFP(0.0, DL, VT));
15096   }
15097 
15098   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15099   //      (select (setcc x, y, cc), 1.0, 0.0)
15100   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15101       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15102       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15103     SDLoc DL(N);
15104     return DAG.getSelect(DL, VT, N0.getOperand(0),
15105                          DAG.getConstantFP(1.0, DL, VT),
15106                          DAG.getConstantFP(0.0, DL, VT));
15107   }
15108 
15109   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15110     return FTrunc;
15111 
15112   return SDValue();
15113 }
15114 
15115 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15116   SDValue N0 = N->getOperand(0);
15117   EVT VT = N->getValueType(0);
15118   EVT OpVT = N0.getValueType();
15119 
15120   // [us]itofp(undef) = 0, because the result value is bounded.
15121   if (N0.isUndef())
15122     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15123 
15124   // fold (uint_to_fp c1) -> c1fp
15125   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15126       // ...but only if the target supports immediate floating-point values
15127       (!LegalOperations ||
15128        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15129     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15130 
15131   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15132   // but SINT_TO_FP is legal on this target, try to convert.
15133   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15134       hasOperation(ISD::SINT_TO_FP, OpVT)) {
15135     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15136     if (DAG.SignBitIsZero(N0))
15137       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15138   }
15139 
15140   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15141   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15142       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15143     SDLoc DL(N);
15144     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15145                          DAG.getConstantFP(0.0, DL, VT));
15146   }
15147 
15148   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15149     return FTrunc;
15150 
15151   return SDValue();
15152 }
15153 
15154 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15155 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15156   SDValue N0 = N->getOperand(0);
15157   EVT VT = N->getValueType(0);
15158 
15159   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15160     return SDValue();
15161 
15162   SDValue Src = N0.getOperand(0);
15163   EVT SrcVT = Src.getValueType();
15164   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15165   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15166 
15167   // We can safely assume the conversion won't overflow the output range,
15168   // because (for example) (uint8_t)18293.f is undefined behavior.
15169 
15170   // Since we can assume the conversion won't overflow, our decision as to
15171   // whether the input will fit in the float should depend on the minimum
15172   // of the input range and output range.
15173 
15174   // This means this is also safe for a signed input and unsigned output, since
15175   // a negative input would lead to undefined behavior.
15176   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15177   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15178   unsigned ActualSize = std::min(InputSize, OutputSize);
15179   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15180 
15181   // We can only fold away the float conversion if the input range can be
15182   // represented exactly in the float range.
15183   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15184     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15185       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15186                                                        : ISD::ZERO_EXTEND;
15187       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15188     }
15189     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15190       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15191     return DAG.getBitcast(VT, Src);
15192   }
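  // Example with f32 (24 bits of precision): i16 -> f32 -> i32 is exact,
  // so (fp_to_sint (sint_to_fp i16:x)) over i32 becomes (sign_extend x).
  // i32 -> f32 -> i32 is not folded: ActualSize = 31 > 24, and a value
  // such as 0x7fffffff would not survive the round trip.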
15193   return SDValue();
15194 }
15195 
15196 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15197   SDValue N0 = N->getOperand(0);
15198   EVT VT = N->getValueType(0);
15199 
15200   // fold (fp_to_sint undef) -> undef
15201   if (N0.isUndef())
15202     return DAG.getUNDEF(VT);
15203 
15204   // fold (fp_to_sint c1fp) -> c1
15205   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15206     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15207 
15208   return FoldIntToFPToInt(N, DAG);
15209 }
15210 
15211 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15212   SDValue N0 = N->getOperand(0);
15213   EVT VT = N->getValueType(0);
15214 
15215   // fold (fp_to_uint undef) -> undef
15216   if (N0.isUndef())
15217     return DAG.getUNDEF(VT);
15218 
15219   // fold (fp_to_uint c1fp) -> c1
15220   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15221     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15222 
15223   return FoldIntToFPToInt(N, DAG);
15224 }
15225 
15226 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15227   SDValue N0 = N->getOperand(0);
15228   SDValue N1 = N->getOperand(1);
15229   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15230   EVT VT = N->getValueType(0);
15231 
15232   // fold (fp_round c1fp) -> c1fp
15233   if (N0CFP)
15234     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15235 
15236   // fold (fp_round (fp_extend x)) -> x
15237   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15238     return N0.getOperand(0);
15239 
15240   // fold (fp_round (fp_round x)) -> (fp_round x)
15241   if (N0.getOpcode() == ISD::FP_ROUND) {
15242     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15243     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15244 
15245     // Skip this folding if it results in an fp_round from f80 to f16.
15246     //
15247     // f80 to f16 always generates an expensive (and as yet, unimplemented)
15248     // libcall to __truncxfhf2 instead of selecting native f16 conversion
15249     // instructions from f32 or f64.  Moreover, the first (value-preserving)
15250     // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
15251     // x86.
15252     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15253       return SDValue();
15254 
15255     // If the first fp_round isn't a value preserving truncation, it might
15256     // introduce a tie in the second fp_round, that wouldn't occur in the
15257     // single-step fp_round we want to fold to.
15258     // In other words, double rounding isn't the same as rounding.
15259     // Also, this is a value preserving truncation iff both fp_round's are.
15260     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15261       SDLoc DL(N);
15262       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15263                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15264     }
15265   }
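  // Illustrative double-rounding hazard behind the N0IsTrunc check: a value
  // exactly halfway between two f16 numbers after an f64->f32 round can
  // tie-break differently than it would in a single f64->f16 rounding, so
  // two non-truncating rounds are not equivalent to one.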
15266 
15267   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15268   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
15269     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15270                               N0.getOperand(0), N1);
15271     AddToWorklist(Tmp.getNode());
15272     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15273                        Tmp, N0.getOperand(1));
15274   }
15275 
15276   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15277     return NewVSel;
15278 
15279   return SDValue();
15280 }
15281 
15282 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15283   SDValue N0 = N->getOperand(0);
15284   EVT VT = N->getValueType(0);
15285 
15286   // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
15287   if (N->hasOneUse() &&
15288       N->use_begin()->getOpcode() == ISD::FP_ROUND)
15289     return SDValue();
15290 
15291   // fold (fp_extend c1fp) -> c1fp
15292   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15293     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15294 
15295   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15296   if (N0.getOpcode() == ISD::FP16_TO_FP &&
15297       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15298     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15299 
15300   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15301   // value of X.
15302   if (N0.getOpcode() == ISD::FP_ROUND
15303       && N0.getConstantOperandVal(1) == 1) {
15304     SDValue In = N0.getOperand(0);
15305     if (In.getValueType() == VT) return In;
15306     if (VT.bitsLT(In.getValueType()))
15307       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15308                          In, N0.getOperand(1));
15309     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15310   }
15311 
15312   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15313   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15314       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15315     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15316     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15317                                      LN0->getChain(),
15318                                      LN0->getBasePtr(), N0.getValueType(),
15319                                      LN0->getMemOperand());
15320     CombineTo(N, ExtLoad);
15321     CombineTo(N0.getNode(),
15322               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15323                           N0.getValueType(), ExtLoad,
15324                           DAG.getIntPtrConstant(1, SDLoc(N0))),
15325               ExtLoad.getValue(1));
15326     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15327   }
15328 
15329   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15330     return NewVSel;
15331 
15332   return SDValue();
15333 }
15334 
15335 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15336   SDValue N0 = N->getOperand(0);
15337   EVT VT = N->getValueType(0);
15338 
15339   // fold (fceil c1) -> fceil(c1)
15340   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15341     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15342 
15343   return SDValue();
15344 }
15345 
15346 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15347   SDValue N0 = N->getOperand(0);
15348   EVT VT = N->getValueType(0);
15349 
15350   // fold (ftrunc c1) -> ftrunc(c1)
15351   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15352     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15353 
15354   // fold ftrunc (known rounded int x) -> x
15355   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
15356   // likely to be generated to extract an integer from a rounded floating value.
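        // For example, ftrunc(ffloor(X)) folds to ffloor(X), since ffloor already
        // produces an integral value.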
15357   switch (N0.getOpcode()) {
15358   default: break;
15359   case ISD::FRINT:
15360   case ISD::FTRUNC:
15361   case ISD::FNEARBYINT:
15362   case ISD::FFLOOR:
15363   case ISD::FCEIL:
15364     return N0;
15365   }
15366 
15367   return SDValue();
15368 }
15369 
15370 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15371   SDValue N0 = N->getOperand(0);
15372   EVT VT = N->getValueType(0);
15373 
15374   // fold (ffloor c1) -> ffloor(c1)
15375   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15376     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15377 
15378   return SDValue();
15379 }
15380 
15381 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15382   SDValue N0 = N->getOperand(0);
15383   EVT VT = N->getValueType(0);
15384   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15385 
15386   // Constant fold FNEG.
15387   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15388     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15389 
15390   if (SDValue NegN0 =
15391           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15392     return NegN0;
15393 
15394   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15395   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15396   // know it was called from a context with a nsz flag if the input fsub does
15397   // not.
15398   if (N0.getOpcode() == ISD::FSUB &&
15399       (DAG.getTarget().Options.NoSignedZerosFPMath ||
15400        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15401     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15402                        N0.getOperand(0));
15403   }
15404 
15405   if (SDValue Cast = foldSignChangeInBitcast(N))
15406     return Cast;
15407 
15408   return SDValue();
15409 }
15410 
15411 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15412   SDValue N0 = N->getOperand(0);
15413   SDValue N1 = N->getOperand(1);
15414   EVT VT = N->getValueType(0);
15415   const SDNodeFlags Flags = N->getFlags();
15416   unsigned Opc = N->getOpcode();
15417   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15418   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15419   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15420 
15421   // Constant fold.
15422   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15423     return C;
15424 
15425   // Canonicalize to constant on RHS.
15426   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15427       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15428     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15429 
15430   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15431     const APFloat &AF = N1CFP->getValueAPF();
15432 
15433     // minnum(X, nan) -> X
15434     // maxnum(X, nan) -> X
15435     // minimum(X, nan) -> nan
15436     // maximum(X, nan) -> nan
15437     if (AF.isNaN())
15438       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15439 
15440     // In the following folds, inf can be replaced with the largest finite
15441     // float, if the ninf flag is set.
15442     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15443       // minnum(X, -inf) -> -inf
15444       // maxnum(X, +inf) -> +inf
15445       // minimum(X, -inf) -> -inf if nnan
15446       // maximum(X, +inf) -> +inf if nnan
15447       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15448         return N->getOperand(1);
15449 
15450       // minnum(X, +inf) -> X if nnan
15451       // maxnum(X, -inf) -> X if nnan
15452       // minimum(X, +inf) -> X
15453       // maximum(X, -inf) -> X
15454       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15455         return N->getOperand(0);
15456     }
15457   }
15458 
15459   return SDValue();
15460 }
15461 
15462 SDValue DAGCombiner::visitFABS(SDNode *N) {
15463   SDValue N0 = N->getOperand(0);
15464   EVT VT = N->getValueType(0);
15465 
15466   // fold (fabs c1) -> fabs(c1)
15467   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15468     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15469 
15470   // fold (fabs (fabs x)) -> (fabs x)
15471   if (N0.getOpcode() == ISD::FABS)
15472     return N->getOperand(0);
15473 
15474   // fold (fabs (fneg x)) -> (fabs x)
15475   // fold (fabs (fcopysign x, y)) -> (fabs x)
15476   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15477     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15478 
15479   if (SDValue Cast = foldSignChangeInBitcast(N))
15480     return Cast;
15481 
15482   return SDValue();
15483 }
15484 
15485 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15486   SDValue Chain = N->getOperand(0);
15487   SDValue N1 = N->getOperand(1);
15488   SDValue N2 = N->getOperand(2);
15489 
15490   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15491   // nondeterministic jumps).
15492   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15493     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15494                        N1->getOperand(0), N2);
15495   }
15496 
15497   // If N is a constant we could fold this into a fallthrough or unconditional
15498   // branch. However that doesn't happen very often in normal code, because
15499   // Instcombine/SimplifyCFG should have handled the available opportunities.
15500   // If we did this folding here, it would be necessary to update the
15501   // MachineBasicBlock CFG, which is awkward.
15502 
15503   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15504   // on the target.
15505   if (N1.getOpcode() == ISD::SETCC &&
15506       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15507                                    N1.getOperand(0).getValueType())) {
15508     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15509                        Chain, N1.getOperand(2),
15510                        N1.getOperand(0), N1.getOperand(1), N2);
15511   }
15512 
15513   if (N1.hasOneUse()) {
15514     // rebuildSetCC calls visitXor which may change the Chain when there is a
15515     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15516     HandleSDNode ChainHandle(Chain);
15517     if (SDValue NewN1 = rebuildSetCC(N1))
15518       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15519                          ChainHandle.getValue(), NewN1, N2);
15520   }
15521 
15522   return SDValue();
15523 }
15524 
15525 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15526   if (N.getOpcode() == ISD::SRL ||
15527       (N.getOpcode() == ISD::TRUNCATE &&
15528        (N.getOperand(0).hasOneUse() &&
15529         N.getOperand(0).getOpcode() == ISD::SRL))) {
15530     // Look past the truncate.
15531     if (N.getOpcode() == ISD::TRUNCATE)
15532       N = N.getOperand(0);
15533 
15534     // Match this pattern so that we can generate simpler code:
15535     //
15536     //   %a = ...
15537     //   %b = and i32 %a, 2
15538     //   %c = srl i32 %b, 1
15539     //   brcond i32 %c ...
15540     //
15541     // into
15542     //
15543     //   %a = ...
15544     //   %b = and i32 %a, 2
15545     //   %c = setcc eq %b, 0
15546     //   brcond %c ...
15547     //
15548     // This applies only when the AND constant value has one bit set and the
15549     // SRL constant is equal to the log2 of the AND constant. The back-end is
15550     // smart enough to convert the result into a TEST/JMP sequence.
15551     SDValue Op0 = N.getOperand(0);
15552     SDValue Op1 = N.getOperand(1);
15553 
15554     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15555       SDValue AndOp1 = Op0.getOperand(1);
15556 
15557       if (AndOp1.getOpcode() == ISD::Constant) {
15558         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15559 
15560         if (AndConst.isPowerOf2() &&
15561             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15562           SDLoc DL(N);
15563           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15564                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15565                               ISD::SETNE);
15566         }
15567       }
15568     }
15569   }
15570 
15571   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15572   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15573   if (N.getOpcode() == ISD::XOR) {
15574     // Because we may call this on a speculatively constructed
15575     // SimplifiedSetCC Node, we need to simplify this node first.
15576     // Ideally this should be folded into SimplifySetCC and not
15577     // here. For now, grab a handle to N so we don't lose it from
15578     // replacements internal to the visit.
15579     HandleSDNode XORHandle(N);
15580     while (N.getOpcode() == ISD::XOR) {
15581       SDValue Tmp = visitXOR(N.getNode());
15582       // No simplification done.
15583       if (!Tmp.getNode())
15584         break;
15585       // Returning N is a form of in-visit replacement that may invalidate
15586       // N. Grab the value from the handle.
15587       if (Tmp.getNode() == N.getNode())
15588         N = XORHandle.getValue();
15589       else // Node simplified. Try simplifying again.
15590         N = Tmp;
15591     }
15592 
15593     if (N.getOpcode() != ISD::XOR)
15594       return N;
15595 
15596     SDValue Op0 = N->getOperand(0);
15597     SDValue Op1 = N->getOperand(1);
15598 
15599     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15600       bool Equal = false;
15601       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15602       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15603           Op0.getValueType() == MVT::i1) {
15604         N = Op0;
15605         Op0 = N->getOperand(0);
15606         Op1 = N->getOperand(1);
15607         Equal = true;
15608       }
15609 
15610       EVT SetCCVT = N.getValueType();
15611       if (LegalTypes)
15612         SetCCVT = getSetCCResultType(SetCCVT);
15613       // Replace the uses of XOR with SETCC
15614       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15615                           Equal ? ISD::SETEQ : ISD::SETNE);
15616     }
15617   }
15618 
15619   return SDValue();
15620 }
15621 
15622 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15623 //
15624 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15625   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15626   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15627 
15628   // If N is a constant we could fold this into a fallthrough or unconditional
15629   // branch. However that doesn't happen very often in normal code, because
15630   // Instcombine/SimplifyCFG should have handled the available opportunities.
15631   // If we did this folding here, it would be necessary to update the
15632   // MachineBasicBlock CFG, which is awkward.
15633 
15634   // Use SimplifySetCC to simplify SETCC's.
15635   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15636                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15637                                false);
15638   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15639 
15640   // fold to a simpler setcc
15641   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15642     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15643                        N->getOperand(0), Simp.getOperand(2),
15644                        Simp.getOperand(0), Simp.getOperand(1),
15645                        N->getOperand(4));
15646 
15647   return SDValue();
15648 }
15649 
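      /// Decompose \p N, a plain or masked load/store: extract its base pointer
      /// into \p Ptr, set \p IsLoad / \p IsMasked to describe the node, and check
      /// that the target supports the \p Inc or \p Dec indexed form for its
      /// memory type.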
15650 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15651                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15652                                      const TargetLowering &TLI) {
15653   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15654     if (LD->isIndexed())
15655       return false;
15656     EVT VT = LD->getMemoryVT();
15657     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15658       return false;
15659     Ptr = LD->getBasePtr();
15660   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15661     if (ST->isIndexed())
15662       return false;
15663     EVT VT = ST->getMemoryVT();
15664     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15665       return false;
15666     Ptr = ST->getBasePtr();
15667     IsLoad = false;
15668   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15669     if (LD->isIndexed())
15670       return false;
15671     EVT VT = LD->getMemoryVT();
15672     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15673         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15674       return false;
15675     Ptr = LD->getBasePtr();
15676     IsMasked = true;
15677   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15678     if (ST->isIndexed())
15679       return false;
15680     EVT VT = ST->getMemoryVT();
15681     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15682         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15683       return false;
15684     Ptr = ST->getBasePtr();
15685     IsLoad = false;
15686     IsMasked = true;
15687   } else {
15688     return false;
15689   }
15690   return true;
15691 }
15692 
15693 /// Try turning a load/store into a pre-indexed load/store when the base
15694 /// pointer is an add or subtract and it has other uses besides the load/store.
15695 /// After the transformation, the new indexed load/store has effectively folded
15696 /// the add/subtract in and all of its other uses are redirected to the
15697 /// new load/store.
15698 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15699   if (Level < AfterLegalizeDAG)
15700     return false;
15701 
15702   bool IsLoad = true;
15703   bool IsMasked = false;
15704   SDValue Ptr;
15705   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15706                                 Ptr, TLI))
15707     return false;
15708 
15709   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15710   // out.  There is no reason to make this a preinc/predec.
15711   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15712       Ptr.getNode()->hasOneUse())
15713     return false;
15714 
15715   // Ask the target to do addressing mode selection.
15716   SDValue BasePtr;
15717   SDValue Offset;
15718   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15719   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15720     return false;
15721 
15722   // Backends without true r+i pre-indexed forms may need to pass a
15723   // constant base with a variable offset so that constant coercion
15724   // will work with the patterns in canonical form.
15725   bool Swapped = false;
15726   if (isa<ConstantSDNode>(BasePtr)) {
15727     std::swap(BasePtr, Offset);
15728     Swapped = true;
15729   }
15730 
15731   // Don't create an indexed load / store with zero offset.
15732   if (isNullConstant(Offset))
15733     return false;
15734 
15735   // Try turning it into a pre-indexed load / store except when:
15736   // 1) The new base ptr is a frame index.
15737   // 2) If N is a store and the new base ptr is either the same as or is a
15738   //    predecessor of the value being stored.
15739   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15740   //    that would create a cycle.
15741   // 4) All uses are load / store ops that use it as old base ptr.
15742 
15743   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15744   // (plus the implicit offset) to a register to preinc anyway.
15745   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15746     return false;
15747 
15748   // Check #2.
15749   if (!IsLoad) {
15750     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15751                            : cast<StoreSDNode>(N)->getValue();
15752 
15753     // Would require a copy.
15754     if (Val == BasePtr)
15755       return false;
15756 
15757     // Would create a cycle.
15758     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15759       return false;
15760   }
15761 
15762   // Caches for hasPredecessorHelper.
15763   SmallPtrSet<const SDNode *, 32> Visited;
15764   SmallVector<const SDNode *, 16> Worklist;
15765   Worklist.push_back(N);
15766 
15767   // If the offset is a constant, there may be other adds of constants that
15768   // can be folded with this one. We should do this to avoid having to keep
15769   // a copy of the original base pointer.
15770   SmallVector<SDNode *, 16> OtherUses;
15771   if (isa<ConstantSDNode>(Offset))
15772     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15773                               UE = BasePtr.getNode()->use_end();
15774          UI != UE; ++UI) {
15775       SDUse &Use = UI.getUse();
15776       // Skip the use that is Ptr and uses of other results from BasePtr's
15777       // node (important for nodes that return multiple results).
15778       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15779         continue;
15780 
15781       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15782         continue;
15783 
15784       if (Use.getUser()->getOpcode() != ISD::ADD &&
15785           Use.getUser()->getOpcode() != ISD::SUB) {
15786         OtherUses.clear();
15787         break;
15788       }
15789 
15790       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15791       if (!isa<ConstantSDNode>(Op1)) {
15792         OtherUses.clear();
15793         break;
15794       }
15795 
15796       // FIXME: In some cases, we can be smarter about this.
15797       if (Op1.getValueType() != Offset.getValueType()) {
15798         OtherUses.clear();
15799         break;
15800       }
15801 
15802       OtherUses.push_back(Use.getUser());
15803     }
15804 
15805   if (Swapped)
15806     std::swap(BasePtr, Offset);
15807 
15808   // Now check for #3 and #4.
15809   bool RealUse = false;
15810 
15811   for (SDNode *Use : Ptr.getNode()->uses()) {
15812     if (Use == N)
15813       continue;
15814     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15815       return false;
15816 
15817     // If Ptr may be folded in addressing mode of other use, then it's
15818     // not profitable to do this transformation.
15819     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15820       RealUse = true;
15821   }
15822 
15823   if (!RealUse)
15824     return false;
15825 
15826   SDValue Result;
15827   if (!IsMasked) {
15828     if (IsLoad)
15829       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15830     else
15831       Result =
15832           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15833   } else {
15834     if (IsLoad)
15835       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15836                                         Offset, AM);
15837     else
15838       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15839                                          Offset, AM);
15840   }
15841   ++PreIndexedNodes;
15842   ++NodesCombined;
15843   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15844              Result.getNode()->dump(&DAG); dbgs() << '\n');
15845   WorklistRemover DeadNodes(*this);
15846   if (IsLoad) {
15847     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15848     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15849   } else {
15850     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15851   }
15852 
15853   // Finally, since the node is now dead, remove it from the graph.
15854   deleteAndRecombine(N);
15855 
15856   if (Swapped)
15857     std::swap(BasePtr, Offset);
15858 
15859   // Replace other uses of BasePtr that can be updated to use Ptr
15860   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15861     unsigned OffsetIdx = 1;
15862     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15863       OffsetIdx = 0;
15864     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15865            BasePtr.getNode() && "Expected BasePtr operand");
15866 
15867     // We need to replace ptr0 in the following expression:
15868     //   x0 * offset0 + y0 * ptr0 = t0
15869     // knowing that
15870     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15871     //
15872     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15873     // indexed load/store and the expression that needs to be re-written.
15874     //
15875     // Therefore, we have:
15876     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
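          // An illustrative instance: if the new node is a pre_inc by 4
          // (t1 = ptr0 + 4, so x1 = y1 = 1) and the other use is (add ptr0, 8)
          // (x0 = y0 = 1), then t0 = (8 - 4) + t1, i.e. the use is rewritten
          // as (add t1, 4).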
15877 
15878     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15879     const APInt &Offset0 = CN->getAPIntValue();
15880     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15881     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15882     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15883     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15884     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15885 
15886     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15887 
15888     APInt CNV = Offset0;
15889     if (X0 < 0) CNV = -CNV;
15890     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15891     else CNV = CNV - Offset1;
15892 
15893     SDLoc DL(OtherUses[i]);
15894 
15895     // We can now generate the new expression.
15896     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15897     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15898 
15899     SDValue NewUse = DAG.getNode(Opcode,
15900                                  DL,
15901                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15902     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15903     deleteAndRecombine(OtherUses[i]);
15904   }
15905 
15906   // Replace the uses of Ptr with uses of the updated base value.
15907   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15908   deleteAndRecombine(Ptr.getNode());
15909   AddToWorklist(Result.getNode());
15910 
15911   return true;
15912 }
15913 
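      /// Check whether \p PtrUse, an add/sub of the pointer produced by \p N, can
      /// serve as the post-indexed increment/decrement for \p N without creating
      /// a cycle or losing a more profitable use of the base pointer.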
15914 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15915                                    SDValue &BasePtr, SDValue &Offset,
15916                                    ISD::MemIndexedMode &AM,
15917                                    SelectionDAG &DAG,
15918                                    const TargetLowering &TLI) {
15919   if (PtrUse == N ||
15920       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15921     return false;
15922 
15923   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15924     return false;
15925 
15926   // Don't create an indexed load / store with zero offset.
15927   if (isNullConstant(Offset))
15928     return false;
15929 
15930   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15931     return false;
15932 
15933   SmallPtrSet<const SDNode *, 32> Visited;
15934   for (SDNode *Use : BasePtr.getNode()->uses()) {
15935     if (Use == Ptr.getNode())
15936       continue;
15937 
15938     // Bail out if there's a later user which could perform the indexing instead.
15939     if (isa<MemSDNode>(Use)) {
15940       bool IsLoad = true;
15941       bool IsMasked = false;
15942       SDValue OtherPtr;
15943       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15944                                    IsMasked, OtherPtr, TLI)) {
15945         SmallVector<const SDNode *, 2> Worklist;
15946         Worklist.push_back(Use);
15947         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15948           return false;
15949       }
15950     }
15951 
15952     // If all the uses are load / store addresses, then don't do the
15953     // transformation.
15954     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15955       for (SDNode *UseUse : Use->uses())
15956         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15957           return false;
15958     }
15959   }
15960   return true;
15961 }
15962 
15963 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15964                                          bool &IsMasked, SDValue &Ptr,
15965                                          SDValue &BasePtr, SDValue &Offset,
15966                                          ISD::MemIndexedMode &AM,
15967                                          SelectionDAG &DAG,
15968                                          const TargetLowering &TLI) {
15969   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15970                                 IsMasked, Ptr, TLI) ||
15971       Ptr.getNode()->hasOneUse())
15972     return nullptr;
15973 
15974   // Try turning it into a post-indexed load / store except when
15975   // 1) All uses are load / store ops that use it as base ptr (and
15976   //    it may be folded as an addressing mode).
15977   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15978   //    nor a successor of N. Otherwise, if Op is folded that would
15979   //    create a cycle.
15980   for (SDNode *Op : Ptr->uses()) {
15981     // Check for #1.
15982     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15983       continue;
15984 
15985     // Check for #2.
15986     SmallPtrSet<const SDNode *, 32> Visited;
15987     SmallVector<const SDNode *, 8> Worklist;
15988     // Ptr is predecessor to both N and Op.
15989     Visited.insert(Ptr.getNode());
15990     Worklist.push_back(N);
15991     Worklist.push_back(Op);
15992     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15993         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15994       return Op;
15995   }
15996   return nullptr;
15997 }
15998 
15999 /// Try to combine a load/store with an add/sub of the base pointer node into a
16000 /// post-indexed load/store. The transformation effectively folds the
16001 /// add/subtract into the new indexed load/store, and all of the add/sub's uses
16002 /// are redirected to the new load/store.
16003 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
16004   if (Level < AfterLegalizeDAG)
16005     return false;
16006 
16007   bool IsLoad = true;
16008   bool IsMasked = false;
16009   SDValue Ptr;
16010   SDValue BasePtr;
16011   SDValue Offset;
16012   ISD::MemIndexedMode AM = ISD::UNINDEXED;
16013   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16014                                          Offset, AM, DAG, TLI);
16015   if (!Op)
16016     return false;
16017 
16018   SDValue Result;
16019   if (!IsMasked)
16020     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16021                                          Offset, AM)
16022                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16023                                           BasePtr, Offset, AM);
16024   else
16025     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16026                                                BasePtr, Offset, AM)
16027                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16028                                                 BasePtr, Offset, AM);
16029   ++PostIndexedNodes;
16030   ++NodesCombined;
16031   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
16032              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
16033              dbgs() << '\n');
16034   WorklistRemover DeadNodes(*this);
16035   if (IsLoad) {
16036     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16037     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16038   } else {
16039     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16040   }
16041 
16042   // Finally, since the node is now dead, remove it from the graph.
16043   deleteAndRecombine(N);
16044 
16045   // Replace the uses of Op with uses of the updated base value.
16046   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
16047                                 Result.getValue(IsLoad ? 1 : 0));
16048   deleteAndRecombine(Op);
16049   return true;
16050 }
16051 
16052 /// Return the base-pointer arithmetic from an indexed \p LD.
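      /// For a pre/post-increment load this is (add BasePtr, Offset); for a
      /// decrement it is (sub BasePtr, Offset).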
16053 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16054   ISD::MemIndexedMode AM = LD->getAddressingMode();
16055   assert(AM != ISD::UNINDEXED);
16056   SDValue BP = LD->getOperand(1);
16057   SDValue Inc = LD->getOperand(2);
16058 
16059   // Some backends use TargetConstants for load offsets, but don't expect
16060   // TargetConstants in general ADD nodes. We can convert these constants into
16061   // regular Constants (if the constant is not opaque).
16062   assert((Inc.getOpcode() != ISD::TargetConstant ||
16063           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16064          "Cannot split out indexing using opaque target constants");
16065   if (Inc.getOpcode() == ISD::TargetConstant) {
16066     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16067     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16068                           ConstInc->getValueType(0));
16069   }
16070 
16071   unsigned Opc =
16072       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16073   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
16074 }
16075 
16076 static inline ElementCount numVectorEltsOrZero(EVT T) {
16077   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
16078 }
16079 
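      /// Try to recover the value that \p ST actually writes to memory: narrow the
      /// stored value to the store's memory type (via FTRUNC, TRUNCATE, or a
      /// bitcast). Returns true and sets \p Val on success.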
16080 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16081   Val = ST->getValue();
16082   EVT STType = Val.getValueType();
16083   EVT STMemType = ST->getMemoryVT();
16084   if (STType == STMemType)
16085     return true;
16086   if (isTypeLegal(STMemType))
16087     return false; // fail.
16088   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16089       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16090     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
16091     return true;
16092   }
16093   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16094       STType.isInteger() && STMemType.isInteger()) {
16095     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
16096     return true;
16097   }
16098   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16099     Val = DAG.getBitcast(STMemType, Val);
16100     return true;
16101   }
16102   return false; // fail.
16103 }
16104 
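      /// Widen \p Val (which has \p LD's memory type) to \p LD's result type,
      /// mirroring the load's extension kind: a bitcast for a non-extending load,
      /// otherwise an any/sign/zero extension. Returns true on success.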
16105 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16106   EVT LDMemType = LD->getMemoryVT();
16107   EVT LDType = LD->getValueType(0);
16108   assert(Val.getValueType() == LDMemType &&
16109          "Attempting to extend value of non-matching type");
16110   if (LDType == LDMemType)
16111     return true;
16112   if (LDMemType.isInteger() && LDType.isInteger()) {
16113     switch (LD->getExtensionType()) {
16114     case ISD::NON_EXTLOAD:
16115       Val = DAG.getBitcast(LDType, Val);
16116       return true;
16117     case ISD::EXTLOAD:
16118       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16119       return true;
16120     case ISD::SEXTLOAD:
16121       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16122       return true;
16123     case ISD::ZEXTLOAD:
16124       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16125       return true;
16126     }
16127   }
16128   return false;
16129 }
16130 
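      /// If the load's chain is a store to a covering location, forward the stored
      /// value directly to the load (possibly through a truncation, mask, or
      /// extension), removing the round trip through memory.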
16131 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16132   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16133     return SDValue();
16134   SDValue Chain = LD->getOperand(0);
16135   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16136   // TODO: Relax this restriction for unordered atomics (see D66309)
16137   if (!ST || !ST->isSimple())
16138     return SDValue();
16139 
16140   EVT LDType = LD->getValueType(0);
16141   EVT LDMemType = LD->getMemoryVT();
16142   EVT STMemType = ST->getMemoryVT();
16143   EVT STType = ST->getValue().getValueType();
16144 
16145   // There are two cases to consider here:
16146   //  1. The store is fixed width and the load is scalable. In this case we
16147   //     don't know at compile time if the store completely envelops the load
16148   //     so we abandon the optimisation.
16149   //  2. The store is scalable and the load is fixed width. We could
16150   //     potentially support a limited number of cases here, but there has been
16151   //     no cost-benefit analysis to prove it's worth it.
16152   bool LdStScalable = LDMemType.isScalableVector();
16153   if (LdStScalable != STMemType.isScalableVector())
16154     return SDValue();
16155 
16156   // If we are dealing with scalable vectors on a big endian platform the
16157   // calculation of offsets below becomes trickier, since we do not know at
16158   // compile time the absolute size of the vector. Until we've done more
16159   // analysis on big-endian platforms it seems better to bail out for now.
16160   if (LdStScalable && DAG.getDataLayout().isBigEndian())
16161     return SDValue();
16162 
16163   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16164   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16165   int64_t Offset;
16166   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16167     return SDValue();
16168 
16169   // Normalize for endianness. After this, Offset == 0 will denote that the
16170   // least significant bit in the loaded value maps to the least significant
16171   // bit in the stored value. With Offset == n (for n > 0) the loaded value
16172   // starts at the n:th least significant byte of the stored value.
16173   if (DAG.getDataLayout().isBigEndian())
16174     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16175               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16176                  8 -
16177              Offset;
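        // An illustrative case: with a 64-bit store and a 32-bit load of the same
        // address (Offset == 0 before normalization), a big-endian target yields
        // Offset = (64 - 32) / 8 = 4, since the load reads the 4 most significant
        // bytes of the stored value.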
16178 
16179   // Check that the stored value covers all bits that are loaded.
16180   bool STCoversLD;
16181 
16182   TypeSize LdMemSize = LDMemType.getSizeInBits();
16183   TypeSize StMemSize = STMemType.getSizeInBits();
16184   if (LdStScalable)
16185     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16186   else
16187     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16188                                    StMemSize.getFixedSize());
16189 
16190   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16191     if (LD->isIndexed()) {
16192       // Cannot handle opaque target constants and we must respect the user's
16193       // request not to split indexes from loads.
16194       if (!canSplitIdx(LD))
16195         return SDValue();
16196       SDValue Idx = SplitIndexingFromLoad(LD);
16197       SDValue Ops[] = {Val, Idx, Chain};
16198       return CombineTo(LD, Ops, 3);
16199     }
16200     return CombineTo(LD, Val, Chain);
16201   };
16202 
16203   if (!STCoversLD)
16204     return SDValue();
16205 
16206   // Memory as copy space (potentially masked).
16207   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16208     // Simple case: Direct non-truncating forwarding
16209     if (LDType.getSizeInBits() == LdMemSize)
16210       return ReplaceLd(LD, ST->getValue(), Chain);
16211     // Can we model the truncate and extension with an and mask?
16212     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16213         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
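            // An illustrative case: an i32 value stored as i16 and reloaded with
            // a zextload i16 -> i32 becomes (and StoredVal, 0xffff).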
16214       // Mask to size of LDMemType
16215       auto Mask =
16216           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16217                                                StMemSize.getFixedSize()),
16218                           SDLoc(ST), STType);
16219       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16220       return ReplaceLd(LD, Val, Chain);
16221     }
16222   }
16223 
16224   // TODO: Deal with nonzero offset.
16225   if (LD->getBasePtr().isUndef() || Offset != 0)
16226     return SDValue();
16227   // Model necessary truncations / extensions.
16228   SDValue Val;
16229   // Truncate the value to the stored memory size.
16230   do {
16231     if (!getTruncatedStoreValue(ST, Val))
16232       continue;
16233     if (!isTypeLegal(LDMemType))
16234       continue;
16235     if (STMemType != LDMemType) {
16236       // TODO: Support vectors? This requires extract_subvector/bitcast.
16237       if (!STMemType.isVector() && !LDMemType.isVector() &&
16238           STMemType.isInteger() && LDMemType.isInteger())
16239         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16240       else
16241         continue;
16242     }
16243     if (!extendLoadedValueToExtension(LD, Val))
16244       continue;
16245     return ReplaceLd(LD, Val, Chain);
16246   } while (false);
16247 
16248   // On failure, cleanup dead nodes we may have created.
16249   if (Val->use_empty())
16250     deleteAndRecombine(Val.getNode());
16251   return SDValue();
16252 }
16253 
16254 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16255   LoadSDNode *LD  = cast<LoadSDNode>(N);
16256   SDValue Chain = LD->getChain();
16257   SDValue Ptr   = LD->getBasePtr();
16258 
16259   // If load is not volatile and there are no uses of the loaded value (and
16260   // the updated indexed value in case of indexed loads), change uses of the
16261   // chain value into uses of the chain input (i.e. delete the dead load).
16262   // TODO: Allow this for unordered atomics (see D66309)
16263   if (LD->isSimple()) {
16264     if (N->getValueType(1) == MVT::Other) {
16265       // Unindexed loads.
16266       if (!N->hasAnyUseOfValue(0)) {
16267         // It's not safe to use the two-value CombineTo variant here, e.g.:
16268         // v1, chain2 = load chain1, loc
16269         // v2, chain3 = load chain2, loc
16270         // v3         = add v2, c
16271         // Now we replace use of chain2 with chain1.  This makes the second load
16272         // isomorphic to the one we are deleting, and thus makes this load live.
16273         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16274                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
16275                    dbgs() << "\n");
16276         WorklistRemover DeadNodes(*this);
16277         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16278         AddUsersToWorklist(Chain.getNode());
16279         if (N->use_empty())
16280           deleteAndRecombine(N);
16281 
16282         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16283       }
16284     } else {
16285       // Indexed loads.
16286       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16287 
16288       // If this load has an opaque TargetConstant offset, then we cannot split
16289       // the indexing into an add/sub directly (that TargetConstant may not be
16290       // valid for a different type of node, and we cannot convert an opaque
16291       // target constant into a regular constant).
16292       bool CanSplitIdx = canSplitIdx(LD);
16293 
16294       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16295         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16296         SDValue Index;
16297         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16298           Index = SplitIndexingFromLoad(LD);
16299           // Try to fold the base pointer arithmetic into subsequent loads and
16300           // stores.
16301           AddUsersToWorklist(N);
16302         } else
16303           Index = DAG.getUNDEF(N->getValueType(1));
16304         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16305                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
16306                    dbgs() << " and 2 other values\n");
16307         WorklistRemover DeadNodes(*this);
16308         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16309         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16310         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16311         deleteAndRecombine(N);
16312         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16313       }
16314     }
16315   }
16316 
16317   // If this load is directly stored, replace the load value with the stored
16318   // value.
16319   if (auto V = ForwardStoreValueToDirectLoad(LD))
16320     return V;
16321 
16322   // Try to infer better alignment information than the load already has.
16323   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16324     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16325       if (*Alignment > LD->getAlign() &&
16326           isAligned(*Alignment, LD->getSrcValueOffset())) {
16327         SDValue NewLoad = DAG.getExtLoad(
16328             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16329             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16330             LD->getMemOperand()->getFlags(), LD->getAAInfo());
16331         // NewLoad will always be N as we are only refining the alignment
16332         assert(NewLoad.getNode() == N);
16333         (void)NewLoad;
16334       }
16335     }
16336   }
16337 
16338   if (LD->isUnindexed()) {
16339     // Walk up chain skipping non-aliasing memory nodes.
16340     SDValue BetterChain = FindBetterChain(LD, Chain);
16341 
16342     // If there is a better chain.
16343     if (Chain != BetterChain) {
16344       SDValue ReplLoad;
16345 
16346       // Replace the chain to avoid the dependency.
16347       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16348         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16349                                BetterChain, Ptr, LD->getMemOperand());
16350       } else {
16351         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16352                                   LD->getValueType(0),
16353                                   BetterChain, Ptr, LD->getMemoryVT(),
16354                                   LD->getMemOperand());
16355       }
16356 
16357       // Create token factor to keep old chain connected.
16358       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16359                                   MVT::Other, Chain, ReplLoad.getValue(1));
16360 
16361       // Replace uses with load result and token factor
16362       return CombineTo(N, ReplLoad.getValue(0), Token);
16363     }
16364   }
16365 
16366   // Try transforming N to an indexed load.
16367   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16368     return SDValue(N, 0);
16369 
16370   // Try to slice up N to more direct loads if the slices are mapped to
16371   // different register banks or pairing can take place.
16372   if (SliceUpLoad(N))
16373     return SDValue(N, 0);
16374 
16375   return SDValue();
16376 }
16377 
16378 namespace {
16379 
16380 /// Helper structure used to slice a load in smaller loads.
16381 /// Basically a slice is obtained from the following sequence:
16382 /// Origin = load Ty1, Base
16383 /// Shift = srl Ty1 Origin, CstTy Amount
16384 /// Inst = trunc Shift to Ty2
16385 ///
16386 /// Then, it will be rewritten into:
16387 /// Slice = load SliceTy, Base + SliceOffset
16388 /// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
16389 ///
16390 /// SliceTy is deduced from the number of bits that are actually used to
16391 /// build Inst.
16392 struct LoadedSlice {
16393   /// Helper structure used to compute the cost of a slice.
16394   struct Cost {
16395     /// Are we optimizing for code size.
16396     bool ForCodeSize = false;
16397 
16398     /// Various costs.
16399     unsigned Loads = 0;
16400     unsigned Truncates = 0;
16401     unsigned CrossRegisterBanksCopies = 0;
16402     unsigned ZExts = 0;
16403     unsigned Shift = 0;
16404 
16405     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16406 
16407     /// Get the cost of one isolated slice.
16408     Cost(const LoadedSlice &LS, bool ForCodeSize)
16409         : ForCodeSize(ForCodeSize), Loads(1) {
16410       EVT TruncType = LS.Inst->getValueType(0);
16411       EVT LoadedType = LS.getLoadedType();
16412       if (TruncType != LoadedType &&
16413           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16414         ZExts = 1;
16415     }
16416 
16417     /// Account for slicing gain in the current cost.
16418     /// Slicing provides a few gains, like removing a shift or a
16419     /// truncate. This method allows the cost of the original
16420     /// load to grow with the gain from this slice.
16421     void addSliceGain(const LoadedSlice &LS) {
16422       // Each slice saves a truncate.
16423       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16424       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16425                               LS.Inst->getValueType(0)))
16426         ++Truncates;
16427       // If there is a shift amount, this slice gets rid of it.
16428       if (LS.Shift)
16429         ++Shift;
16430       // If this slice can merge a cross register bank copy, account for it.
16431       if (LS.canMergeExpensiveCrossRegisterBankCopy())
16432         ++CrossRegisterBanksCopies;
16433     }
16434 
16435     Cost &operator+=(const Cost &RHS) {
16436       Loads += RHS.Loads;
16437       Truncates += RHS.Truncates;
16438       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16439       ZExts += RHS.ZExts;
16440       Shift += RHS.Shift;
16441       return *this;
16442     }
16443 
16444     bool operator==(const Cost &RHS) const {
16445       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16446              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16447              ZExts == RHS.ZExts && Shift == RHS.Shift;
16448     }
16449 
16450     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16451 
16452     bool operator<(const Cost &RHS) const {
16453       // Assume cross-register-bank copies are as expensive as loads.
16454       // FIXME: Do we want some more target hooks?
16455       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16456       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16457       // Unless we are optimizing for code size, consider the
16458       // expensive operation first.
16459       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16460         return ExpensiveOpsLHS < ExpensiveOpsRHS;
16461       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16462              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16463     }
16464 
16465     bool operator>(const Cost &RHS) const { return RHS < *this; }
16466 
16467     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16468 
16469     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16470   };
16471 
16472   // The last instruction that represents the slice. This should be a
16473   // truncate instruction.
16474   SDNode *Inst;
16475 
16476   // The original load instruction.
16477   LoadSDNode *Origin;
16478 
16479   // The right shift amount in bits from the original load.
16480   unsigned Shift;
16481 
16482   // The DAG from which Origin comes.
16483   // This is used to get some contextual information about legal types, etc.
16484   SelectionDAG *DAG;
16485 
16486   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16487               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16488       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16489 
16490   /// Get the bits of the original loaded value that this slice uses.
16491   /// \return Result is as wide as the original loaded value, with used bits
16492   ///         set to 1 and unused bits set to 0.
16493   APInt getUsedBits() const {
16494     // Reproduce the trunc(lshr) sequence:
16495     // - Start from the truncated value.
16496     // - Zero extend to the desired bit width.
16497     // - Shift left.
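          // For example, an i8 slice of an i32 load with Shift == 16 yields
          // UsedBits == 0x00ff0000.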
16498     assert(Origin && "No original load to compare against.");
16499     unsigned BitWidth = Origin->getValueSizeInBits(0);
16500     assert(Inst && "This slice is not bound to an instruction");
16501     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16502            "Extracted slice is bigger than the whole type!");
16503     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16504     UsedBits.setAllBits();
16505     UsedBits = UsedBits.zext(BitWidth);
16506     UsedBits <<= Shift;
16507     return UsedBits;
16508   }
16509 
16510   /// Get the size of the slice to be loaded in bytes.
16511   unsigned getLoadedSize() const {
16512     unsigned SliceSize = getUsedBits().countPopulation();
16513     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16514     return SliceSize / 8;
16515   }
16516 
16517   /// Get the type that will be loaded for this slice.
16518   /// Note: This may not be the final type for the slice.
16519   EVT getLoadedType() const {
16520     assert(DAG && "Missing context");
16521     LLVMContext &Ctxt = *DAG->getContext();
16522     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16523   }
16524 
16525   /// Get the alignment of the load used for this slice.
16526   Align getAlign() const {
16527     Align Alignment = Origin->getAlign();
16528     uint64_t Offset = getOffsetFromBase();
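          // E.g. a base alignment of 8 with a slice offset of 4 yields
          // commonAlignment(8, 8 + 4) == Align(4).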
16529     if (Offset != 0)
16530       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16531     return Alignment;
16532   }
16533 
16534   /// Check if this slice can be rewritten with legal operations.
16535   bool isLegal() const {
16536     // An invalid slice is not legal.
16537     if (!Origin || !Inst || !DAG)
16538       return false;
16539 
16540     // Offsets are for indexed loads only; we do not handle that.
16541     if (!Origin->getOffset().isUndef())
16542       return false;
16543 
16544     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16545 
16546     // Check that the type is legal.
16547     EVT SliceType = getLoadedType();
16548     if (!TLI.isTypeLegal(SliceType))
16549       return false;
16550 
16551     // Check that the load is legal for this type.
16552     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16553       return false;
16554 
16555     // Check that the offset can be computed.
16556     // 1. Check its type.
16557     EVT PtrType = Origin->getBasePtr().getValueType();
16558     if (PtrType == MVT::Untyped || PtrType.isExtended())
16559       return false;
16560 
16561     // 2. Check that it fits in the immediate.
16562     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16563       return false;
16564 
16565     // 3. Check that the computation is legal.
16566     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16567       return false;
16568 
16569     // Check that the zext is legal if it needs one.
16570     EVT TruncateType = Inst->getValueType(0);
16571     if (TruncateType != SliceType &&
16572         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16573       return false;
16574 
16575     return true;
16576   }
16577 
16578   /// Get the offset in bytes of this slice in the original chunk of
16579   /// bits.
16580   /// \pre DAG != nullptr.
16581   uint64_t getOffsetFromBase() const {
16582     assert(DAG && "Missing context.");
16583     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16584     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16585     uint64_t Offset = Shift / 8;
16586     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16587     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16588            "The size of the original loaded type is not a multiple of a"
16589            " byte.");
16590     // If Offset is bigger than TySizeInBytes, it means we are loading all
16591     // zeros. This should have been optimized away earlier in the process.
16592     assert(TySizeInBytes > Offset &&
16593            "Invalid shift amount for given loaded size");
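          // For example, a 1-byte slice of a 4-byte loaded value with Shift == 16
          // sits at byte offset 2 on little-endian targets, but at
          // 4 - 2 - 1 == 1 on big-endian targets.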
16594     if (IsBigEndian)
16595       Offset = TySizeInBytes - Offset - getLoadedSize();
16596     return Offset;
16597   }
16598 
16599   /// Generate the sequence of instructions to load the slice
16600   /// represented by this object and redirect the uses of this slice to
16601   /// this new sequence of instructions.
16602   /// \pre this->Inst && this->Origin are valid Instructions and this
16603   /// object passed the legal check: LoadedSlice::isLegal returned true.
16604   /// \return The last instruction of the sequence used to load the slice.
16605   SDValue loadSlice() const {
16606     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16607     const SDValue &OldBaseAddr = Origin->getBasePtr();
16608     SDValue BaseAddr = OldBaseAddr;
16609     // Get the offset in that chunk of bytes w.r.t. the endianness.
16610     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16611     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16612     if (Offset) {
16613       // BaseAddr = BaseAddr + Offset.
16614       EVT ArithType = BaseAddr.getValueType();
16615       SDLoc DL(Origin);
16616       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16617                               DAG->getConstant(Offset, DL, ArithType));
16618     }
16619 
16620     // Create the type of the loaded slice according to its size.
16621     EVT SliceType = getLoadedType();
16622 
16623     // Create the load for the slice.
16624     SDValue LastInst =
16625         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16626                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16627                      Origin->getMemOperand()->getFlags());
16628     // If the final type is not the same as the loaded type, this means that
16629     // we have to pad with zero. Create a zero extend for that.
16630     EVT FinalType = Inst->getValueType(0);
16631     if (SliceType != FinalType)
16632       LastInst =
16633           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16634     return LastInst;
16635   }
16636 
16637   /// Check if this slice can be merged with an expensive cross register
16638   /// bank copy. E.g.,
16639   /// i = load i32
16640   /// f = bitcast i32 i to float
16641   bool canMergeExpensiveCrossRegisterBankCopy() const {
16642     if (!Inst || !Inst->hasOneUse())
16643       return false;
16644     SDNode *Use = *Inst->use_begin();
16645     if (Use->getOpcode() != ISD::BITCAST)
16646       return false;
16647     assert(DAG && "Missing context");
16648     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16649     EVT ResVT = Use->getValueType(0);
16650     const TargetRegisterClass *ResRC =
16651         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16652     const TargetRegisterClass *ArgRC =
16653         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16654                            Use->getOperand(0)->isDivergent());
16655     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16656       return false;
16657 
16658     // At this point, we know that we perform a cross-register-bank copy.
16659     // Check if it is expensive.
16660     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16661     // Assume bitcasts are cheap unless the two register classes do not
16662     // explicitly share a common subclass.
16663     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16664       return false;
16665 
16666     // Check if it will be merged with the load.
16667     // 1. Check the alignment / fast memory access constraint.
16668     bool IsFast = false;
16669     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16670                                 Origin->getAddressSpace(), getAlign(),
16671                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
16672         !IsFast)
16673       return false;
16674 
16675     // 2. Check that the load is a legal operation for that type.
16676     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16677       return false;
16678 
16679     // 3. Check that we do not have a zext in the way.
16680     if (Inst->getValueType(0) != getLoadedType())
16681       return false;
16682 
16683     return true;
16684   }
16685 };
16686 
16687 } // end anonymous namespace
16688 
16689 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16690 /// \p UsedBits looks like 0..0 1..1 0..0.
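/// For illustration, 0x00FF0000 is dense (a single contiguous run of set
/// bits), while 0x00FF00FF is not (two separate runs).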
16691 static bool areUsedBitsDense(const APInt &UsedBits) {
16692   // If all the bits are one, this is dense!
16693   if (UsedBits.isAllOnes())
16694     return true;
16695 
16696   // Get rid of the unused bits on the right.
16697   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16698   // Get rid of the unused bits on the left.
16699   if (NarrowedUsedBits.countLeadingZeros())
16700     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16701   // Check that the chunk of bits is completely used.
16702   return NarrowedUsedBits.isAllOnes();
16703 }
16704 
16705 /// Check whether or not \p First and \p Second are next to each other
16706 /// in memory. This means that there is no hole between the bits loaded
16707 /// by \p First and the bits loaded by \p Second.
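/// For illustration, slices of an i32 load covering bits [0, 16) and
/// [16, 32) are next to each other, while slices covering [0, 8) and
/// [16, 24) leave a hole and are not.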
16708 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16709                                      const LoadedSlice &Second) {
16710   assert(First.Origin == Second.Origin && First.Origin &&
16711          "Unable to match different memory origins.");
16712   APInt UsedBits = First.getUsedBits();
16713   assert((UsedBits & Second.getUsedBits()) == 0 &&
16714          "Slices are not supposed to overlap.");
16715   UsedBits |= Second.getUsedBits();
16716   return areUsedBitsDense(UsedBits);
16717 }
16718 
16719 /// Adjust the \p GlobalLSCost according to the target
16720 /// pairing capabilities and the layout of the slices.
16721 /// \pre \p GlobalLSCost should account for at least as many loads as
16722 /// there are slices in \p LoadedSlices.
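/// For illustration, when the target reports via TargetLowering::hasPairedLoad
/// that two adjacent same-typed slices can be loaded as a pair, one load is
/// subtracted from \p GlobalLSCost for each such pair.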
16723 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16724                                  LoadedSlice::Cost &GlobalLSCost) {
16725   unsigned NumberOfSlices = LoadedSlices.size();
16726   // If there are fewer than 2 elements, no pairing is possible.
16727   if (NumberOfSlices < 2)
16728     return;
16729 
16730   // Sort the slices so that elements that are likely to be next to each
16731   // other in memory are next to each other in the list.
16732   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16733     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16734     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16735   });
16736   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16737   // First (resp. Second) is the first (resp. second) potential candidate
16738   // to be placed in a paired load.
16739   const LoadedSlice *First = nullptr;
16740   const LoadedSlice *Second = nullptr;
16741   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16742                 // Set the beginning of the pair.
16743                                                            First = Second) {
16744     Second = &LoadedSlices[CurrSlice];
16745 
16746     // If First is NULL, it means we start a new pair.
16747     // Get to the next slice.
16748     if (!First)
16749       continue;
16750 
16751     EVT LoadedType = First->getLoadedType();
16752 
16753     // If the types of the slices are different, we cannot pair them.
16754     if (LoadedType != Second->getLoadedType())
16755       continue;
16756 
16757     // Check if the target supplies paired loads for this type.
16758     Align RequiredAlignment;
16759     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16760       // Move to the next pair; this type is hopeless.
16761       Second = nullptr;
16762       continue;
16763     }
16764     // Check if we meet the alignment requirement.
16765     if (First->getAlign() < RequiredAlignment)
16766       continue;
16767 
16768     // Check that both loads are next to each other in memory.
16769     if (!areSlicesNextToEachOther(*First, *Second))
16770       continue;
16771 
16772     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16773     --GlobalLSCost.Loads;
16774     // Move to the next pair.
16775     Second = nullptr;
16776   }
16777 }
16778 
16779 /// Check the profitability of all involved LoadedSlice.
16780 /// Currently, it is considered profitable if there are exactly two
16781 /// involved slices (1) which are (2) next to each other in memory, and
16782 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16783 ///
16784 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16785 /// the elements themselves.
16786 ///
16787 /// FIXME: When the cost model is mature enough, we can relax
16788 /// constraints (1) and (2).
16789 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16790                                 const APInt &UsedBits, bool ForCodeSize) {
16791   unsigned NumberOfSlices = LoadedSlices.size();
16792   if (StressLoadSlicing)
16793     return NumberOfSlices > 1;
16794 
16795   // Check (1).
16796   if (NumberOfSlices != 2)
16797     return false;
16798 
16799   // Check (2).
16800   if (!areUsedBitsDense(UsedBits))
16801     return false;
16802 
16803   // Check (3).
16804   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16805   // The original code has one big load.
16806   OrigCost.Loads = 1;
16807   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16808     const LoadedSlice &LS = LoadedSlices[CurrSlice];
16809     // Accumulate the cost of all the slices.
16810     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16811     GlobalSlicingCost += SliceCost;
16812 
16813     // Account, in the cost of the original configuration, for the gain
16814     // obtained with the current slice.
16815     OrigCost.addSliceGain(LS);
16816   }
16817 
16818   // If the target supports paired load, adjust the cost accordingly.
16819   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16820   return OrigCost > GlobalSlicingCost;
16821 }
16822 
16823 /// If the given load, \p N, is used only by trunc or trunc(lshr)
16824 /// operations, split it into the various pieces being extracted.
16825 ///
16826 /// This sort of thing is introduced by SROA.
16827 /// This slicing takes care not to insert overlapping loads.
16828 /// \pre \p N is a simple load (i.e., not an atomic or volatile load).
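/// For illustration, a sketch of the pattern this handles (hypothetical IR):
///   %ld = load i32, i32* %p
///   %lo = trunc i32 %ld to i16
///   %sh = lshr i32 %ld, 16
///   %hi = trunc i32 %sh to i16
/// On a little-endian target this becomes an i16 load of %p for %lo and an
/// i16 load at offset 2 from %p for %hi.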
16829 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16830   if (Level < AfterLegalizeDAG)
16831     return false;
16832 
16833   LoadSDNode *LD = cast<LoadSDNode>(N);
16834   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16835       !LD->getValueType(0).isInteger())
16836     return false;
16837 
16838   // The algorithm to split up a load of a scalable vector into individual
16839   // elements currently requires knowing the length of the loaded type,
16840   // so will need adjusting to work on scalable vectors.
16841   if (LD->getValueType(0).isScalableVector())
16842     return false;
16843 
16844   // Keep track of already used bits to detect overlapping values.
16845   // In that case, we will just abort the transformation.
16846   APInt UsedBits(LD->getValueSizeInBits(0), 0);
16847 
16848   SmallVector<LoadedSlice, 4> LoadedSlices;
16849 
16850   // Check if this load is used as several smaller chunks of bits.
16851   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16852   // of computation for each trunc.
16853   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16854        UI != UIEnd; ++UI) {
16855     // Skip the uses of the chain.
16856     if (UI.getUse().getResNo() != 0)
16857       continue;
16858 
16859     SDNode *User = *UI;
16860     unsigned Shift = 0;
16861 
16862     // Check if this is a trunc(lshr).
16863     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16864         isa<ConstantSDNode>(User->getOperand(1))) {
16865       Shift = User->getConstantOperandVal(1);
16866       User = *User->use_begin();
16867     }
16868 
16869     // At this point, User is a truncate iff we encountered trunc or
16870     // trunc(lshr).
16871     if (User->getOpcode() != ISD::TRUNCATE)
16872       return false;
16873 
16874     // The width of the type must be a power of 2 and at least 8 bits.
16875     // Otherwise the load cannot be represented in LLVM IR.
16876     // Moreover, if the shift amount is not a multiple of 8 bits, the
16877     // slice would straddle byte boundaries. We do not support that.
16878     unsigned Width = User->getValueSizeInBits(0);
16879     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16880       return false;
16881 
16882     // Build the slice for this chain of computations.
16883     LoadedSlice LS(User, LD, Shift, &DAG);
16884     APInt CurrentUsedBits = LS.getUsedBits();
16885 
16886     // Check if this slice overlaps with another.
16887     if ((CurrentUsedBits & UsedBits) != 0)
16888       return false;
16889     // Update the bits used globally.
16890     UsedBits |= CurrentUsedBits;
16891 
16892     // Check if the new slice would be legal.
16893     if (!LS.isLegal())
16894       return false;
16895 
16896     // Record the slice.
16897     LoadedSlices.push_back(LS);
16898   }
16899 
16900   // Abort slicing if it does not seem to be profitable.
16901   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16902     return false;
16903 
16904   ++SlicedLoads;
16905 
16906   // Rewrite each chain to use an independent load.
16907   // By construction, each chain can be represented by a unique load.
16908 
16909   // Prepare the argument for the new token factor for all the slices.
16910   SmallVector<SDValue, 8> ArgChains;
16911   for (const LoadedSlice &LS : LoadedSlices) {
16912     SDValue SliceInst = LS.loadSlice();
16913     CombineTo(LS.Inst, SliceInst, true);
16914     if (SliceInst.getOpcode() != ISD::LOAD)
16915       SliceInst = SliceInst.getOperand(0);
16916     assert(SliceInst->getOpcode() == ISD::LOAD &&
16917            "It takes more than a zext to get to the loaded slice!!");
16918     ArgChains.push_back(SliceInst.getValue(1));
16919   }
16920 
16921   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16922                               ArgChains);
16923   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16924   AddToWorklist(Chain.getNode());
16925   return true;
16926 }
16927 
16928 /// Check to see if V is (and load (ptr), imm), where the load has
16929 /// specific bytes cleared out.  If so, return the number of bytes being
16930 /// masked out and the byte shift amount.
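/// For illustration, with V = (and (load i32 %p), 0xFFFF00FF), byte 1 of the
/// loaded value is cleared, so this returns {1 /*byte masked*/, 1 /*shift*/}.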
16931 static std::pair<unsigned, unsigned>
16932 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16933   std::pair<unsigned, unsigned> Result(0, 0);
16934 
16935   // Check for the structure we're looking for.
16936   if (V->getOpcode() != ISD::AND ||
16937       !isa<ConstantSDNode>(V->getOperand(1)) ||
16938       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16939     return Result;
16940 
16941   // Check the chain and pointer.
16942   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16943   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16944 
16945   // This only handles simple types.
16946   if (V.getValueType() != MVT::i16 &&
16947       V.getValueType() != MVT::i32 &&
16948       V.getValueType() != MVT::i64)
16949     return Result;
16950 
16951   // Check the constant mask.  Invert it so that the bits being masked out are
16952   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16953   // follow the sign bit for uniformity.
16954   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16955   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16956   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16957   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16958   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16959   if (NotMaskLZ == 64) return Result;  // All zero mask.
16960 
16961   // See if we have a continuous run of bits.  If so, we have 0*1+0*
16962   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16963     return Result;
16964 
16965   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16966   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16967     NotMaskLZ -= 64-V.getValueSizeInBits();
16968 
16969   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16970   switch (MaskedBytes) {
16971   case 1:
16972   case 2:
16973   case 4: break;
16974   default: return Result; // All one mask, or 5-byte mask.
16975   }
16976 
16977   // Verify that the masked region starts at an offset that is a multiple
16978   // of its width, so the access stays aligned to the access width.
16979   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16980 
16981   // For narrowing to be valid, the load must be the memory operation
16982   // immediately preceding the store.
16983   if (LD == Chain.getNode())
16984     ; // ok.
16985   else if (Chain->getOpcode() == ISD::TokenFactor &&
16986            SDValue(LD, 1).hasOneUse()) {
16987     // LD has only one chain use, so there are no indirect dependencies.
16988     if (!LD->isOperandOf(Chain.getNode()))
16989       return Result;
16990   } else
16991     return Result; // Fail.
16992 
16993   Result.first = MaskedBytes;
16994   Result.second = NotMaskTZ/8;
16995   return Result;
16996 }
16997 
16998 /// Check to see if IVal is something that provides a value as specified by
16999 /// MaskInfo. If so, replace the specified store with a narrower store of
17000 /// truncated IVal.
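/// For illustration, with MaskInfo = (1, 1) on a little-endian target and
/// IVal known to be zero outside byte 1, the i32 store is replaced by an i8
/// store of (trunc (srl IVal, 8)) at base pointer + 1.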
17001 static SDValue
17002 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17003                                 SDValue IVal, StoreSDNode *St,
17004                                 DAGCombiner *DC) {
17005   unsigned NumBytes = MaskInfo.first;
17006   unsigned ByteShift = MaskInfo.second;
17007   SelectionDAG &DAG = DC->getDAG();
17008 
17009   // Check to see if IVal is all zeros in the part being masked in by the 'or'
17010   // that uses this.  If not, this is not a replacement.
17011   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17012                                   ByteShift*8, (ByteShift+NumBytes)*8);
17013   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17014 
17015   // Check that it is legal on the target to do this.  It is legal if the new
17016   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17017   // legalization (and the target doesn't explicitly think this is a bad idea).
17018   MVT VT = MVT::getIntegerVT(NumBytes * 8);
17019   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17020   if (!DC->isTypeLegal(VT))
17021     return SDValue();
17022   if (St->getMemOperand() &&
17023       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17024                               *St->getMemOperand()))
17025     return SDValue();
17026 
17027   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
17028   // shifted by ByteShift and truncated down to NumBytes.
17029   if (ByteShift) {
17030     SDLoc DL(IVal);
17031     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17032                        DAG.getConstant(ByteShift*8, DL,
17033                                     DC->getShiftAmountTy(IVal.getValueType())));
17034   }
17035 
17036   // Figure out the offset for the store and the alignment of the access.
17037   unsigned StOffset;
17038   if (DAG.getDataLayout().isLittleEndian())
17039     StOffset = ByteShift;
17040   else
17041     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17042 
17043   SDValue Ptr = St->getBasePtr();
17044   if (StOffset) {
17045     SDLoc DL(IVal);
17046     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
17047   }
17048 
17049   // Truncate down to the new size.
17050   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17051 
17052   ++OpsNarrowed;
17053   return DAG
17054       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17055                 St->getPointerInfo().getWithOffset(StOffset),
17056                 St->getOriginalAlign());
17057 }
17058 
17059 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17060 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17061 /// narrowing the load and store if it would end up being a win for performance
17062 /// or code size.
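/// For illustration (hypothetical IR), "store (or (load i32 %p), 0x00FF0000),
/// %p" only changes byte 2 of the value, so on a little-endian target it can
/// be narrowed to an i8 load / or / store at %p + 2.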
17063 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17064   StoreSDNode *ST  = cast<StoreSDNode>(N);
17065   if (!ST->isSimple())
17066     return SDValue();
17067 
17068   SDValue Chain = ST->getChain();
17069   SDValue Value = ST->getValue();
17070   SDValue Ptr   = ST->getBasePtr();
17071   EVT VT = Value.getValueType();
17072 
17073   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
17074     return SDValue();
17075 
17076   unsigned Opc = Value.getOpcode();
17077 
17078   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17079   // is a byte mask indicating a consecutive number of bytes, check to see if
17080   // Y is known to provide just those bytes.  If so, we try to replace the
17081   // load / or / store sequence with a single (narrower) store, which makes
17082   // the load dead.
17083   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
17084     std::pair<unsigned, unsigned> MaskedLoad;
17085     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17086     if (MaskedLoad.first)
17087       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17088                                                   Value.getOperand(1), ST,this))
17089         return NewST;
17090 
17091     // Or is commutative, so try swapping X and Y.
17092     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17093     if (MaskedLoad.first)
17094       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17095                                                   Value.getOperand(0), ST,this))
17096         return NewST;
17097   }
17098 
17099   if (!EnableReduceLoadOpStoreWidth)
17100     return SDValue();
17101 
17102   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17103       Value.getOperand(1).getOpcode() != ISD::Constant)
17104     return SDValue();
17105 
17106   SDValue N0 = Value.getOperand(0);
17107   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17108       Chain == SDValue(N0.getNode(), 1)) {
17109     LoadSDNode *LD = cast<LoadSDNode>(N0);
17110     if (LD->getBasePtr() != Ptr ||
17111         LD->getPointerInfo().getAddrSpace() !=
17112         ST->getPointerInfo().getAddrSpace())
17113       return SDValue();
17114 
17115     // Find the type to narrow the load / op / store to.
17116     SDValue N1 = Value.getOperand(1);
17117     unsigned BitWidth = N1.getValueSizeInBits();
17118     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17119     if (Opc == ISD::AND)
17120       Imm ^= APInt::getAllOnes(BitWidth);
17121     if (Imm == 0 || Imm.isAllOnes())
17122       return SDValue();
17123     unsigned ShAmt = Imm.countTrailingZeros();
17124     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17125     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17126     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17127     // The narrowing should be profitable, the load/store operation should be
17128     // legal (or custom) and the store size should be equal to the NewVT width.
17129     while (NewBW < BitWidth &&
17130            (NewVT.getStoreSizeInBits() != NewBW ||
17131             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17132             !TLI.isNarrowingProfitable(VT, NewVT))) {
17133       NewBW = NextPowerOf2(NewBW);
17134       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17135     }
17136     if (NewBW >= BitWidth)
17137       return SDValue();
17138 
17139     // If the lowest changed bit does not fall on a NewBW-bit boundary,
17140     // start at the previous boundary.
17141     if (ShAmt % NewBW)
17142       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17143     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17144                                    std::min(BitWidth, ShAmt + NewBW));
17145     if ((Imm & Mask) == Imm) {
17146       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17147       if (Opc == ISD::AND)
17148         NewImm ^= APInt::getAllOnes(NewBW);
17149       uint64_t PtrOff = ShAmt / 8;
17150       // For big endian targets, we need to adjust the offset to the pointer to
17151       // load the correct bytes.
17152       if (DAG.getDataLayout().isBigEndian())
17153         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17154 
17155       bool IsFast = false;
17156       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17157       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17158                                   LD->getAddressSpace(), NewAlign,
17159                                   LD->getMemOperand()->getFlags(), &IsFast) ||
17160           !IsFast)
17161         return SDValue();
17162 
17163       SDValue NewPtr =
17164           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17165       SDValue NewLD =
17166           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17167                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17168                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
17169       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17170                                    DAG.getConstant(NewImm, SDLoc(Value),
17171                                                    NewVT));
17172       SDValue NewST =
17173           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17174                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17175 
17176       AddToWorklist(NewPtr.getNode());
17177       AddToWorklist(NewLD.getNode());
17178       AddToWorklist(NewVal.getNode());
17179       WorklistRemover DeadNodes(*this);
17180       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17181       ++OpsNarrowed;
17182       return NewST;
17183     }
17184   }
17185 
17186   return SDValue();
17187 }
17188 
17189 /// For a given floating point load / store pair, if the load value isn't used
17190 /// by any other operations, then consider transforming the pair to integer
17191 /// load / store operations if the target deems the transformation profitable.
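/// For illustration, "store float (load float %p), %q" may become
/// "store i32 (load i32 %p), %q" when the target reports the integer
/// operations as legal, fast, and desirable.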
17192 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17193   StoreSDNode *ST  = cast<StoreSDNode>(N);
17194   SDValue Value = ST->getValue();
17195   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17196       Value.hasOneUse()) {
17197     LoadSDNode *LD = cast<LoadSDNode>(Value);
17198     EVT VT = LD->getMemoryVT();
17199     if (!VT.isFloatingPoint() ||
17200         VT != ST->getMemoryVT() ||
17201         LD->isNonTemporal() ||
17202         ST->isNonTemporal() ||
17203         LD->getPointerInfo().getAddrSpace() != 0 ||
17204         ST->getPointerInfo().getAddrSpace() != 0)
17205       return SDValue();
17206 
17207     TypeSize VTSize = VT.getSizeInBits();
17208 
17209     // We don't know the size of scalable types at compile time so we cannot
17210     // create an integer of the equivalent size.
17211     if (VTSize.isScalable())
17212       return SDValue();
17213 
17214     bool FastLD = false, FastST = false;
17215     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17216     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17217         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17218         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17219         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17220         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17221                                 *LD->getMemOperand(), &FastLD) ||
17222         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17223                                 *ST->getMemOperand(), &FastST) ||
17224         !FastLD || !FastST)
17225       return SDValue();
17226 
17227     SDValue NewLD =
17228         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17229                     LD->getPointerInfo(), LD->getAlign());
17230 
17231     SDValue NewST =
17232         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17233                      ST->getPointerInfo(), ST->getAlign());
17234 
17235     AddToWorklist(NewLD.getNode());
17236     AddToWorklist(NewST.getNode());
17237     WorklistRemover DeadNodes(*this);
17238     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17239     ++LdStFP2Int;
17240     return NewST;
17241   }
17242 
17243   return SDValue();
17244 }
17245 
17246 // This is a helper function for visitMUL to check the profitability
17247 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17248 // MulNode is the original multiply, AddNode is (add x, c1),
17249 // and ConstNode is c2.
17250 //
17251 // If the (add x, c1) has multiple uses, we could increase
17252 // the number of adds if we make this transformation.
17253 // It would only be worth doing this if we can remove a
17254 // multiply in the process. Check for that here.
17255 // To illustrate:
17256 //     (A + c1) * c3
17257 //     (A + c2) * c3
17258 // We're checking for cases where we have common "c3 * A" expressions.
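// For illustration, folding both (A + 10) * 5 and (A + 20) * 5 produces
// (A * 5) + 50 and (A * 5) + 100, exposing the common multiply A * 5.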
17259 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
17260                                               SDValue &AddNode,
17261                                               SDValue &ConstNode) {
17262   APInt Val;
17263 
17264   // If the add only has one use, and the target thinks the folding is
17265   // profitable or does not lead to worse code, this would be OK to do.
17266   if (AddNode.getNode()->hasOneUse() &&
17267       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17268     return true;
17269 
17270   // Walk all the users of the constant with which we're multiplying.
17271   for (SDNode *Use : ConstNode->uses()) {
17272     if (Use == MulNode) // This use is the one we're on right now. Skip it.
17273       continue;
17274 
17275     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17276       SDNode *OtherOp;
17277       SDNode *MulVar = AddNode.getOperand(0).getNode();
17278 
17279       // OtherOp is what we're multiplying against the constant.
17280       if (Use->getOperand(0) == ConstNode)
17281         OtherOp = Use->getOperand(1).getNode();
17282       else
17283         OtherOp = Use->getOperand(0).getNode();
17284 
17285       // Check to see if multiply is with the same operand of our "add".
17286       //
17287       //     ConstNode  = CONST
17288       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
17289       //     ...
17290       //     AddNode  = (A + c1)  <-- MulVar is A.
17291       //         = AddNode * ConstNode   <-- current visiting instruction.
17292       //
17293       // If we make this transformation, we will have a common
17294       // multiply (ConstNode * A) that we can save.
17295       if (OtherOp == MulVar)
17296         return true;
17297 
17298       // Now check to see if a future expansion will give us a common
17299       // multiply.
17300       //
17301       //     ConstNode  = CONST
17302       //     AddNode    = (A + c1)
17303       //     ...   = AddNode * ConstNode <-- current visiting instruction.
17304       //     ...
17305       //     OtherOp = (A + c2)
17306       //     Use     = OtherOp * ConstNode <-- visiting Use.
17307       //
17308       // If we make this transformation, we will have a common
17309       // multiply (CONST * A) after we also do the same transformation
17310       // to the "Use" instruction.
17311       if (OtherOp->getOpcode() == ISD::ADD &&
17312           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17313           OtherOp->getOperand(0).getNode() == MulVar)
17314         return true;
17315     }
17316   }
17317 
17318   // Didn't find a case where this would be profitable.
17319   return false;
17320 }
17321 
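// Gather the input chains of the stores being merged, skipping duplicates
// and chains that are themselves among the merged stores, and join them with
// a single TokenFactor.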
17322 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17323                                          unsigned NumStores) {
17324   SmallVector<SDValue, 8> Chains;
17325   SmallPtrSet<const SDNode *, 8> Visited;
17326   SDLoc StoreDL(StoreNodes[0].MemNode);
17327 
17328   for (unsigned i = 0; i < NumStores; ++i) {
17329     Visited.insert(StoreNodes[i].MemNode);
17330   }
17331 
17332   // Don't include chains that are among the merged stores or duplicates.
17333   for (unsigned i = 0; i < NumStores; ++i) {
17334     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17335       Chains.push_back(StoreNodes[i].MemNode->getChain());
17336   }
17337 
17338   assert(Chains.size() > 0 && "Chain should have generated a chain");
17339   return DAG.getTokenFactor(StoreDL, Chains);
17340 }
17341 
17342 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17343     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17344     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17345   // Make sure we have something to merge.
17346   if (NumStores < 2)
17347     return false;
17348 
17349   assert((!UseTrunc || !UseVector) &&
17350          "This optimization cannot emit a vector truncating store");
17351 
17352   // The latest Node in the DAG.
17353   SDLoc DL(StoreNodes[0].MemNode);
17354 
17355   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17356   unsigned SizeInBits = NumStores * ElementSizeBits;
17357   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17358 
17359   Optional<MachineMemOperand::Flags> Flags;
17360   AAMDNodes AAInfo;
17361   for (unsigned I = 0; I != NumStores; ++I) {
17362     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17363     if (!Flags) {
17364       Flags = St->getMemOperand()->getFlags();
17365       AAInfo = St->getAAInfo();
17366       continue;
17367     }
17368     // Skip merging if there's an inconsistent flag.
17369     if (Flags != St->getMemOperand()->getFlags())
17370       return false;
17371     // Concatenate AA metadata.
17372     AAInfo = AAInfo.concat(St->getAAInfo());
17373   }
17374 
17375   EVT StoreTy;
17376   if (UseVector) {
17377     unsigned Elts = NumStores * NumMemElts;
17378     // Get the type for the merged vector store.
17379     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17380   } else
17381     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17382 
17383   SDValue StoredVal;
17384   if (UseVector) {
17385     if (IsConstantSrc) {
17386       SmallVector<SDValue, 8> BuildVector;
17387       for (unsigned I = 0; I != NumStores; ++I) {
17388         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17389         SDValue Val = St->getValue();
17390         // If constant is of the wrong type, convert it now.
17391         if (MemVT != Val.getValueType()) {
17392           Val = peekThroughBitcasts(Val);
17393           // Deal with constants of wrong size.
17394           if (ElementSizeBits != Val.getValueSizeInBits()) {
17395             EVT IntMemVT =
17396                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17397             if (isa<ConstantFPSDNode>(Val)) {
17398               // Not clear how to truncate FP values.
17399               return false;
17400             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17401               Val = DAG.getConstant(C->getAPIntValue()
17402                                         .zextOrTrunc(Val.getValueSizeInBits())
17403                                         .zextOrTrunc(ElementSizeBits),
17404                                     SDLoc(C), IntMemVT);
17405           }
17406           // Now that the value is correctly sized, bitcast it to the
17407           // correct type.
17407           Val = DAG.getBitcast(MemVT, Val);
17408         }
17409         BuildVector.push_back(Val);
17410       }
17411       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17412                                                : ISD::BUILD_VECTOR,
17413                               DL, StoreTy, BuildVector);
17414     } else {
17415       SmallVector<SDValue, 8> Ops;
17416       for (unsigned i = 0; i < NumStores; ++i) {
17417         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17418         SDValue Val = peekThroughBitcasts(St->getValue());
17419         // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
17420         // type MemVT. If the underlying value is not the correct
17421         // type, but it is an extraction of an appropriate vector we
17422         // can recast Val to be of the correct type. This may require
17423         // converting between EXTRACT_VECTOR_ELT and
17424         // EXTRACT_SUBVECTOR.
17425         if ((MemVT != Val.getValueType()) &&
17426             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17427              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17428           EVT MemVTScalarTy = MemVT.getScalarType();
17429           // We may need to add a bitcast here to get types to line up.
17430           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17431             Val = DAG.getBitcast(MemVT, Val);
17432           } else {
17433             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17434                                             : ISD::EXTRACT_VECTOR_ELT;
17435             SDValue Vec = Val.getOperand(0);
17436             SDValue Idx = Val.getOperand(1);
17437             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17438           }
17439         }
17440         Ops.push_back(Val);
17441       }
17442 
17443       // Build the extracted vector elements back into a vector.
17444       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17445                                                : ISD::BUILD_VECTOR,
17446                               DL, StoreTy, Ops);
17447     }
17448   } else {
17449     // We should always use a vector store when merging extracted vector
17450     // elements, so this path implies a store of constants.
17451     assert(IsConstantSrc && "Merged vector elements should use vector store");
17452 
17453     APInt StoreInt(SizeInBits, 0);
17454 
17455     // Construct a single integer constant which is made of the smaller
17456     // constant inputs.
17457     bool IsLE = DAG.getDataLayout().isLittleEndian();
17458     for (unsigned i = 0; i < NumStores; ++i) {
17459       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17460       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17461 
17462       SDValue Val = St->getValue();
17463       Val = peekThroughBitcasts(Val);
17464       StoreInt <<= ElementSizeBits;
17465       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17466         StoreInt |= C->getAPIntValue()
17467                         .zextOrTrunc(ElementSizeBits)
17468                         .zextOrTrunc(SizeInBits);
17469       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17470         StoreInt |= C->getValueAPF()
17471                         .bitcastToAPInt()
17472                         .zextOrTrunc(ElementSizeBits)
17473                         .zextOrTrunc(SizeInBits);
17474         // If fp truncation is necessary give up for now.
17475         if (MemVT.getSizeInBits() != ElementSizeBits)
17476           return false;
17477       } else {
17478         llvm_unreachable("Invalid constant element type");
17479       }
17480     }
17481 
17482     // Create the new Load and Store operations.
17483     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17484   }
17485 
17486   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17487   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17488 
17489   // Make sure we use a truncating store if that is necessary for legality.
17490   SDValue NewStore;
17491   if (!UseTrunc) {
17492     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17493                             FirstInChain->getPointerInfo(),
17494                             FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17495   } else { // Must be realized as a trunc store
17496     EVT LegalizedStoredValTy =
17497         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17498     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17499     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17500     SDValue ExtendedStoreVal =
17501         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17502                         LegalizedStoredValTy);
17503     NewStore = DAG.getTruncStore(
17504         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17505         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17506         FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17507   }
17508 
17509   // Replace all merged stores with the new store.
17510   for (unsigned i = 0; i < NumStores; ++i)
17511     CombineTo(StoreNodes[i].MemNode, NewStore);
17512 
17513   AddToWorklist(NewChain.getNode());
17514   return true;
17515 }
17516 
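// Collect into StoreNodes the stores that could be merged with St: stores of
// the same source kind (load, constant, or extract) whose base pointers match
// St's up to a constant offset. RootNode is set to the chain ancestor from
// which the candidate search was performed.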
17517 void DAGCombiner::getStoreMergeCandidates(
17518     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17519     SDNode *&RootNode) {
17520   // This holds the base pointer, index, and the offset in bytes from the base
17521   // pointer. We must have a base and an offset. Do not handle stores to undef
17522   // base pointers.
17523   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17524   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17525     return;
17526 
17527   SDValue Val = peekThroughBitcasts(St->getValue());
17528   StoreSource StoreSrc = getStoreSource(Val);
17529   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17530 
17531   // Match on the load's base pointer if relevant.
17532   EVT MemVT = St->getMemoryVT();
17533   BaseIndexOffset LBasePtr;
17534   EVT LoadVT;
17535   if (StoreSrc == StoreSource::Load) {
17536     auto *Ld = cast<LoadSDNode>(Val);
17537     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17538     LoadVT = Ld->getMemoryVT();
17539     // Load and store should be the same type.
17540     if (MemVT != LoadVT)
17541       return;
17542     // Loads must only have one use.
17543     if (!Ld->hasNUsesOfValue(1, 0))
17544       return;
17545     // The memory operands must not be volatile/indexed/atomic.
17546     // TODO: May be able to relax for unordered atomics (see D66309)
17547     if (!Ld->isSimple() || Ld->isIndexed())
17548       return;
17549   }
17550   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17551                             int64_t &Offset) -> bool {
17552     // The memory operands must not be volatile/indexed/atomic.
17553     // TODO: May be able to relax for unordered atomics (see D66309)
17554     if (!Other->isSimple() || Other->isIndexed())
17555       return false;
17556     // Don't mix temporal stores with non-temporal stores.
17557     if (St->isNonTemporal() != Other->isNonTemporal())
17558       return false;
17559     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17560     // Allow merging constants of different types as integers.
17561     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17562                                            : Other->getMemoryVT() != MemVT;
17563     switch (StoreSrc) {
17564     case StoreSource::Load: {
17565       if (NoTypeMatch)
17566         return false;
17567       // The Load's Base Ptr must also match.
17568       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17569       if (!OtherLd)
17570         return false;
17571       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17572       if (LoadVT != OtherLd->getMemoryVT())
17573         return false;
17574       // Loads must only have one use.
17575       if (!OtherLd->hasNUsesOfValue(1, 0))
17576         return false;
17577       // The memory operands must not be volatile/indexed/atomic.
17578       // TODO: May be able to relax for unordered atomics (see D66309)
17579       if (!OtherLd->isSimple() || OtherLd->isIndexed())
17580         return false;
17581       // Don't mix temporal loads with non-temporal loads.
17582       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17583         return false;
17584       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17585         return false;
17586       break;
17587     }
17588     case StoreSource::Constant:
17589       if (NoTypeMatch)
17590         return false;
17591       if (!isIntOrFPConstant(OtherBC))
17592         return false;
17593       break;
17594     case StoreSource::Extract:
17595       // Do not merge truncated stores here.
17596       if (Other->isTruncatingStore())
17597         return false;
17598       if (!MemVT.bitsEq(OtherBC.getValueType()))
17599         return false;
17600       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17601           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17602         return false;
17603       break;
17604     default:
17605       llvm_unreachable("Unhandled store source for merging");
17606     }
17607     Ptr = BaseIndexOffset::match(Other, DAG);
17608     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17609   };
17610 
17611   // Check if the pair of StoreNode and RootNode has already bailed out of
17612   // the dependence check more times than the limit allows.
17613   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17614                                         SDNode *RootNode) -> bool {
17615     auto RootCount = StoreRootCountMap.find(StoreNode);
17616     return RootCount != StoreRootCountMap.end() &&
17617            RootCount->second.first == RootNode &&
17618            RootCount->second.second > StoreMergeDependenceLimit;
17619   };
17620 
17621   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17622     // This must be a chain use.
17623     if (UseIter.getOperandNo() != 0)
17624       return;
17625     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17626       BaseIndexOffset Ptr;
17627       int64_t PtrDiff;
17628       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17629           !OverLimitInDependenceCheck(OtherStore, RootNode))
17630         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17631     }
17632   };
17633 
17634   // We are looking for a root node which is an ancestor to all mergeable
17635   // stores. We search up through a load, to our root and then down
17636   // through all children. For instance we will find Store{1,2,3} if
17637   // St is Store1, Store2, or Store3 where the root is not a load,
17638   // which is always true for non-volatile ops. TODO: Expand
17639   // the search to find all valid candidates through multiple layers of loads.
17640   //
17641   // Root
17642   // |-------|-------|
17643   // Load    Load    Store3
17644   // |       |
17645   // Store1   Store2
17646   //
17647   // FIXME: We should be able to climb and
17648   // descend TokenFactors to find candidates as well.
17649 
17650   RootNode = St->getChain().getNode();
17651 
17652   unsigned NumNodesExplored = 0;
17653   const unsigned MaxSearchNodes = 1024;
17654   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17655     RootNode = Ldn->getChain().getNode();
17656     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17657          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17658       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17659         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17660           TryToAddCandidate(I2);
17661       }
17662       // Check stores that depend on the root (e.g. Store 3 in the chart above).
17663       if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17664         TryToAddCandidate(I);
17665       }
17666     }
17667   } else {
17668     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17669          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17670       TryToAddCandidate(I);
17671   }
17672 }
17673 
17674 // We need to check that merging these stores does not cause a loop in
17675 // the DAG. Any store candidate may depend on another candidate
17676 // indirectly through its operand (we already consider dependencies
17677 // through the chain). Check in parallel by searching up from
17678 // non-chain operands of candidates.
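// For illustration, merging stores S1 and S2 would create a cycle if the
// value stored by S2 transitively depends on S1 through a non-chain operand:
// after merging, the combined store would be a predecessor of itself.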
17679 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17680     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17681     SDNode *RootNode) {
17682   // FIXME: We should be able to truncate a full search of
17683   // predecessors by doing a BFS and keeping tabs on the originating
17684   // stores from which worklist nodes come, in a similar way to
17685   // TokenFactor simplification.
17686 
17687   SmallPtrSet<const SDNode *, 32> Visited;
17688   SmallVector<const SDNode *, 8> Worklist;
17689 
17690   // RootNode is a predecessor to all candidates so we need not search
17691   // past it. Add RootNode (peeking through TokenFactors). Do not count
17692   // these towards size check.
17693 
17694   Worklist.push_back(RootNode);
17695   while (!Worklist.empty()) {
17696     auto N = Worklist.pop_back_val();
17697     if (!Visited.insert(N).second)
17698       continue; // Already present in Visited.
17699     if (N->getOpcode() == ISD::TokenFactor) {
17700       for (SDValue Op : N->ops())
17701         Worklist.push_back(Op.getNode());
17702     }
17703   }
17704 
17705   // Don't count pruning nodes towards max.
17706   unsigned int Max = 1024 + Visited.size();
17707   // Search Ops of store candidates.
17708   for (unsigned i = 0; i < NumStores; ++i) {
17709     SDNode *N = StoreNodes[i].MemNode;
17710     // Of the 4 Store Operands:
17711     //   * Chain (Op 0) -> We have already considered these
17712     //                    in candidate selection, so they can be
17713     //                    safely ignored
17714     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17715     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17716     //                       but aren't necessarily from the same base node, so
17717     //                       cycles are possible (e.g. via an indexed store).
17718     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17719     //               non-indexed stores). Not constant on all targets (e.g. ARM)
17720     //               and so can participate in a cycle.
17721     for (unsigned j = 1; j < N->getNumOperands(); ++j)
17722       Worklist.push_back(N->getOperand(j).getNode());
17723   }
17724   // Search through DAG. We can stop early if we find a store node.
17725   for (unsigned i = 0; i < NumStores; ++i)
17726     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17727                                      Max)) {
17728       // If the search bails out, record the StoreNode and RootNode in the
17729       // StoreRootCountMap. If we have seen the pair more times than the
17730       // limit, we won't add the StoreNode into the StoreNodes set again.
17731       if (Visited.size() >= Max) {
17732         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17733         if (RootCount.first == RootNode)
17734           RootCount.second++;
17735         else
17736           RootCount = {RootNode, 1};
17737       }
17738       return false;
17739     }
17740   return true;
17741 }
17742 
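// Returns the number of consecutive stores (each starting ElementSizeBytes
// past the previous one) at the front of StoreNodes, erasing leading
// candidates that cannot begin such a run; returns 0 if no two consecutive
// stores exist.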
17743 unsigned
17744 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17745                                   int64_t ElementSizeBytes) const {
17746   while (true) {
17747     // Find the first pair of stores at consecutive offsets.
17748     size_t StartIdx = 0;
17749     while ((StartIdx + 1 < StoreNodes.size()) &&
17750            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17751               StoreNodes[StartIdx + 1].OffsetFromBase)
17752       ++StartIdx;
17753 
17754     // Bail if we don't have enough candidates to merge.
17755     if (StartIdx + 1 >= StoreNodes.size())
17756       return 0;
17757 
17758     // Trim stores that overlapped with the first store.
17759     if (StartIdx)
17760       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17761 
17762     // Scan the memory operations on the chain and find the first
17763     // non-consecutive store memory address.
17764     unsigned NumConsecutiveStores = 1;
17765     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17766     // Check that the addresses are consecutive starting from the second
17767     // element in the list of stores.
17768     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17769       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17770       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17771         break;
17772       NumConsecutiveStores = i + 1;
17773     }
17774     if (NumConsecutiveStores > 1)
17775       return NumConsecutiveStores;
17776 
17777     // There are no consecutive stores at the start of the list.
17778     // Remove the first store and try again.
17779     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17780   }
17781 }
17782 
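// For illustration, four consecutive i8 stores of the constants 1, 2, 3 and 4
// may be merged into one i32 store of 0x04030201 on a little-endian target,
// when such a store is legal and fast.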
17783 bool DAGCombiner::tryStoreMergeOfConstants(
17784     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17785     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17786   LLVMContext &Context = *DAG.getContext();
17787   const DataLayout &DL = DAG.getDataLayout();
17788   int64_t ElementSizeBytes = MemVT.getStoreSize();
17789   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17790   bool MadeChange = false;
17791 
17792   // Store the constants into memory as one consecutive store.
17793   while (NumConsecutiveStores >= 2) {
17794     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17795     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17796     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17797     unsigned LastLegalType = 1;
17798     unsigned LastLegalVectorType = 1;
17799     bool LastIntegerTrunc = false;
17800     bool NonZero = false;
17801     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17802     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17803       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17804       SDValue StoredVal = ST->getValue();
17805       bool IsElementZero = false;
17806       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17807         IsElementZero = C->isZero();
17808       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17809         IsElementZero = C->getConstantFPValue()->isNullValue();
17810       if (IsElementZero) {
17811         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17812           FirstZeroAfterNonZero = i;
17813       }
17814       NonZero |= !IsElementZero;
17815 
17816       // Find a legal type for the constant store.
17817       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17818       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17819       bool IsFast = false;
17820 
17821       // Break early when size is too large to be legal.
17822       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17823         break;
17824 
17825       if (TLI.isTypeLegal(StoreTy) &&
17826           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17827                                DAG.getMachineFunction()) &&
17828           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17829                                  *FirstInChain->getMemOperand(), &IsFast) &&
17830           IsFast) {
17831         LastIntegerTrunc = false;
17832         LastLegalType = i + 1;
17833         // Or check whether a truncstore is legal.
17834       } else if (TLI.getTypeAction(Context, StoreTy) ==
17835                  TargetLowering::TypePromoteInteger) {
17836         EVT LegalizedStoredValTy =
17837             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17838         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17839             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17840                                  DAG.getMachineFunction()) &&
17841             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17842                                    *FirstInChain->getMemOperand(), &IsFast) &&
17843             IsFast) {
17844           LastIntegerTrunc = true;
17845           LastLegalType = i + 1;
17846         }
17847       }
17848 
17849       // We only use vectors if the constant is known to be zero or the
17850       // target allows it and the function is not marked with the
17851       // noimplicitfloat attribute.
17852       if ((!NonZero ||
17853            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17854           AllowVectors) {
17855         // Find a legal type for the vector store.
17856         unsigned Elts = (i + 1) * NumMemElts;
17857         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17858         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17859             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17860             TLI.allowsMemoryAccess(Context, DL, Ty,
17861                                    *FirstInChain->getMemOperand(), &IsFast) &&
17862             IsFast)
17863           LastLegalVectorType = i + 1;
17864       }
17865     }
17866 
17867     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17868     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17869     bool UseTrunc = LastIntegerTrunc && !UseVector;
17870 
17871     // Check if we found a legal integer type that creates a meaningful
17872     // merge.
17873     if (NumElem < 2) {
17874       // We know that candidate stores are in order and of correct
17875       // shape. While there is no mergeable sequence from the
17876       // beginning, one may start later in the sequence. The only
17877       // reason a merge of size N could have failed where another of
17878       // the same size would not have, is if the alignment has
17879       // improved or we've dropped a non-zero value. Drop as many
17880       // candidates as we can here.
17881       unsigned NumSkip = 1;
17882       while ((NumSkip < NumConsecutiveStores) &&
17883              (NumSkip < FirstZeroAfterNonZero) &&
17884              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17885         NumSkip++;
17886 
17887       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17888       NumConsecutiveStores -= NumSkip;
17889       continue;
17890     }
17891 
17892     // Check that we can merge these candidates without causing a cycle.
17893     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17894                                                   RootNode)) {
17895       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17896       NumConsecutiveStores -= NumElem;
17897       continue;
17898     }
17899 
17900     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17901                                                   /*IsConstantSrc*/ true,
17902                                                   UseVector, UseTrunc);
17903 
17904     // Remove merged stores for next iteration.
17905     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17906     NumConsecutiveStores -= NumElem;
17907   }
17908   return MadeChange;
17909 }
17910 
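// For illustration, two consecutive f32 stores of values extracted from a
// vector may be merged into a single v2f32 store when that vector type is
// legal and the access is allowed and fast.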
17911 bool DAGCombiner::tryStoreMergeOfExtracts(
17912     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17913     EVT MemVT, SDNode *RootNode) {
17914   LLVMContext &Context = *DAG.getContext();
17915   const DataLayout &DL = DAG.getDataLayout();
17916   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17917   bool MadeChange = false;
17918 
17919   // Keep looping over the consecutive stores while merging succeeds.
17920   while (NumConsecutiveStores >= 2) {
17921     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17922     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17923     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17924     unsigned NumStoresToMerge = 1;
17925     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17926       // Find a legal type for the vector store.
17927       unsigned Elts = (i + 1) * NumMemElts;
17928       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17929       bool IsFast = false;
17930 
17931       // Break early when size is too large to be legal.
17932       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17933         break;
17934 
17935       if (TLI.isTypeLegal(Ty) &&
17936           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17937           TLI.allowsMemoryAccess(Context, DL, Ty,
17938                                  *FirstInChain->getMemOperand(), &IsFast) &&
17939           IsFast)
17940         NumStoresToMerge = i + 1;
17941     }
17942 
17943     // Check if we found a legal vector type that creates a meaningful
17944     // merge.
17945     if (NumStoresToMerge < 2) {
17946       // We know that candidate stores are in order and of correct
17947       // shape. While there may be no mergeable sequence starting at the
17948       // beginning, one may start later in the sequence. The only
17949       // reason a merge of size N could have failed where another of
17950       // the same size would not have is if the alignment has
17951       // improved. Drop as many candidates as we can here.
17952       unsigned NumSkip = 1;
17953       while ((NumSkip < NumConsecutiveStores) &&
17954              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17955         NumSkip++;
17956 
17957       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17958       NumConsecutiveStores -= NumSkip;
17959       continue;
17960     }
17961 
17962     // Check that we can merge these candidates without causing a cycle.
17963     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17964                                                   RootNode)) {
17965       StoreNodes.erase(StoreNodes.begin(),
17966                        StoreNodes.begin() + NumStoresToMerge);
17967       NumConsecutiveStores -= NumStoresToMerge;
17968       continue;
17969     }
17970 
17971     MadeChange |= mergeStoresOfConstantsOrVecElts(
17972         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17973         /*UseVector*/ true, /*UseTrunc*/ false);
17974 
17975     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17976     NumConsecutiveStores -= NumStoresToMerge;
17977   }
17978   return MadeChange;
17979 }
17980 
17981 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17982                                        unsigned NumConsecutiveStores, EVT MemVT,
17983                                        SDNode *RootNode, bool AllowVectors,
17984                                        bool IsNonTemporalStore,
17985                                        bool IsNonTemporalLoad) {
17986   LLVMContext &Context = *DAG.getContext();
17987   const DataLayout &DL = DAG.getDataLayout();
17988   int64_t ElementSizeBytes = MemVT.getStoreSize();
17989   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17990   bool MadeChange = false;
17991 
17992   // Look for load nodes which are used by the stored values.
17993   SmallVector<MemOpLink, 8> LoadNodes;
17994 
17995   // Find acceptable loads. Loads need to have the same chain (token factor),
17996   // must not be zext, volatile, or indexed, and they must be consecutive.
17997   BaseIndexOffset LdBasePtr;
17998 
17999   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18000     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18001     SDValue Val = peekThroughBitcasts(St->getValue());
18002     LoadSDNode *Ld = cast<LoadSDNode>(Val);
18003 
18004     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
18005     // If this is not the first ptr that we check.
18006     int64_t LdOffset = 0;
18007     if (LdBasePtr.getBase().getNode()) {
18008       // The base ptr must be the same.
18009       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18010         break;
18011     } else {
18012       // Remember the first base pointer; later pointers are checked against it.
18013       LdBasePtr = LdPtr;
18014     }
18015 
18016     // We found a potential memory operand to merge.
18017     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18018   }
18019 
18020   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18021     Align RequiredAlignment;
18022     bool NeedRotate = false;
18023     if (LoadNodes.size() == 2) {
18024       // If we have load/store pair instructions and we only have two values,
18025       // don't bother merging.
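      // (E.g. a target with AArch64-style ldp/stp instructions can already
      // handle two adjacent values efficiently without merging.)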
18026       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18027           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18028         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18029         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18030         break;
18031       }
18032       // If the loads are reversed, see if we can rotate the halves into place.
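      // Illustrative example: with ElementSizeBytes == 4, loads at offsets
      // {4, 0} feeding the stores at offsets {0, 4} can be merged into a
      // single i64 load followed by a rotate of 32 bits.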
18033       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18034       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18035       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18036       if (Offset0 - Offset1 == ElementSizeBytes &&
18037           (hasOperation(ISD::ROTL, PairVT) ||
18038            hasOperation(ISD::ROTR, PairVT))) {
18039         std::swap(LoadNodes[0], LoadNodes[1]);
18040         NeedRotate = true;
18041       }
18042     }
18043     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18044     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18045     Align FirstStoreAlign = FirstInChain->getAlign();
18046     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18047 
18048     // Scan the memory operations on the chain and find the first
18049     // non-consecutive load memory address. This variable holds the index into
18050     // the load node array.
18051 
18052     unsigned LastConsecutiveLoad = 1;
18053 
18054     // These variables refer to sizes, not indices, in the array.
18055     unsigned LastLegalVectorType = 1;
18056     unsigned LastLegalIntegerType = 1;
18057     bool isDereferenceable = true;
18058     bool DoIntegerTruncate = false;
18059     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18060     SDValue LoadChain = FirstLoad->getChain();
18061     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18062       // All loads must share the same chain.
18063       if (LoadNodes[i].MemNode->getChain() != LoadChain)
18064         break;
18065 
18066       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18067       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18068         break;
18069       LastConsecutiveLoad = i;
18070 
18071       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18072         isDereferenceable = false;
18073 
18074       // Find a legal type for the vector store.
18075       unsigned Elts = (i + 1) * NumMemElts;
18076       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18077 
18078       // Break early when size is too large to be legal.
18079       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18080         break;
18081 
18082       bool IsFastSt = false;
18083       bool IsFastLd = false;
18084       // Don't try vector types if we need a rotate. We may still fail the
18085       // legality checks for the integer type, but we can't handle the rotate
18086       // case with vectors.
18087       // FIXME: We could use a shuffle in place of the rotate.
18088       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18089           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18090                                DAG.getMachineFunction()) &&
18091           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18092                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18093           IsFastSt &&
18094           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18095                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18096           IsFastLd) {
18097         LastLegalVectorType = i + 1;
18098       }
18099 
18100       // Find a legal type for the integer store.
18101       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18102       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18103       if (TLI.isTypeLegal(StoreTy) &&
18104           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18105                                DAG.getMachineFunction()) &&
18106           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18107                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18108           IsFastSt &&
18109           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18110                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18111           IsFastLd) {
18112         LastLegalIntegerType = i + 1;
18113         DoIntegerTruncate = false;
18114         // Or check whether a truncstore and extload is legal.
18115       } else if (TLI.getTypeAction(Context, StoreTy) ==
18116                  TargetLowering::TypePromoteInteger) {
18117         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
18118         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18119             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18120                                  DAG.getMachineFunction()) &&
18121             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18122             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18123             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18124             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18125                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
18126             IsFastSt &&
18127             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18128                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
18129             IsFastLd) {
18130           LastLegalIntegerType = i + 1;
18131           DoIntegerTruncate = true;
18132         }
18133       }
18134     }
18135 
18136     // Only use vector types if the vector type is larger than the integer
18137     // type. If they are the same, use integers.
18138     bool UseVectorTy =
18139         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18140     unsigned LastLegalType =
18141         std::max(LastLegalVectorType, LastLegalIntegerType);
18142 
18143     // We add +1 here because LastConsecutiveLoad is an index into the
18144     // array, while NumElem is a count of elements.
18145     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18146     NumElem = std::min(LastLegalType, NumElem);
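    // Worked example: with 4 consecutive stores whose values come from only 3
    // consecutive loads (LastConsecutiveLoad == 2) and LastLegalType == 4,
    // NumElem = min(4, 2 + 1) = 3, then min(4, 3) = 3 elements are merged.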
18147     Align FirstLoadAlign = FirstLoad->getAlign();
18148 
18149     if (NumElem < 2) {
18150       // We know that candidate stores are in order and of correct
18151       // shape. While there may be no mergeable sequence starting at the
18152       // beginning, one may start later in the sequence. The only
18153       // reason a merge of size N could have failed where another of
18154       // the same size would not have is if the alignment of either
18155       // the load or store has improved. Drop as many candidates as we
18156       // can here.
18157       unsigned NumSkip = 1;
18158       while ((NumSkip < LoadNodes.size()) &&
18159              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18160              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18161         NumSkip++;
18162       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18163       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18164       NumConsecutiveStores -= NumSkip;
18165       continue;
18166     }
18167 
18168     // Check that we can merge these candidates without causing a cycle.
18169     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18170                                                   RootNode)) {
18171       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18172       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18173       NumConsecutiveStores -= NumElem;
18174       continue;
18175     }
18176 
18177     // Decide whether it is better to use vectors or integers to load and
18178     // store to memory.
18179     EVT JointMemOpVT;
18180     if (UseVectorTy) {
18181       // Find a legal type for the vector store.
18182       unsigned Elts = NumElem * NumMemElts;
18183       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18184     } else {
18185       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18186       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18187     }
18188 
18189     SDLoc LoadDL(LoadNodes[0].MemNode);
18190     SDLoc StoreDL(StoreNodes[0].MemNode);
18191 
18192     // The merged loads are required to have the same incoming chain, so
18193     // using the first's chain is acceptable.
18194 
18195     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18196     AddToWorklist(NewStoreChain.getNode());
18197 
18198     MachineMemOperand::Flags LdMMOFlags =
18199         isDereferenceable ? MachineMemOperand::MODereferenceable
18200                           : MachineMemOperand::MONone;
18201     if (IsNonTemporalLoad)
18202       LdMMOFlags |= MachineMemOperand::MONonTemporal;
18203 
18204     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18205                                               ? MachineMemOperand::MONonTemporal
18206                                               : MachineMemOperand::MONone;
18207 
18208     SDValue NewLoad, NewStore;
18209     if (UseVectorTy || !DoIntegerTruncate) {
18210       NewLoad = DAG.getLoad(
18211           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18212           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18213       SDValue StoreOp = NewLoad;
18214       if (NeedRotate) {
18215         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18216         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18217                "Unexpected type for rotate-able load pair");
18218         SDValue RotAmt =
18219             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18220         // Target can convert to the identical ROTR if it does not have ROTL.
18221         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18222       }
18223       NewStore = DAG.getStore(
18224           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18225           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18226     } else { // This must be the truncstore/extload case
18227       EVT ExtendedTy =
18228           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18229       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18230                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
18231                                FirstLoad->getPointerInfo(), JointMemOpVT,
18232                                FirstLoadAlign, LdMMOFlags);
18233       NewStore = DAG.getTruncStore(
18234           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18235           FirstInChain->getPointerInfo(), JointMemOpVT,
18236           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18237     }
18238 
18239     // Transfer chain users from old loads to the new load.
18240     for (unsigned i = 0; i < NumElem; ++i) {
18241       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18242       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18243                                     SDValue(NewLoad.getNode(), 1));
18244     }
18245 
18246     // Replace all stores with the new store. Recursively remove corresponding
18247     // values if they are no longer used.
18248     for (unsigned i = 0; i < NumElem; ++i) {
18249       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18250       CombineTo(StoreNodes[i].MemNode, NewStore);
18251       if (Val.getNode()->use_empty())
18252         recursivelyDeleteUnusedNodes(Val.getNode());
18253     }
18254 
18255     MadeChange = true;
18256     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18257     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18258     NumConsecutiveStores -= NumElem;
18259   }
18260   return MadeChange;
18261 }
18262 
18263 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18264   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18265     return false;
18266 
18267   // TODO: Extend this function to merge stores of scalable vectors.
18268   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18269   // store since we know <vscale x 16 x i8> is exactly twice as large as
18270   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18271   EVT MemVT = St->getMemoryVT();
18272   if (MemVT.isScalableVector())
18273     return false;
18274   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18275     return false;
18276 
18277   // This function cannot currently deal with non-byte-sized memory sizes.
18278   int64_t ElementSizeBytes = MemVT.getStoreSize();
18279   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18280     return false;
18281 
18282   // Do not bother looking at stored values that are not constants, loads, or
18283   // extracted vector elements.
18284   SDValue StoredVal = peekThroughBitcasts(St->getValue());
18285   const StoreSource StoreSrc = getStoreSource(StoredVal);
18286   if (StoreSrc == StoreSource::Unknown)
18287     return false;
18288 
18289   SmallVector<MemOpLink, 8> StoreNodes;
18290   SDNode *RootNode;
18291   // Find potential store merge candidates by searching through the chain sub-DAG.
18292   getStoreMergeCandidates(St, StoreNodes, RootNode);
18293 
18294   // Check if there is anything to merge.
18295   if (StoreNodes.size() < 2)
18296     return false;
18297 
18298   // Sort the memory operands according to their distance from the
18299   // base pointer.
18300   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18301     return LHS.OffsetFromBase < RHS.OffsetFromBase;
18302   });
18303 
18304   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18305       Attribute::NoImplicitFloat);
18306   bool IsNonTemporalStore = St->isNonTemporal();
18307   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18308                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
18309 
18310   // Store merging attempts to merge the lowest stores first. This generally
18311   // works out as, if successful, the remaining stores are checked
18312   // after the first collection of stores is merged. However, in the
18313   // case that a non-mergeable store is found first, e.g., {p[-2],
18314   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18315   // mergeable cases. To prevent this, we prune such stores from the
18316   // front of StoreNodes here.
18317   bool MadeChange = false;
18318   while (StoreNodes.size() > 1) {
18319     unsigned NumConsecutiveStores =
18320         getConsecutiveStores(StoreNodes, ElementSizeBytes);
18321     // There are no more stores in the list to examine.
18322     if (NumConsecutiveStores == 0)
18323       return MadeChange;
18324 
18325     // We have at least 2 consecutive stores. Try to merge them.
18326     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18327     switch (StoreSrc) {
18328     case StoreSource::Constant:
18329       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18330                                              MemVT, RootNode, AllowVectors);
18331       break;
18332 
18333     case StoreSource::Extract:
18334       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18335                                             MemVT, RootNode);
18336       break;
18337 
18338     case StoreSource::Load:
18339       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18340                                          MemVT, RootNode, AllowVectors,
18341                                          IsNonTemporalStore, IsNonTemporalLoad);
18342       break;
18343 
18344     default:
18345       llvm_unreachable("Unhandled store source type");
18346     }
18347   }
18348   return MadeChange;
18349 }
18350 
18351 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18352   SDLoc SL(ST);
18353   SDValue ReplStore;
18354 
18355   // Replace the chain to avoid dependency.
18356   if (ST->isTruncatingStore()) {
18357     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18358                                   ST->getBasePtr(), ST->getMemoryVT(),
18359                                   ST->getMemOperand());
18360   } else {
18361     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18362                              ST->getMemOperand());
18363   }
18364 
18365   // Create token to keep both nodes around.
18366   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18367                               MVT::Other, ST->getChain(), ReplStore);
18368 
18369   // Make sure the new and old chains are cleaned up.
18370   AddToWorklist(Token.getNode());
18371 
18372   // Don't add users to work list.
18373   return CombineTo(ST, Token, false);
18374 }
18375 
18376 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18377   SDValue Value = ST->getValue();
18378   if (Value.getOpcode() == ISD::TargetConstantFP)
18379     return SDValue();
18380 
18381   if (!ISD::isNormalStore(ST))
18382     return SDValue();
18383 
18384   SDLoc DL(ST);
18385 
18386   SDValue Chain = ST->getChain();
18387   SDValue Ptr = ST->getBasePtr();
18388 
18389   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18390 
18391   // NOTE: If the original store is volatile, this transform must not increase
18392   // the number of stores.  For example, on x86-32 an f64 can be stored in one
18393   // processor operation but an i64 (which is not legal) requires two.  So the
18394   // transform should not be done in this case.
18395 
18396   SDValue Tmp;
18397   switch (CFP->getSimpleValueType(0).SimpleTy) {
18398   default:
18399     llvm_unreachable("Unknown FP type");
18400   case MVT::f16:    // We don't do this for these yet.
18401   case MVT::f80:
18402   case MVT::f128:
18403   case MVT::ppcf128:
18404     return SDValue();
18405   case MVT::f32:
18406     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18407         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
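      // For example, 'store float 1.0, Ptr' becomes
      // 'store i32 0x3F800000, Ptr' (the bit pattern of 1.0f).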
18409       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18410                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18411                             MVT::i32);
18412       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18413     }
18414 
18415     return SDValue();
18416   case MVT::f64:
18417     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18418          ST->isSimple()) ||
18419         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18421       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18422                             getZExtValue(), SDLoc(CFP), MVT::i64);
18423       return DAG.getStore(Chain, DL, Tmp,
18424                           Ptr, ST->getMemOperand());
18425     }
18426 
18427     if (ST->isSimple() &&
18428         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18429       // Many FP stores are not made apparent until after legalize, e.g. for
18430       // argument passing.  Since this is so common, custom legalize the
18431       // 64-bit integer store into two 32-bit stores.
18432       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18433       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18434       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18435       if (DAG.getDataLayout().isBigEndian())
18436         std::swap(Lo, Hi);
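      // For example, on a little-endian target, f64 1.0 has the bit pattern
      // 0x3FF0000000000000, so this emits 'store i32 0x0' at Ptr and
      // 'store i32 0x3FF00000' at Ptr+4.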
18437 
18438       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18439       AAMDNodes AAInfo = ST->getAAInfo();
18440 
18441       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18442                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18443       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18444       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18445                                  ST->getPointerInfo().getWithOffset(4),
18446                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18447       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18448                          St0, St1);
18449     }
18450 
18451     return SDValue();
18452   }
18453 }
18454 
18455 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18456   StoreSDNode *ST  = cast<StoreSDNode>(N);
18457   SDValue Chain = ST->getChain();
18458   SDValue Value = ST->getValue();
18459   SDValue Ptr   = ST->getBasePtr();
18460 
18461   // If this is a store of a bit convert, store the input value if the
18462   // resultant store does not need a higher alignment than the original.
18463   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18464       ST->isUnindexed()) {
18465     EVT SVT = Value.getOperand(0).getValueType();
18466     // If the store is volatile, we only want to change the store type if the
18467     // resulting store is legal. Otherwise we might increase the number of
18468     // memory accesses. We don't care if the original type was legal or not
18469     // as we assume software couldn't rely on the number of accesses of an
18470     // illegal type.
18471     // TODO: May be able to relax for unordered atomics (see D66309)
18472     if (((!LegalOperations && ST->isSimple()) ||
18473          TLI.isOperationLegal(ISD::STORE, SVT)) &&
18474         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18475                                      DAG, *ST->getMemOperand())) {
18476       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18477                           ST->getMemOperand());
18478     }
18479   }
18480 
18481   // Turn 'store undef, Ptr' -> nothing.
18482   if (Value.isUndef() && ST->isUnindexed())
18483     return Chain;
18484 
18485   // Try to infer better alignment information than the store already has.
18486   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18487     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18488       if (*Alignment > ST->getAlign() &&
18489           isAligned(*Alignment, ST->getSrcValueOffset())) {
18490         SDValue NewStore =
18491             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18492                               ST->getMemoryVT(), *Alignment,
18493                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
18494         // NewStore will always be N as we are only refining the alignment
18495         assert(NewStore.getNode() == N);
18496         (void)NewStore;
18497       }
18498     }
18499   }
18500 
18501   // Try transforming a pair of floating point load / store ops to integer
18502   // load / store ops.
18503   if (SDValue NewST = TransformFPLoadStorePair(N))
18504     return NewST;
18505 
18506   // Try transforming several stores into STORE (BSWAP).
18507   if (SDValue Store = mergeTruncStores(ST))
18508     return Store;
18509 
18510   if (ST->isUnindexed()) {
18511     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18512     // adjacent stores.
18513     if (findBetterNeighborChains(ST)) {
18514       // replaceStoreChain uses CombineTo, which handles all of the worklist
18515       // manipulation. Return the original node so nothing else is done.
18516       return SDValue(ST, 0);
18517     }
18518     Chain = ST->getChain();
18519   }
18520 
18521   // FIXME: is there such a thing as a truncating indexed store?
18522   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18523       Value.getValueType().isInteger() &&
18524       (!isa<ConstantSDNode>(Value) ||
18525        !cast<ConstantSDNode>(Value)->isOpaque())) {
18526     // Convert a truncating store of an extension into a standard store.
18527     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
18528          Value.getOpcode() == ISD::SIGN_EXTEND ||
18529          Value.getOpcode() == ISD::ANY_EXTEND) &&
18530         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
18531         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
18532       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18533                           ST->getMemOperand());
18534 
18535     APInt TruncDemandedBits =
18536         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18537                              ST->getMemoryVT().getScalarSizeInBits());
18538 
18539     // See if we can simplify the input to this truncstore with knowledge that
18540     // only the low bits are being used.  For example:
18541     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18542     AddToWorklist(Value.getNode());
18543     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18544       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18545                                ST->getMemOperand());
18546 
18547     // Otherwise, see if we can simplify the operation with
18548     // SimplifyDemandedBits, which only works if the value has a single use.
18549     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18550       // Re-visit the store if anything changed and the store hasn't been merged
18551       // with another node (N is deleted). SimplifyDemandedBits will add Value's
18552       // node back to the worklist if necessary, but we also need to re-visit
18553       // the Store node itself.
18554       if (N->getOpcode() != ISD::DELETED_NODE)
18555         AddToWorklist(N);
18556       return SDValue(N, 0);
18557     }
18558   }
18559 
18560   // If this is a load followed by a store to the same location, then the store
18561   // is dead/noop.
18562   // TODO: Can relax for unordered atomics (see D66309)
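  // For example, in 'v = *p; *p = v;' with no side effects in between, the
  // store writes back the value just loaded and can be removed.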
18563   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18564     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18565         ST->isUnindexed() && ST->isSimple() &&
18566         Ld->getAddressSpace() == ST->getAddressSpace() &&
18567         // There can't be any side effects between the load and store, such as
18568         // a call or store.
18569         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18570       // The store is dead, remove it.
18571       return Chain;
18572     }
18573   }
18574 
18575   // TODO: Can relax for unordered atomics (see D66309)
18576   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18577     if (ST->isUnindexed() && ST->isSimple() &&
18578         ST1->isUnindexed() && ST1->isSimple()) {
18579       if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18580           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18581           ST->getAddressSpace() == ST1->getAddressSpace()) {
18582         // If this is a store followed by a store with the same value to the
18583         // same location, then the store is dead/noop.
18584         return Chain;
18585       }
18586 
18587       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18588           !ST1->getBasePtr().isUndef() &&
18589           // BaseIndexOffset and the code below requires knowing the size
18590           // of a vector, so bail out if MemoryVT is scalable.
18591           !ST->getMemoryVT().isScalableVector() &&
18592           !ST1->getMemoryVT().isScalableVector() &&
18593           ST->getAddressSpace() == ST1->getAddressSpace()) {
18594         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18595         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18596         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18597         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18598         // If the preceding store writes to a subset of the current store's
18599         // location and no other node is chained to that store, we can
18600         // effectively drop that store. Do not remove stores to undef as they
18601         // may be used as data sinks.
18602         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18603           CombineTo(ST1, ST1->getChain());
18604           return SDValue();
18605         }
18606       }
18607     }
18608   }
18609 
18610   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18611   // truncating store.  We can do this even if this is already a truncstore.
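  // Illustrative example: (store (fp_round f64:X to f32), Ptr) becomes
  // (truncstore f64:X, Ptr, f32) when the target can combine them.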
18612   if ((Value.getOpcode() == ISD::FP_ROUND ||
18613        Value.getOpcode() == ISD::TRUNCATE) &&
18614       Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18615       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18616                                ST->getMemoryVT(), LegalOperations)) {
18617     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18618                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
18619   }
18620 
18621   // Always perform this optimization before types are legal. If the target
18622   // prefers, also try this after legalization to catch stores that were created
18623   // by intrinsics or other nodes.
18624   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18625     while (true) {
18626       // There can be multiple store sequences on the same chain.
18627       // Keep trying to merge store sequences until we are unable to do so
18628       // or until we merge the last store on the chain.
18629       bool Changed = mergeConsecutiveStores(ST);
18630       if (!Changed) break;
18631       // Return N, as merging only uses CombineTo and no worklist cleanup
18632       // is necessary.
18633       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18634         return SDValue(N, 0);
18635     }
18636   }
18637 
18638   // Try transforming N to an indexed store.
18639   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18640     return SDValue(N, 0);
18641 
18642   // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
18643   //
18644   // Make sure to do this only after attempting to merge stores in order to
18645   //  avoid changing the types of some subset of stores due to visit order,
18646   //  preventing their merging.
18647   if (isa<ConstantFPSDNode>(ST->getValue())) {
18648     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18649       return NewSt;
18650   }
18651 
18652   if (SDValue NewSt = splitMergedValStore(ST))
18653     return NewSt;
18654 
18655   return ReduceLoadOpStoreWidth(N);
18656 }
18657 
18658 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18659   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18660   if (!LifetimeEnd->hasOffset())
18661     return SDValue();
18662 
18663   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18664                                         LifetimeEnd->getOffset(), false);
18665 
18666   // We walk up the chains to find stores.
18667   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18668   while (!Chains.empty()) {
18669     SDValue Chain = Chains.pop_back_val();
18670     if (!Chain.hasOneUse())
18671       continue;
18672     switch (Chain.getOpcode()) {
18673     case ISD::TokenFactor:
18674       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18675         Chains.push_back(Chain.getOperand(--Nops));
18676       break;
18677     case ISD::LIFETIME_START:
18678     case ISD::LIFETIME_END:
18679       // We can forward past any lifetime start/end that can be proven not to
18680       // alias the node.
18681       if (!mayAlias(Chain.getNode(), N))
18682         Chains.push_back(Chain.getOperand(0));
18683       break;
18684     case ISD::STORE: {
18685       StoreSDNode *ST = cast<StoreSDNode>(Chain);
18686       // TODO: Can relax for unordered atomics (see D66309)
18687       if (!ST->isSimple() || ST->isIndexed())
18688         continue;
18689       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18690       // The bounds of a scalable store are not known until runtime, so this
18691       // store cannot be elided.
18692       if (StoreSize.isScalable())
18693         continue;
18694       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18695       // If we store purely within object bounds just before its lifetime ends,
18696       // we can remove the store.
18697       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
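      // For example, a store that writes entirely within an alloca's bounds
      // immediately before the LIFETIME_END of that alloca is dead.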
18698                                    StoreSize.getFixedSize() * 8)) {
18699         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18700                    dbgs() << "\nwithin LIFETIME_END of : ";
18701                    LifetimeEndBase.dump(); dbgs() << "\n");
18702         CombineTo(ST, ST->getChain());
18703         return SDValue(N, 0);
18704       }
18705     }
18706     }
18707   }
18708   return SDValue();
18709 }
18710 
18711 /// For the store instruction sequence below, the F and I values
18712 /// are bundled together as an i64 value before being stored into memory.
18713 /// Sometimes it is more efficient to generate separate stores for F and I,
18714 /// which can remove the bitwise instructions or sink them to colder places.
18715 ///
18716 ///   (store (or (zext (bitcast F to i32) to i64),
18717 ///              (shl (zext I to i64), 32)), addr)  -->
18718 ///   (store F, addr) and (store I, addr+4)
18719 ///
18720 /// Similarly, splitting other merged stores can also be beneficial, e.g.:
18721 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18722 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18723 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18724 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18725 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18726 ///
18727 /// We allow each target to determine specifically which kind of splitting is
18728 /// supported.
18729 ///
18730 /// The store patterns are commonly seen in the simple code snippet below if
18731 /// only std::make_pair(...) is SROA-transformed before being inlined into hoo().
18732 ///   void goo(const std::pair<int, float> &);
18733 ///   void hoo() {
18734 ///     ...
18735 ///     goo(std::make_pair(tmp, ftmp));
18736 ///     ...
18737 ///   }
18738 ///
18739 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18740   if (OptLevel == CodeGenOpt::None)
18741     return SDValue();
18742 
18743   // Can't change the number of memory accesses for a volatile store or break
18744   // atomicity for an atomic one.
18745   if (!ST->isSimple())
18746     return SDValue();
18747 
18748   SDValue Val = ST->getValue();
18749   SDLoc DL(ST);
18750 
18751   // Match OR operand.
18752   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18753     return SDValue();
18754 
18755   // Match SHL operand and get Lower and Higher parts of Val.
18756   SDValue Op1 = Val.getOperand(0);
18757   SDValue Op2 = Val.getOperand(1);
18758   SDValue Lo, Hi;
18759   if (Op1.getOpcode() != ISD::SHL) {
18760     std::swap(Op1, Op2);
18761     if (Op1.getOpcode() != ISD::SHL)
18762       return SDValue();
18763   }
18764   Lo = Op2;
18765   Hi = Op1.getOperand(0);
18766   if (!Op1.hasOneUse())
18767     return SDValue();
18768 
18769   // Match shift amount to HalfValBitSize.
18770   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18771   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18772   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18773     return SDValue();
18774 
18775   // Lo and Hi must be zero-extended from integer types whose size is at
18776   // most HalfValBitSize.
18777   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18778       !Lo.getOperand(0).getValueType().isScalarInteger() ||
18779       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18780       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18781       !Hi.getOperand(0).getValueType().isScalarInteger() ||
18782       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18783     return SDValue();
18784 
18785   // Use the EVT of low and high parts before bitcast as the input
18786   // of target query.
18787   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18788                   ? Lo.getOperand(0).getValueType()
18789                   : Lo.getValueType();
18790   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18791                    ? Hi.getOperand(0).getValueType()
18792                    : Hi.getValueType();
18793   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18794     return SDValue();
18795 
18796   // Start to split store.
18797   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18798   AAMDNodes AAInfo = ST->getAAInfo();
18799 
18800   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18801   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18802   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18803   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18804 
18805   SDValue Chain = ST->getChain();
18806   SDValue Ptr = ST->getBasePtr();
18807   // Lower value store.
18808   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18809                              ST->getOriginalAlign(), MMOFlags, AAInfo);
18810   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18811   // Higher value store.
18812   SDValue St1 = DAG.getStore(
18813       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18814       ST->getOriginalAlign(), MMOFlags, AAInfo);
18815   return St1;
18816 }
18817 
18818 /// Convert a disguised subvector insertion into a shuffle:
18819 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18820   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18821          "Expected insert_vector_elt");
18822   SDValue InsertVal = N->getOperand(1);
18823   SDValue Vec = N->getOperand(0);
18824 
18825   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18826   // InsIndex)
18827   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
18828   //   CONCAT_VECTORS.
18829   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18830       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18831       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18832     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18833     ArrayRef<int> Mask = SVN->getMask();
18834 
18835     SDValue X = Vec.getOperand(0);
18836     SDValue Y = Vec.getOperand(1);
18837 
18838     // Vec's operand 0 is using indices from 0 to N-1 and
18839     // operand 1 from N to 2N - 1, where N is the number of
18840     // elements in the vectors.
18841     SDValue InsertVal0 = InsertVal.getOperand(0);
18842     int ElementOffset = -1;
18843 
18844     // We explore the inputs of the shuffle in order to see if we find the
18845     // source of the extract_vector_elt. If so, we can use it to modify the
18846     // shuffle rather than perform an insert_vector_elt.
18847     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18848     ArgWorkList.emplace_back(Mask.size(), Y);
18849     ArgWorkList.emplace_back(0, X);
18850 
18851     while (!ArgWorkList.empty()) {
18852       int ArgOffset;
18853       SDValue ArgVal;
18854       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18855 
18856       if (ArgVal == InsertVal0) {
18857         ElementOffset = ArgOffset;
18858         break;
18859       }
18860 
18861       // Peek through concat_vector.
18862       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18863         int CurrentArgOffset =
18864             ArgOffset + ArgVal.getValueType().getVectorNumElements();
18865         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18866         for (SDValue Op : reverse(ArgVal->ops())) {
18867           CurrentArgOffset -= Step;
18868           ArgWorkList.emplace_back(CurrentArgOffset, Op);
18869         }
18870 
18871         // Make sure we went through all the elements and did not screw up index
18872         // computation.
18873         assert(CurrentArgOffset == ArgOffset);
18874       }
18875     }
18876 
18877     if (ElementOffset != -1) {
18878       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18879 
18880       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18881       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18882       assert(NewMask[InsIndex] <
18883                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18884              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18885 
18886       SDValue LegalShuffle =
18887               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18888                                           Y, NewMask, DAG);
18889       if (LegalShuffle)
18890         return LegalShuffle;
18891     }
18892   }
18893 
18894   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18895   // bitcast(shuffle (bitcast V), (extended X), Mask)
18896   // Note: We do not use an insert_subvector node because that requires a
18897   // legal subvector type.
18898   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18899       !InsertVal.getOperand(0).getValueType().isVector())
18900     return SDValue();
18901 
18902   SDValue SubVec = InsertVal.getOperand(0);
18903   SDValue DestVec = N->getOperand(0);
18904   EVT SubVecVT = SubVec.getValueType();
18905   EVT VT = DestVec.getValueType();
18906   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18907   // If the source only has a single vector element, the cost of creating and
18908   // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
18909   if (NumSrcElts == 1)
18910     return SDValue();
18911   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18912   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18913 
18914   // Step 1: Create a shuffle mask that implements this insert operation. The
18915   // vector that we are inserting into will be operand 0 of the shuffle, so
18916   // those elements are just 'i'. The inserted subvector is in the first
18917   // positions of operand 1 of the shuffle. Example:
18918   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18919   SmallVector<int, 16> Mask(NumMaskVals);
18920   for (unsigned i = 0; i != NumMaskVals; ++i) {
18921     if (i / NumSrcElts == InsIndex)
18922       Mask[i] = (i % NumSrcElts) + NumMaskVals;
18923     else
18924       Mask[i] = i;
18925   }
18926 
18927   // Bail out if the target cannot handle the shuffle we want to create.
18928   EVT SubVecEltVT = SubVecVT.getVectorElementType();
18929   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18930   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18931     return SDValue();
18932 
18933   // Step 2: Create a wide vector from the inserted source vector by appending
18934   // undefined elements. This is the same size as our destination vector.
18935   SDLoc DL(N);
18936   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18937   ConcatOps[0] = SubVec;
18938   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18939 
18940   // Step 3: Shuffle in the padded subvector.
18941   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18942   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18943   AddToWorklist(PaddedSubV.getNode());
18944   AddToWorklist(DestVecBC.getNode());
18945   AddToWorklist(Shuf.getNode());
18946   return DAG.getBitcast(VT, Shuf);
18947 }
18948 
18949 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18950   SDValue InVec = N->getOperand(0);
18951   SDValue InVal = N->getOperand(1);
18952   SDValue EltNo = N->getOperand(2);
18953   SDLoc DL(N);
18954 
18955   EVT VT = InVec.getValueType();
18956   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18957 
18958   // Inserting into an out-of-bounds element is undefined.
18959   if (IndexC && VT.isFixedLengthVector() &&
18960       IndexC->getZExtValue() >= VT.getVectorNumElements())
18961     return DAG.getUNDEF(VT);
18962 
18963   // Remove redundant insertions:
18964   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18965   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18966       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18967     return InVec;
18968 
18969   if (!IndexC) {
18970     // If this is a variable insert into an undef vector, it might be better to splat:
18971     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18972     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18973       if (VT.isScalableVector())
18974         return DAG.getSplatVector(VT, DL, InVal);
18975       else {
18976         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18977         return DAG.getBuildVector(VT, DL, Ops);
18978       }
18979     }
18980     return SDValue();
18981   }
18982 
18983   if (VT.isScalableVector())
18984     return SDValue();
18985 
18986   unsigned NumElts = VT.getVectorNumElements();
18987 
18988   // We must know which element is being inserted for folds below here.
18989   unsigned Elt = IndexC->getZExtValue();
18990   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18991     return Shuf;
18992 
18993   // Canonicalize insert_vector_elt dag nodes.
18994   // Example:
18995   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18996   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18997   //
18998   // Do this only if the child insert_vector node has one use; also
18999   // do this only if indices are both constants and Idx1 < Idx0.
19000   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19001       && isa<ConstantSDNode>(InVec.getOperand(2))) {
19002     unsigned OtherElt = InVec.getConstantOperandVal(2);
19003     if (Elt < OtherElt) {
19004       // Swap nodes.
19005       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19006                                   InVec.getOperand(0), InVal, EltNo);
19007       AddToWorklist(NewOp.getNode());
19008       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19009                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19010     }
19011   }
19012 
19013   // If we can't generate a legal BUILD_VECTOR, exit
19014   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
19015     return SDValue();
19016 
19017   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19018   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
19019   // vector elements.
19020   SmallVector<SDValue, 8> Ops;
19021   // Do not combine these two vectors if the output vector will not replace
19022   // the input vector.
19023   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19024     Ops.append(InVec.getNode()->op_begin(),
19025                InVec.getNode()->op_end());
19026   } else if (InVec.isUndef()) {
19027     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19028   } else {
19029     return SDValue();
19030   }
19031   assert(Ops.size() == NumElts && "Unexpected vector size");
19032 
19033   // Insert the element
19034   if (Elt < Ops.size()) {
19035     // All the operands of BUILD_VECTOR must have the same type;
19036     // we enforce that here.
19037     EVT OpVT = Ops[0].getValueType();
19038     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19039   }
19040 
19041   // Return the new vector
19042   return DAG.getBuildVector(VT, DL, Ops);
19043 }
19044 
19045 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19046                                                   SDValue EltNo,
19047                                                   LoadSDNode *OriginalLoad) {
19048   assert(OriginalLoad->isSimple());
19049 
19050   EVT ResultVT = EVE->getValueType(0);
19051   EVT VecEltVT = InVecVT.getVectorElementType();
19052 
19053   // If the vector element type is not a multiple of a byte then we are unable
19054   // to correctly compute an address to load only the extracted element as a
19055   // scalar.
19056   if (!VecEltVT.isByteSized())
19057     return SDValue();
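  // Illustrative example: (extractelt (load <4 x i32>, %p), 2) can instead
  // load a single i32 from address %p + 8.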
19058 
19059   ISD::LoadExtType ExtTy =
19060       ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
19061   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19062       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19063     return SDValue();
19064 
19065   Align Alignment = OriginalLoad->getAlign();
19066   MachinePointerInfo MPI;
19067   SDLoc DL(EVE);
19068   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
19069     int Elt = ConstEltNo->getZExtValue();
19070     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19071     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19072     Alignment = commonAlignment(Alignment, PtrOff);
19073   } else {
19074     // Discard the pointer info except the address space because the memory
19075     // operand can't represent this new access since the offset is variable.
19076     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19077     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19078   }
19079 
19080   bool IsFast = false;
19081   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19082                               OriginalLoad->getAddressSpace(), Alignment,
19083                               OriginalLoad->getMemOperand()->getFlags(),
19084                               &IsFast) ||
19085       !IsFast)
19086     return SDValue();
19087 
19088   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19089                                                InVecVT, EltNo);
19090 
19091   // The replacement we need to do here is a little tricky: we need to
19092   // replace an extractelement of a load with a load.
19093   // Use ReplaceAllUsesOfValuesWith to do the replacement.
19094   // Note that this replacement assumes that the extractelement is the only
19095   // use of the load; that's okay because we don't want to perform this
19096   // transformation in other cases anyway.
19097   SDValue Load;
19098   SDValue Chain;
19099   if (ResultVT.bitsGT(VecEltVT)) {
19100     // If the result type of vextract is wider than the load, then issue an
19101     // extending load instead.
19102     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
19103                                                   VecEltVT)
19104                                    ? ISD::ZEXTLOAD
19105                                    : ISD::EXTLOAD;
19106     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
19107                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
19108                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
19109                           OriginalLoad->getAAInfo());
19110     Chain = Load.getValue(1);
19111   } else {
19112     Load = DAG.getLoad(
19113         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
19114         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
19115     Chain = Load.getValue(1);
19116     if (ResultVT.bitsLT(VecEltVT))
19117       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
19118     else
19119       Load = DAG.getBitcast(ResultVT, Load);
19120   }
19121   WorklistRemover DeadNodes(*this);
19122   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
19123   SDValue To[] = { Load, Chain };
19124   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
19125   // Make sure to revisit this node to clean it up; it will usually be dead.
19126   AddToWorklist(EVE);
19127   // Since we're explicitly calling ReplaceAllUses, add the new node to the
19128   // worklist explicitly as well.
19129   AddToWorklistWithUsers(Load.getNode());
19130   ++OpsNarrowed;
19131   return SDValue(EVE, 0);
19132 }
19133 
19134 /// Transform a vector binary operation into a scalar binary operation by moving
19135 /// the math/logic after an extract element of a vector.
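      /// E.g. (illustrative): extractelt (add X, <1,2,3,4>), 2
      ///   --> add (extractelt X, 2), 3
      /// once the extract of the constant operand is constant-folded.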
19136 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19137                                        bool LegalOperations) {
19138   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19139   SDValue Vec = ExtElt->getOperand(0);
19140   SDValue Index = ExtElt->getOperand(1);
19141   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19142   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19143       Vec.getNode()->getNumValues() != 1)
19144     return SDValue();
19145 
19146   // Targets may want to avoid this to prevent an expensive register transfer.
19147   if (!TLI.shouldScalarizeBinop(Vec))
19148     return SDValue();
19149 
19150   // Extracting an element of a vector constant is constant-folded, so this
19151   // transform is just replacing a vector op with a scalar op while moving the
19152   // extract.
19153   SDValue Op0 = Vec.getOperand(0);
19154   SDValue Op1 = Vec.getOperand(1);
19155   if (isAnyConstantBuildVector(Op0, true) ||
19156       isAnyConstantBuildVector(Op1, true)) {
19157     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19158     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19159     SDLoc DL(ExtElt);
19160     EVT VT = ExtElt->getValueType(0);
19161     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19162     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19163     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19164   }
19165 
19166   return SDValue();
19167 }
19168 
19169 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19170   SDValue VecOp = N->getOperand(0);
19171   SDValue Index = N->getOperand(1);
19172   EVT ScalarVT = N->getValueType(0);
19173   EVT VecVT = VecOp.getValueType();
19174   if (VecOp.isUndef())
19175     return DAG.getUNDEF(ScalarVT);
19176 
19177   // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19178   //
19179   // This only really matters if the index is non-constant since other combines
19180   // on the constant elements already work.
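        // E.g. (illustrative): extractelt (insertelt V, %val, %idx), %idx
        // folds to %val even when %idx is a run-time value.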
19181   SDLoc DL(N);
19182   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19183       Index == VecOp.getOperand(2)) {
19184     SDValue Elt = VecOp.getOperand(1);
19185     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19186   }
19187 
19188   // (vextract (scalar_to_vector val), 0) -> val
19189   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19190     // Only 0'th element of SCALAR_TO_VECTOR is defined.
19191     if (DAG.isKnownNeverZero(Index))
19192       return DAG.getUNDEF(ScalarVT);
19193 
19194     // Check if the result type doesn't match the inserted element type. A
19195     // SCALAR_TO_VECTOR may truncate the inserted element and the
19196     // EXTRACT_VECTOR_ELT may widen the extracted vector.
19197     SDValue InOp = VecOp.getOperand(0);
19198     if (InOp.getValueType() != ScalarVT) {
19199       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19200       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19201     }
19202     return InOp;
19203   }
19204 
19205   // extract_vector_elt of out-of-bounds element -> UNDEF
19206   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19207   if (IndexC && VecVT.isFixedLengthVector() &&
19208       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19209     return DAG.getUNDEF(ScalarVT);
19210 
19211   // extract_vector_elt (build_vector x, y), 1 -> y
19212   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19213        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19214       TLI.isTypeLegal(VecVT) &&
19215       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19216     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19217             VecVT.isFixedLengthVector()) &&
19218            "BUILD_VECTOR used for scalable vectors");
19219     unsigned IndexVal =
19220         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19221     SDValue Elt = VecOp.getOperand(IndexVal);
19222     EVT InEltVT = Elt.getValueType();
19223 
19224     // Sometimes build_vector's scalar input types do not match result type.
19225     if (ScalarVT == InEltVT)
19226       return Elt;
19227 
19228     // TODO: It may be useful to truncate when the truncation is free and the
19229     // build_vector implicitly converts.
19230   }
19231 
19232   if (VecVT.isScalableVector())
19233     return SDValue();
19234 
19235   // All the code from this point onwards assumes fixed width vectors, but it's
19236   // possible that some of the combinations could be made to work for scalable
19237   // vectors too.
19238   unsigned NumElts = VecVT.getVectorNumElements();
19239   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19240 
19241   // TODO: These transforms should not require the 'hasOneUse' restriction, but
19242   // there are regressions on multiple targets without it. We can end up with a
19243   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19244   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19245       VecOp.hasOneUse()) {
19246     // The vector index of the LSBs of the source depends on endianness.
19247     bool IsLE = DAG.getDataLayout().isLittleEndian();
19248     unsigned ExtractIndex = IndexC->getZExtValue();
19249     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19250     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19251     SDValue BCSrc = VecOp.getOperand(0);
19252     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19253       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19254 
19255     if (LegalTypes && BCSrc.getValueType().isInteger() &&
19256         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19257       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19258       // trunc i64 X to i32
19259       SDValue X = BCSrc.getOperand(0);
19260       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19261              "Extract element and scalar to vector can't change element type "
19262              "from FP to integer.");
19263       unsigned XBitWidth = X.getValueSizeInBits();
19264       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19265 
19266       // An extract element return value type can be wider than its vector
19267       // operand element type. In that case, the high bits are undefined, so
19268       // it's possible that we may need to extend rather than truncate.
19269       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19270         assert(XBitWidth % VecEltBitWidth == 0 &&
19271                "Scalar bitwidth must be a multiple of vector element bitwidth");
19272         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19273       }
19274     }
19275   }
19276 
19277   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19278     return BO;
19279 
19280   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19281   // We only perform this optimization before the op legalization phase because
19282   // we may introduce new vector instructions which are not backed by TD
19283   // patterns. For example on AVX, extracting elements from a wide vector
19284   // without using extract_subvector. However, if we can find an underlying
19285   // scalar value, then we can always use that.
19286   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19287     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19288     // Find the new index to extract from.
19289     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19290 
19291     // Extracting an undef index is undef.
19292     if (OrigElt == -1)
19293       return DAG.getUNDEF(ScalarVT);
19294 
19295     // Select the right vector half to extract from.
19296     SDValue SVInVec;
19297     if (OrigElt < (int)NumElts) {
19298       SVInVec = VecOp.getOperand(0);
19299     } else {
19300       SVInVec = VecOp.getOperand(1);
19301       OrigElt -= NumElts;
19302     }
19303 
19304     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19305       SDValue InOp = SVInVec.getOperand(OrigElt);
19306       if (InOp.getValueType() != ScalarVT) {
19307         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19308         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19309       }
19310 
19311       return InOp;
19312     }
19313 
19314     // FIXME: We should handle recursing on other vector shuffles and
19315     // scalar_to_vector here as well.
19316 
19317     if (!LegalOperations ||
19318         // FIXME: Should really be just isOperationLegalOrCustom.
19319         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19320         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19321       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19322                          DAG.getVectorIdxConstant(OrigElt, DL));
19323     }
19324   }
19325 
19326   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19327   // simplify it based on the (valid) extraction indices.
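        // E.g. (illustrative): if a v4i32 source is only used by extracts of
        // lanes 0 and 2, then DemandedElts == 0b0101 and lanes 1 and 3 may be
        // simplified away.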
19328   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19329         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19330                Use->getOperand(0) == VecOp &&
19331                isa<ConstantSDNode>(Use->getOperand(1));
19332       })) {
19333     APInt DemandedElts = APInt::getZero(NumElts);
19334     for (SDNode *Use : VecOp->uses()) {
19335       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19336       if (CstElt->getAPIntValue().ult(NumElts))
19337         DemandedElts.setBit(CstElt->getZExtValue());
19338     }
19339     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19340       // We simplified the vector operand of this extract element. If this
19341       // extract is not dead, visit it again so it is folded properly.
19342       if (N->getOpcode() != ISD::DELETED_NODE)
19343         AddToWorklist(N);
19344       return SDValue(N, 0);
19345     }
19346     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19347     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19348       // We simplified the vector operand of this extract element. If this
19349       // extract is not dead, visit it again so it is folded properly.
19350       if (N->getOpcode() != ISD::DELETED_NODE)
19351         AddToWorklist(N);
19352       return SDValue(N, 0);
19353     }
19354   }
19355 
19356   // Everything under here is trying to match an extract of a loaded value.
19357   // If the result of load has to be truncated, then it's not necessarily
19358   // profitable.
19359   bool BCNumEltsChanged = false;
19360   EVT ExtVT = VecVT.getVectorElementType();
19361   EVT LVT = ExtVT;
19362   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19363     return SDValue();
19364 
19365   if (VecOp.getOpcode() == ISD::BITCAST) {
19366     // Don't duplicate a load with other uses.
19367     if (!VecOp.hasOneUse())
19368       return SDValue();
19369 
19370     EVT BCVT = VecOp.getOperand(0).getValueType();
19371     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19372       return SDValue();
19373     if (NumElts != BCVT.getVectorNumElements())
19374       BCNumEltsChanged = true;
19375     VecOp = VecOp.getOperand(0);
19376     ExtVT = BCVT.getVectorElementType();
19377   }
19378 
19379   // extract (vector load $addr), i --> load $addr + i * size
19380   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19381       ISD::isNormalLoad(VecOp.getNode()) &&
19382       !Index->hasPredecessor(VecOp.getNode())) {
19383     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19384     if (VecLoad && VecLoad->isSimple())
19385       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19386   }
19387 
19388   // Perform only after legalization to ensure build_vector / vector_shuffle
19389   // optimizations have already been done.
19390   if (!LegalOperations || !IndexC)
19391     return SDValue();
19392 
19393   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19394   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19395   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19396   int Elt = IndexC->getZExtValue();
19397   LoadSDNode *LN0 = nullptr;
19398   if (ISD::isNormalLoad(VecOp.getNode())) {
19399     LN0 = cast<LoadSDNode>(VecOp);
19400   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19401              VecOp.getOperand(0).getValueType() == ExtVT &&
19402              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19403     // Don't duplicate a load with other uses.
19404     if (!VecOp.hasOneUse())
19405       return SDValue();
19406 
19407     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19408   }
19409   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19410     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19411     // =>
19412     // (load $addr+1*size)
19413 
19414     // Don't duplicate a load with other uses.
19415     if (!VecOp.hasOneUse())
19416       return SDValue();
19417 
19418     // If the bit convert changed the number of elements, it is unsafe
19419     // to examine the mask.
19420     if (BCNumEltsChanged)
19421       return SDValue();
19422 
19423     // Select the input vector, guarding against an out-of-range extract index.
19424     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19425     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19426 
19427     if (VecOp.getOpcode() == ISD::BITCAST) {
19428       // Don't duplicate a load with other uses.
19429       if (!VecOp.hasOneUse())
19430         return SDValue();
19431 
19432       VecOp = VecOp.getOperand(0);
19433     }
19434     if (ISD::isNormalLoad(VecOp.getNode())) {
19435       LN0 = cast<LoadSDNode>(VecOp);
19436       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19437       Index = DAG.getConstant(Elt, DL, Index.getValueType());
19438     }
19439   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19440              VecVT.getVectorElementType() == ScalarVT &&
19441              (!LegalTypes ||
19442               TLI.isTypeLegal(
19443                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19444     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19445     //      -> extract_vector_elt a, 0
19446     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19447     //      -> extract_vector_elt a, 1
19448     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19449     //      -> extract_vector_elt b, 0
19450     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19451     //      -> extract_vector_elt b, 1
19452     SDLoc SL(N);
19453     EVT ConcatVT = VecOp.getOperand(0).getValueType();
19454     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19455     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19456                                      Index.getValueType());
19457 
19458     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19459     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19460                               ConcatVT.getVectorElementType(),
19461                               ConcatOp, NewIdx);
19462     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19463   }
19464 
19465   // Make sure we found a simple (non-volatile, non-atomic) load and that the
19466   // extractelement is the only use.
19467   if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
19468     return SDValue();
19469 
19470   // If Idx was -1 above, Elt is going to be -1, so just return undef.
19471   if (Elt == -1)
19472     return DAG.getUNDEF(LVT);
19473 
19474   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19475 }
19476 
19477 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19478 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19479   // We perform this optimization post type-legalization because
19480   // the type-legalizer often scalarizes integer-promoted vectors.
19481   // Performing this optimization before may create bit-casts which
19482   // will be type-legalized to complex code sequences.
19483   // We perform this optimization only before the operation legalizer because we
19484   // may introduce illegal operations.
19485   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19486     return SDValue();
19487 
19488   unsigned NumInScalars = N->getNumOperands();
19489   SDLoc DL(N);
19490   EVT VT = N->getValueType(0);
19491 
19492   // Check to see if this is a BUILD_VECTOR of a bunch of values
19493   // which come from any_extend or zero_extend nodes. If so, we can create
19494   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19495   // optimizations. We do not handle sign-extend because we can't fill the sign
19496   // using shuffles.
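        // E.g. (illustrative, little endian): a v4i32 build_vector of zexts
        // from i16 can be rebuilt as the v8i16 build_vector
        // <a, 0, b, 0, c, 0, d, 0> and then bitcast to v4i32.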
19497   EVT SourceType = MVT::Other;
19498   bool AllAnyExt = true;
19499 
19500   for (unsigned i = 0; i != NumInScalars; ++i) {
19501     SDValue In = N->getOperand(i);
19502     // Ignore undef inputs.
19503     if (In.isUndef()) continue;
19504 
19505     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
19506     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19507 
19508     // Abort if the element is not an extension.
19509     if (!ZeroExt && !AnyExt) {
19510       SourceType = MVT::Other;
19511       break;
19512     }
19513 
19514     // The input is a ZeroExt or AnyExt. Check the original type.
19515     EVT InTy = In.getOperand(0).getValueType();
19516 
19517     // Check that all of the widened source types are the same.
19518     if (SourceType == MVT::Other)
19519       // First time.
19520       SourceType = InTy;
19521     else if (InTy != SourceType) {
19522       // Multiple incoming types. Abort.
19523       SourceType = MVT::Other;
19524       break;
19525     }
19526 
19527     // Check if all of the extends are ANY_EXTENDs.
19528     AllAnyExt &= AnyExt;
19529   }
19530 
19531   // In order to have valid types, all of the inputs must be extended from the
19532   // same source type and all of the inputs must be any or zero extend.
19533   // Scalar sizes must be a power of two.
19534   EVT OutScalarTy = VT.getScalarType();
19535   bool ValidTypes = SourceType != MVT::Other &&
19536                     isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19537                     isPowerOf2_32(SourceType.getSizeInBits());
19538 
19539   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19540   // turn into a single shuffle instruction.
19541   if (!ValidTypes)
19542     return SDValue();
19543 
19544   // If we already have a splat buildvector, then don't fold it if it means
19545   // introducing zeros.
19546   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19547     return SDValue();
19548 
19549   bool isLE = DAG.getDataLayout().isLittleEndian();
19550   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19551   assert(ElemRatio > 1 && "Invalid element size ratio");
19552   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19553                                DAG.getConstant(0, DL, SourceType);
19554 
19555   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19556   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19557 
19558   // Populate the new build_vector
19559   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19560     SDValue Cast = N->getOperand(i);
19561     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19562             Cast.getOpcode() == ISD::ZERO_EXTEND ||
19563             Cast.isUndef()) && "Invalid cast opcode");
19564     SDValue In;
19565     if (Cast.isUndef())
19566       In = DAG.getUNDEF(SourceType);
19567     else
19568       In = Cast->getOperand(0);
19569     unsigned Index = isLE ? (i * ElemRatio) :
19570                             (i * ElemRatio + (ElemRatio - 1));
19571 
19572     assert(Index < Ops.size() && "Invalid index");
19573     Ops[Index] = In;
19574   }
19575 
19576   // The type of the new BUILD_VECTOR node.
19577   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19578   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19579          "Invalid vector size");
19580   // Check if the new vector type is legal.
19581   if (!isTypeLegal(VecVT) ||
19582       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19583        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19584     return SDValue();
19585 
19586   // Make the new BUILD_VECTOR.
19587   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19588 
19589   // The new BUILD_VECTOR node has the potential to be further optimized.
19590   AddToWorklist(BV.getNode());
19591   // Bitcast to the desired type.
19592   return DAG.getBitcast(VT, BV);
19593 }
19594 
19595 // Simplify (build_vec (trunc $1)
19596 //                     (trunc (srl $1 half-width))
19597 //                     (trunc (srl $1 (2 * half-width))) …)
19598 // to (bitcast $1)
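      // E.g. (illustrative): v2i32 build_vector (trunc i64 X),
      // (trunc (srl X, 32)) assembles the low and high halves of X in
      // little-endian order, which is exactly (v2i32 bitcast X).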
19599 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19600   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19601 
19602   // Only for little endian
19603   if (!DAG.getDataLayout().isLittleEndian())
19604     return SDValue();
19605 
19606   SDLoc DL(N);
19607   EVT VT = N->getValueType(0);
19608   EVT OutScalarTy = VT.getScalarType();
19609   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19610 
19611   // Only power-of-two scalar sizes, so that the bitcast below is well defined
19612   if (!isPowerOf2_64(ScalarTypeBitsize))
19613     return SDValue();
19614 
19615   unsigned NumInScalars = N->getNumOperands();
19616 
19617   // Look through bitcasts
19618   auto PeekThroughBitcast = [](SDValue Op) {
19619     if (Op.getOpcode() == ISD::BITCAST)
19620       return Op.getOperand(0);
19621     return Op;
19622   };
19623 
19624   // The source value where all the parts are extracted.
19625   SDValue Src;
19626   for (unsigned i = 0; i != NumInScalars; ++i) {
19627     SDValue In = PeekThroughBitcast(N->getOperand(i));
19628     // Ignore undef inputs.
19629     if (In.isUndef()) continue;
19630 
19631     if (In.getOpcode() != ISD::TRUNCATE)
19632       return SDValue();
19633 
19634     In = PeekThroughBitcast(In.getOperand(0));
19635 
19636     if (In.getOpcode() != ISD::SRL) {
19637       // For now only build_vec without shuffling, handle shifts here in the
19638       // future.
19639       if (i != 0)
19640         return SDValue();
19641 
19642       Src = In;
19643     } else {
19644       // In is SRL
19645       SDValue part = PeekThroughBitcast(In.getOperand(0));
19646 
19647       if (!Src) {
19648         Src = part;
19649       } else if (Src != part) {
19650         // Vector parts do not stem from the same variable
19651         return SDValue();
19652       }
19653 
19654       SDValue ShiftAmtVal = In.getOperand(1);
19655       if (!isa<ConstantSDNode>(ShiftAmtVal))
19656         return SDValue();
19657 
19658       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19659 
19660       // The value was not extracted from the expected bit position
19661       if (ShiftAmt != i * ScalarTypeBitsize)
19662         return SDValue();
19663     }
19664   }
19665 
19666   // Only cast if the size is the same
19667   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19668     return SDValue();
19669 
19670   return DAG.getBitcast(VT, Src);
19671 }
19672 
19673 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19674                                            ArrayRef<int> VectorMask,
19675                                            SDValue VecIn1, SDValue VecIn2,
19676                                            unsigned LeftIdx, bool DidSplitVec) {
19677   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19678 
19679   EVT VT = N->getValueType(0);
19680   EVT InVT1 = VecIn1.getValueType();
19681   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19682 
19683   unsigned NumElems = VT.getVectorNumElements();
19684   unsigned ShuffleNumElems = NumElems;
19685 
19686   // If we artificially split a vector in two already, then the offsets in the
19687   // operands will all be based off of VecIn1, even those in VecIn2.
19688   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19689 
19690   uint64_t VTSize = VT.getFixedSizeInBits();
19691   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19692   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19693 
19694   assert(InVT2Size <= InVT1Size &&
19695          "Inputs must be sorted to be in non-increasing vector size order.");
19696 
19697   // We can't generate a shuffle node with mismatched input and output types.
19698   // Try to make the types match the type of the output.
19699   if (InVT1 != VT || InVT2 != VT) {
19700     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19701       // If the output vector length is a multiple of both input lengths,
19702       // we can concatenate them and pad the rest with undefs.
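            // E.g. (illustrative): for a v8i32 output and two v2i32 inputs,
            // concat the two inputs with two v2i32 undefs to form the v8i32
            // shuffle source.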
19703       unsigned NumConcats = VTSize / InVT1Size;
19704       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19705       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19706       ConcatOps[0] = VecIn1;
19707       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19708       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19709       VecIn2 = SDValue();
19710     } else if (InVT1Size == VTSize * 2) {
19711       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19712         return SDValue();
19713 
19714       if (!VecIn2.getNode()) {
19715         // If we only have one input vector, and it's twice the size of the
19716         // output, split it in two.
19717         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19718                              DAG.getVectorIdxConstant(NumElems, DL));
19719         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19720         // Since we now have shorter input vectors, adjust the offset of the
19721         // second vector's start.
19722         Vec2Offset = NumElems;
19723       } else {
19724         assert(InVT2Size <= InVT1Size &&
19725                "Second input is not going to be larger than the first one.");
19726 
19727         // VecIn1 is wider than the output, and we have another, possibly
19728         // smaller input. Pad the smaller input with undefs, shuffle at the
19729         // input vector width, and extract the output.
19730         // The shuffle type is different than VT, so check legality again.
19731         if (LegalOperations &&
19732             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19733           return SDValue();
19734 
19735         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19736         // lower it back into a BUILD_VECTOR. So if the inserted type is
19737         // illegal, don't even try.
19738         if (InVT1 != InVT2) {
19739           if (!TLI.isTypeLegal(InVT2))
19740             return SDValue();
19741           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19742                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19743         }
19744         ShuffleNumElems = NumElems * 2;
19745       }
19746     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19747       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19748       ConcatOps[0] = VecIn2;
19749       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19750     } else {
19751       // TODO: Support cases where the length mismatch isn't exactly by a
19752       // factor of 2.
19753       // TODO: Move this check upwards, so that if we have bad type
19754       // mismatches, we don't create any DAG nodes.
19755       return SDValue();
19756     }
19757   }
19758 
19759   // Initialize mask to undef.
19760   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19761 
19762   // Only need to run up to the number of elements actually used, not the
19763   // total number of elements in the shuffle - if we are shuffling a wider
19764   // vector, the high lanes should be set to undef.
19765   for (unsigned i = 0; i != NumElems; ++i) {
19766     if (VectorMask[i] <= 0)
19767       continue;
19768 
19769     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19770     if (VectorMask[i] == (int)LeftIdx) {
19771       Mask[i] = ExtIndex;
19772     } else if (VectorMask[i] == (int)LeftIdx + 1) {
19773       Mask[i] = Vec2Offset + ExtIndex;
19774     }
19775   }
19776 
19777   // The types of the input vectors may have changed above.
19778   InVT1 = VecIn1.getValueType();
19779 
19780   // If we already have a VecIn2, it should have the same type as VecIn1.
19781   // If we don't, get an undef/zero vector of the appropriate type.
19782   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19783   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19784 
19785   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19786   if (ShuffleNumElems > NumElems)
19787     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19788 
19789   return Shuffle;
19790 }
19791 
19792 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19793   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19794 
19795   // First, determine where the build vector is not undef.
19796   // TODO: We could extend this to handle zero elements as well as undefs.
19797   int NumBVOps = BV->getNumOperands();
19798   int ZextElt = -1;
19799   for (int i = 0; i != NumBVOps; ++i) {
19800     SDValue Op = BV->getOperand(i);
19801     if (Op.isUndef())
19802       continue;
19803     if (ZextElt == -1)
19804       ZextElt = i;
19805     else
19806       return SDValue();
19807   }
19808   // Bail out if there's no non-undef element.
19809   if (ZextElt == -1)
19810     return SDValue();
19811 
19812   // The build vector contains some number of undef elements and exactly
19813   // one other element. That other element must be a zero-extended scalar
19814   // extracted from a vector at a constant index to turn this into a shuffle.
19815   // Also, require that the build vector does not implicitly truncate/extend
19816   // its elements.
19817   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
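        // E.g. (illustrative): (v2i64 build_vector undef,
        //                       (zext (extractelt v4i32 V, 2) to i64))
        // becomes (bitcast (shuffle V, zero, <u,u,2,4>) to v2i64) on little
        // endian targets.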
19818   EVT VT = BV->getValueType(0);
19819   SDValue Zext = BV->getOperand(ZextElt);
19820   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19821       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19822       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19823       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19824     return SDValue();
19825 
19826   // The zero-extend must be a multiple of the source size, and we must be
19827   // building a vector of the same size as the source of the extract element.
19828   SDValue Extract = Zext.getOperand(0);
19829   unsigned DestSize = Zext.getValueSizeInBits();
19830   unsigned SrcSize = Extract.getValueSizeInBits();
19831   if (DestSize % SrcSize != 0 ||
19832       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19833     return SDValue();
19834 
19835   // Create a shuffle mask that will combine the extracted element with zeros
19836   // and undefs.
19837   int ZextRatio = DestSize / SrcSize;
19838   int NumMaskElts = NumBVOps * ZextRatio;
19839   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19840   for (int i = 0; i != NumMaskElts; ++i) {
19841     if (i / ZextRatio == ZextElt) {
19842       // The low bits of the (potentially translated) extracted element map to
19843       // the source vector. The high bits map to zero. We will use a zero vector
19844       // as the 2nd source operand of the shuffle, so use the 1st element of
19845       // that vector (mask value is number-of-elements) for the high bits.
19846       if (i % ZextRatio == 0)
19847         ShufMask[i] = Extract.getConstantOperandVal(1);
19848       else
19849         ShufMask[i] = NumMaskElts;
19850     }
19851 
19852     // Undef elements of the build vector remain undef because we initialize
19853     // the shuffle mask with -1.
19854   }
19855 
19856   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19857   // bitcast (shuffle V, ZeroVec, VectorMask)
19858   SDLoc DL(BV);
19859   EVT VecVT = Extract.getOperand(0).getValueType();
19860   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19861   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19862   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19863                                              ZeroVec, ShufMask, DAG);
19864   if (!Shuf)
19865     return SDValue();
19866   return DAG.getBitcast(VT, Shuf);
19867 }
19868 
19869 // FIXME: promote to STLExtras.
19870 template <typename R, typename T>
19871 static auto getFirstIndexOf(R &&Range, const T &Val) {
19872   auto I = find(Range, Val);
19873   if (I == Range.end())
19874     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19875   return std::distance(Range.begin(), I);
19876 }
19877 
19878 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19879 // operations. If the types of the vectors we're extracting from allow it,
19880 // turn this into a vector_shuffle node.
19881 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19882   SDLoc DL(N);
19883   EVT VT = N->getValueType(0);
19884 
19885   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19886   if (!isTypeLegal(VT))
19887     return SDValue();
19888 
19889   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19890     return V;
19891 
19892   // May only combine to shuffle after legalize if shuffle is legal.
19893   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19894     return SDValue();
19895 
19896   bool UsesZeroVector = false;
19897   unsigned NumElems = N->getNumOperands();
19898 
19899   // Record, for each element of the newly built vector, which input vector
19900   // that element comes from. -1 stands for undef, 0 for the zero vector,
19901   // and positive values for the input vectors.
19902   // VectorMask maps each element to its vector number, and VecIn maps vector
19903   // numbers to their initial SDValues.
19904 
19905   SmallVector<int, 8> VectorMask(NumElems, -1);
19906   SmallVector<SDValue, 8> VecIn;
19907   VecIn.push_back(SDValue());
19908 
19909   for (unsigned i = 0; i != NumElems; ++i) {
19910     SDValue Op = N->getOperand(i);
19911 
19912     if (Op.isUndef())
19913       continue;
19914 
19915     // See if we can use a blend with a zero vector.
19916     // TODO: Should we generalize this to a blend with an arbitrary constant
19917     // vector?
19918     if (isNullConstant(Op) || isNullFPConstant(Op)) {
19919       UsesZeroVector = true;
19920       VectorMask[i] = 0;
19921       continue;
19922     }
19923 
19924     // Not an undef or zero. If the input is something other than an
19925     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19926     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19927         !isa<ConstantSDNode>(Op.getOperand(1)))
19928       return SDValue();
19929     SDValue ExtractedFromVec = Op.getOperand(0);
19930 
19931     if (ExtractedFromVec.getValueType().isScalableVector())
19932       return SDValue();
19933 
19934     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19935     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19936       return SDValue();
19937 
19938     // All inputs must have the same element type as the output.
19939     if (VT.getVectorElementType() !=
19940         ExtractedFromVec.getValueType().getVectorElementType())
19941       return SDValue();
19942 
19943     // Have we seen this input vector before?
19944     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19945     // a map back from SDValues to numbers isn't worth it.
19946     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19947     if (Idx == -1) { // A new source vector?
19948       Idx = VecIn.size();
19949       VecIn.push_back(ExtractedFromVec);
19950     }
19951 
19952     VectorMask[i] = Idx;
19953   }
19954 
19955   // If we didn't find at least one input vector, bail out.
19956   if (VecIn.size() < 2)
19957     return SDValue();
19958 
19959   // If all the Operands of BUILD_VECTOR extract from same
19960   // vector, then split the vector efficiently based on the maximum
19961   // vector access index and adjust the VectorMask and
19962   // VecIn accordingly.
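        // E.g. (illustrative): a v4i32 build_vector that only extracts lanes
        // 0..15 of a v32i32 source can instead extract two v8i32 halves (lanes
        // 0-7 and 8-15) and shuffle those.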
19963   bool DidSplitVec = false;
19964   if (VecIn.size() == 2) {
19965     unsigned MaxIndex = 0;
19966     unsigned NearestPow2 = 0;
19967     SDValue Vec = VecIn.back();
19968     EVT InVT = Vec.getValueType();
19969     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19970 
19971     for (unsigned i = 0; i < NumElems; i++) {
19972       if (VectorMask[i] <= 0)
19973         continue;
19974       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19975       IndexVec[i] = Index;
19976       MaxIndex = std::max(MaxIndex, Index);
19977     }
19978 
19979     NearestPow2 = PowerOf2Ceil(MaxIndex);
19980     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19981         NumElems * 2 < NearestPow2) {
19982       unsigned SplitSize = NearestPow2 / 2;
19983       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19984                                      InVT.getVectorElementType(), SplitSize);
19985       if (TLI.isTypeLegal(SplitVT) &&
19986           SplitSize + SplitVT.getVectorNumElements() <=
19987               InVT.getVectorNumElements()) {
19988         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19989                                      DAG.getVectorIdxConstant(SplitSize, DL));
19990         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19991                                      DAG.getVectorIdxConstant(0, DL));
19992         VecIn.pop_back();
19993         VecIn.push_back(VecIn1);
19994         VecIn.push_back(VecIn2);
19995         DidSplitVec = true;
19996 
19997         for (unsigned i = 0; i < NumElems; i++) {
19998           if (VectorMask[i] <= 0)
19999             continue;
20000           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20001         }
20002       }
20003     }
20004   }
20005 
20006   // Sort input vectors by decreasing vector element count,
20007   // while preserving the relative order of equally-sized vectors.
20008   // Note that we keep the first "implicit" zero vector as-is.
20009   SmallVector<SDValue, 8> SortedVecIn(VecIn);
20010   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
20011                     [](const SDValue &a, const SDValue &b) {
20012                       return a.getValueType().getVectorNumElements() >
20013                              b.getValueType().getVectorNumElements();
20014                     });
20015 
20016   // We now also need to rebuild the VectorMask, because it referenced element
20017   // order in VecIn, and we just sorted them.
20018   for (int &SourceVectorIndex : VectorMask) {
20019     if (SourceVectorIndex <= 0)
20020       continue;
20021     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
20022     assert(Idx > 0 && Idx < SortedVecIn.size() &&
20023            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20024     SourceVectorIndex = Idx;
20025   }
20026 
20027   VecIn = std::move(SortedVecIn);
20028 
20029   // TODO: Should this fire if some of the input vectors have illegal type (like
20030   // it does now), or should we let legalization run its course first?
20031 
20032   // Shuffle phase:
20033   // Take pairs of vectors, and shuffle them so that the result has elements
20034   // from these vectors in the correct places.
20035   // For example, given:
20036   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20037   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20038   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20039   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20040   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20041   // We will generate:
20042   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20043   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20044   SmallVector<SDValue, 4> Shuffles;
20045   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20046     unsigned LeftIdx = 2 * In + 1;
20047     SDValue VecLeft = VecIn[LeftIdx];
20048     SDValue VecRight =
20049         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20050 
20051     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
20052                                                 VecRight, LeftIdx, DidSplitVec))
20053       Shuffles.push_back(Shuffle);
20054     else
20055       return SDValue();
20056   }
20057 
20058   // If we need the zero vector as an "ingredient" in the blend tree, add it
20059   // to the list of shuffles.
20060   if (UsesZeroVector)
20061     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20062                                       : DAG.getConstantFP(0.0, DL, VT));
20063 
20064   // If we only have one shuffle, we're done.
20065   if (Shuffles.size() == 1)
20066     return Shuffles[0];
20067 
20068   // Update the vector mask to point to the post-shuffle vectors.
20069   for (int &Vec : VectorMask)
20070     if (Vec == 0)
20071       Vec = Shuffles.size() - 1;
20072     else
20073       Vec = (Vec - 1) / 2;
20074 
20075   // More than one shuffle. Generate a binary tree of blends, e.g. if from
20076   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20077   // generate:
20078   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20079   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20080   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20081   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20082   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20083   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20084   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20085 
20086   // Make sure the initial size of the shuffle list is even.
20087   if (Shuffles.size() % 2)
20088     Shuffles.push_back(DAG.getUNDEF(VT));
20089 
20090   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20091     if (CurSize % 2) {
20092       Shuffles[CurSize] = DAG.getUNDEF(VT);
20093       CurSize++;
20094     }
20095     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20096       int Left = 2 * In;
20097       int Right = 2 * In + 1;
20098       SmallVector<int, 8> Mask(NumElems, -1);
20099       for (unsigned i = 0; i != NumElems; ++i) {
20100         if (VectorMask[i] == Left) {
20101           Mask[i] = i;
20102           VectorMask[i] = In;
20103         } else if (VectorMask[i] == Right) {
20104           Mask[i] = i + NumElems;
20105           VectorMask[i] = In;
20106         }
20107       }
20108 
20109       Shuffles[In] =
20110           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20111     }
20112   }
20113   return Shuffles[0];
20114 }
20115 
20116 // Try to turn a build vector of zero extends of extract vector elts into a
20117 // vector zero extend and possibly an extract subvector.
20118 // TODO: Support sign extend?
20119 // TODO: Allow undef elements?
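      // E.g. (illustrative): (v4i32 build_vector (zext (extractelt v8i16 X, 4)),
      // ..., (zext (extractelt X, 7)))
      //   --> (zext (v4i16 extract_subvector X, 4) to v4i32).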
20120 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
20121   if (LegalOperations)
20122     return SDValue();
20123 
20124   EVT VT = N->getValueType(0);
20125 
20126   bool FoundZeroExtend = false;
20127   SDValue Op0 = N->getOperand(0);
20128   auto checkElem = [&](SDValue Op) -> int64_t {
20129     unsigned Opc = Op.getOpcode();
20130     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
20131     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
20132         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20133         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
20134       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
20135         return C->getZExtValue();
20136     return -1;
20137   };
20138 
20139   // Make sure the first element matches
20140   // (zext (extract_vector_elt X, C))
20141   // Offset must be a constant multiple of the
20142   // known-minimum vector length of the result type.
20143   int64_t Offset = checkElem(Op0);
20144   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20145     return SDValue();
20146 
20147   unsigned NumElems = N->getNumOperands();
20148   SDValue In = Op0.getOperand(0).getOperand(0);
20149   EVT InSVT = In.getValueType().getScalarType();
20150   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20151 
20152   // Don't create an illegal input type after type legalization.
20153   if (LegalTypes && !TLI.isTypeLegal(InVT))
20154     return SDValue();
20155 
20156   // Ensure all the elements come from the same vector and are adjacent.
20157   for (unsigned i = 1; i != NumElems; ++i) {
20158     if ((Offset + i) != checkElem(N->getOperand(i)))
20159       return SDValue();
20160   }
20161 
20162   SDLoc DL(N);
20163   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20164                    Op0.getOperand(0).getOperand(1));
20165   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20166                      VT, In);
20167 }
20168 
20169 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20170   EVT VT = N->getValueType(0);
20171 
20172   // A vector built entirely of undefs is undef.
20173   if (ISD::allOperandsUndef(N))
20174     return DAG.getUNDEF(VT);
20175 
20176   // If this is a splat of a bitcast from another vector, change to a
20177   // concat_vector.
20178   // For example:
20179   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20180   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20181   //
20182   // If X is a build_vector itself, the concat can become a larger build_vector.
20183   // TODO: Maybe this is useful for non-splat too?
20184   if (!LegalOperations) {
20185     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20186       Splat = peekThroughBitcasts(Splat);
20187       EVT SrcVT = Splat.getValueType();
20188       if (SrcVT.isVector()) {
20189         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20190         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20191                                      SrcVT.getVectorElementType(), NumElts);
20192         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20193           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20194           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20195                                        NewVT, Ops);
20196           return DAG.getBitcast(VT, Concat);
20197         }
20198       }
20199     }
20200   }
20201 
20202   // Check if we can express the BUILD_VECTOR as a subvector extract.
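        // E.g. (illustrative): (v2i32 build_vector (extractelt v4i32 X, 2),
        // (extractelt X, 3)) --> (v2i32 extract_subvector X, 2).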
20203   if (!LegalTypes && (N->getNumOperands() > 1)) {
20204     SDValue Op0 = N->getOperand(0);
20205     auto checkElem = [&](SDValue Op) -> uint64_t {
20206       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20207           (Op0.getOperand(0) == Op.getOperand(0)))
20208         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20209           return CNode->getZExtValue();
20210       return -1;
20211     };
20212 
20213     int Offset = checkElem(Op0);
20214     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20215       if (Offset + i != checkElem(N->getOperand(i))) {
20216         Offset = -1;
20217         break;
20218       }
20219     }
20220 
20221     if ((Offset == 0) &&
20222         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20223       return Op0.getOperand(0);
20224     if ((Offset != -1) &&
20225         ((Offset % N->getValueType(0).getVectorNumElements()) ==
20226          0)) // IDX must be multiple of output size.
20227       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20228                          Op0.getOperand(0), Op0.getOperand(1));
20229   }
20230 
20231   if (SDValue V = convertBuildVecZextToZext(N))
20232     return V;
20233 
20234   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20235     return V;
20236 
20237   if (SDValue V = reduceBuildVecTruncToBitCast(N))
20238     return V;
20239 
20240   if (SDValue V = reduceBuildVecToShuffle(N))
20241     return V;
20242 
20243   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20244   // Do this late as some of the above may replace the splat.
20245   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20246     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20247       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20248       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20249     }
20250 
20251   return SDValue();
20252 }
20253 
20254 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20255   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20256   EVT OpVT = N->getOperand(0).getValueType();
20257 
20258   // If the operands are legal vectors, leave them alone.
20259   if (TLI.isTypeLegal(OpVT))
20260     return SDValue();
20261 
20262   SDLoc DL(N);
20263   EVT VT = N->getValueType(0);
20264   SmallVector<SDValue, 8> Ops;
20265 
20266   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20267   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20268 
20269   // Keep track of what we encounter.
20270   bool AnyInteger = false;
20271   bool AnyFP = false;
20272   for (const SDValue &Op : N->ops()) {
20273     if (ISD::BITCAST == Op.getOpcode() &&
20274         !Op.getOperand(0).getValueType().isVector())
20275       Ops.push_back(Op.getOperand(0));
20276     else if (ISD::UNDEF == Op.getOpcode())
20277       Ops.push_back(ScalarUndef);
20278     else
20279       return SDValue();
20280 
20281     // Note whether we encounter an integer or floating point scalar.
20282     // If it's neither, bail out, it could be something weird like x86mmx.
20283     EVT LastOpVT = Ops.back().getValueType();
20284     if (LastOpVT.isFloatingPoint())
20285       AnyFP = true;
20286     else if (LastOpVT.isInteger())
20287       AnyInteger = true;
20288     else
20289       return SDValue();
20290   }
20291 
20292   // If any of the operands is a floating point scalar bitcast to a vector,
20293   // use floating point types throughout, and bitcast everything.
20294   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
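        // E.g. (illustrative): concat (v2i32 bitcast f64 A), (v2i32 bitcast
        // i64 B) becomes (bitcast (v2f64 build_vector A, (f64 bitcast B)) to
        // v4i32).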
20295   if (AnyFP) {
20296     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20297     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20298     if (AnyInteger) {
20299       for (SDValue &Op : Ops) {
20300         if (Op.getValueType() == SVT)
20301           continue;
20302         if (Op.isUndef())
20303           Op = ScalarUndef;
20304         else
20305           Op = DAG.getBitcast(SVT, Op);
20306       }
20307     }
20308   }
20309 
20310   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20311                                VT.getSizeInBits() / SVT.getSizeInBits());
20312   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20313 }
20314 
20315 // Attempt to merge nested concat_vectors/undefs.
20316 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20317 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20318 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20319                                                   SelectionDAG &DAG) {
20320   EVT VT = N->getValueType(0);
20321 
20322   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20323   EVT SubVT;
20324   SDValue FirstConcat;
20325   for (const SDValue &Op : N->ops()) {
20326     if (Op.isUndef())
20327       continue;
20328     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20329       return SDValue();
20330     if (!FirstConcat) {
20331       SubVT = Op.getOperand(0).getValueType();
20332       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20333         return SDValue();
20334       FirstConcat = Op;
20335       continue;
20336     }
20337     if (SubVT != Op.getOperand(0).getValueType())
20338       return SDValue();
20339   }
20340   assert(FirstConcat && "Concat of all-undefs found");
20341 
20342   SmallVector<SDValue> ConcatOps;
20343   for (const SDValue &Op : N->ops()) {
20344     if (Op.isUndef()) {
20345       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20346       continue;
20347     }
20348     ConcatOps.append(Op->op_begin(), Op->op_end());
20349   }
20350   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20351 }
20352 
20353 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20354 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20355 // most two distinct vectors the same size as the result, attempt to turn this
20356 // into a legal shuffle.
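      // E.g. (illustrative): concat (v2i32 extract_subvector V, 2),
      // (v2i32 extract_subvector V, 0) of a v4i32 V folds to
      // (vector_shuffle<2,3,0,1> V, undef).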
20357 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20358   EVT VT = N->getValueType(0);
20359   EVT OpVT = N->getOperand(0).getValueType();
20360 
20361   // We currently can't generate an appropriate shuffle for a scalable vector.
20362   if (VT.isScalableVector())
20363     return SDValue();
20364 
20365   int NumElts = VT.getVectorNumElements();
20366   int NumOpElts = OpVT.getVectorNumElements();
20367 
20368   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20369   SmallVector<int, 8> Mask;
20370 
20371   for (SDValue Op : N->ops()) {
20372     Op = peekThroughBitcasts(Op);
20373 
20374     // UNDEF nodes convert to UNDEF shuffle mask values.
20375     if (Op.isUndef()) {
20376       Mask.append((unsigned)NumOpElts, -1);
20377       continue;
20378     }
20379 
20380     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20381       return SDValue();
20382 
20383     // What vector are we extracting the subvector from and at what index?
20384     SDValue ExtVec = Op.getOperand(0);
20385     int ExtIdx = Op.getConstantOperandVal(1);
20386 
20387     // We want the EVT of the original extraction to correctly scale the
20388     // extraction index.
20389     EVT ExtVT = ExtVec.getValueType();
20390     ExtVec = peekThroughBitcasts(ExtVec);
20391 
20392     // UNDEF nodes convert to UNDEF shuffle mask values.
20393     if (ExtVec.isUndef()) {
20394       Mask.append((unsigned)NumOpElts, -1);
20395       continue;
20396     }
20397 
20398     // Ensure that we are extracting a subvector from a vector the same
20399     // size as the result.
20400     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20401       return SDValue();
20402 
20403     // Scale the subvector index to account for any bitcast.
20404     int NumExtElts = ExtVT.getVectorNumElements();
20405     if (0 == (NumExtElts % NumElts))
20406       ExtIdx /= (NumExtElts / NumElts);
20407     else if (0 == (NumElts % NumExtElts))
20408       ExtIdx *= (NumElts / NumExtElts);
20409     else
20410       return SDValue();
20411 
20412     // At most we can reference 2 inputs in the final shuffle.
20413     if (SV0.isUndef() || SV0 == ExtVec) {
20414       SV0 = ExtVec;
20415       for (int i = 0; i != NumOpElts; ++i)
20416         Mask.push_back(i + ExtIdx);
20417     } else if (SV1.isUndef() || SV1 == ExtVec) {
20418       SV1 = ExtVec;
20419       for (int i = 0; i != NumOpElts; ++i)
20420         Mask.push_back(i + ExtIdx + NumElts);
20421     } else {
20422       return SDValue();
20423     }
20424   }
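  // e.g. with v4i16 operands extracted from v8i16 sources:
  //   concat (extract_subvector X, 4), (extract_subvector Y, 0)
  //     --> shuffle X, Y, <4,5,6,7,8,9,10,11>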
20425 
20426   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20427   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20428                                      DAG.getBitcast(VT, SV1), Mask, DAG);
20429 }
20430 
20431 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20432   unsigned CastOpcode = N->getOperand(0).getOpcode();
20433   switch (CastOpcode) {
20434   case ISD::SINT_TO_FP:
20435   case ISD::UINT_TO_FP:
20436   case ISD::FP_TO_SINT:
20437   case ISD::FP_TO_UINT:
20438     // TODO: Allow more opcodes?
20439     //  case ISD::BITCAST:
20440     //  case ISD::TRUNCATE:
20441     //  case ISD::ZERO_EXTEND:
20442     //  case ISD::SIGN_EXTEND:
20443     //  case ISD::FP_EXTEND:
20444     break;
20445   default:
20446     return SDValue();
20447   }
20448 
20449   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20450   if (!SrcVT.isVector())
20451     return SDValue();
20452 
20453   // All operands of the concat must be the same kind of cast from the same
20454   // source type.
20455   SmallVector<SDValue, 4> SrcOps;
20456   for (SDValue Op : N->ops()) {
20457     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20458         Op.getOperand(0).getValueType() != SrcVT)
20459       return SDValue();
20460     SrcOps.push_back(Op.getOperand(0));
20461   }
20462 
  // The wider cast must be supported by the target. This is unusual because
  // the type used to check operation legality depends on the opcode. In
  // addition, check the other type in the cast to make sure this is really
  // legal.
20466   EVT VT = N->getValueType(0);
20467   EVT SrcEltVT = SrcVT.getVectorElementType();
20468   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20469   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20470   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20471   switch (CastOpcode) {
20472   case ISD::SINT_TO_FP:
20473   case ISD::UINT_TO_FP:
20474     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20475         !TLI.isTypeLegal(VT))
20476       return SDValue();
20477     break;
20478   case ISD::FP_TO_SINT:
20479   case ISD::FP_TO_UINT:
20480     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20481         !TLI.isTypeLegal(ConcatSrcVT))
20482       return SDValue();
20483     break;
20484   default:
20485     llvm_unreachable("Unexpected cast opcode");
20486   }
20487 
20488   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
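  // e.g. assuming A and B are v4i32 sources:
  //   v8f32 concat (v4f32 sint_to_fp A), (v4f32 sint_to_fp B)
  //     --> v8f32 sint_to_fp (v8i32 concat A, B)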
20489   SDLoc DL(N);
20490   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20491   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20492 }
20493 
20494 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20495   // If we only have one input vector, we don't need to do any concatenation.
20496   if (N->getNumOperands() == 1)
20497     return N->getOperand(0);
20498 
20499   // Check if all of the operands are undefs.
20500   EVT VT = N->getValueType(0);
20501   if (ISD::allOperandsUndef(N))
20502     return DAG.getUNDEF(VT);
20503 
20504   // Optimize concat_vectors where all but the first of the vectors are undef.
20505   if (all_of(drop_begin(N->ops()),
20506              [](const SDValue &Op) { return Op.isUndef(); })) {
20507     SDValue In = N->getOperand(0);
20508     assert(In.getValueType().isVector() && "Must concat vectors");
20509 
20510     // If the input is a concat_vectors, just make a larger concat by padding
20511     // with smaller undefs.
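    // e.g. concat (concat X, Y), undef, undef
    //        --> concat X, Y, undef, undef, undef, undef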
20512     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20513       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20514       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20515       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20516       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20517     }
20518 
20519     SDValue Scalar = peekThroughOneUseBitcasts(In);
20520 
20521     // concat_vectors(scalar_to_vector(scalar), undef) ->
20522     //     scalar_to_vector(scalar)
20523     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20524          Scalar.hasOneUse()) {
20525       EVT SVT = Scalar.getValueType().getVectorElementType();
20526       if (SVT == Scalar.getOperand(0).getValueType())
20527         Scalar = Scalar.getOperand(0);
20528     }
20529 
20530     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20531     if (!Scalar.getValueType().isVector()) {
20532       // If the bitcast type isn't legal, it might be a trunc of a legal type;
20533       // look through the trunc so we can still do the transform:
20534       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20535       if (Scalar->getOpcode() == ISD::TRUNCATE &&
20536           !TLI.isTypeLegal(Scalar.getValueType()) &&
20537           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20538         Scalar = Scalar->getOperand(0);
20539 
20540       EVT SclTy = Scalar.getValueType();
20541 
20542       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20543         return SDValue();
20544 
20545       // Bail out if the vector size is not a multiple of the scalar size.
20546       if (VT.getSizeInBits() % SclTy.getSizeInBits())
20547         return SDValue();
20548 
20549       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20550       if (VNTNumElms < 2)
20551         return SDValue();
20552 
20553       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20554       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20555         return SDValue();
20556 
20557       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20558       return DAG.getBitcast(VT, Res);
20559     }
20560   }
20561 
20562   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20563   // We have already tested above for an UNDEF only concatenation.
20564   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20565   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20566   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20567     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20568   };
20569   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20570     SmallVector<SDValue, 8> Opnds;
20571     EVT SVT = VT.getScalarType();
20572 
20573     EVT MinVT = SVT;
20574     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
20577       bool FoundMinVT = false;
20578       for (const SDValue &Op : N->ops())
20579         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20580           EVT OpSVT = Op.getOperand(0).getValueType();
20581           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20582           FoundMinVT = true;
20583         }
20584       assert(FoundMinVT && "Concat vector type mismatch");
20585     }
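    // e.g. if one BUILD_VECTOR has i32 operands and the other has i16
    // operands, MinVT is i16 and every operand is truncated to i16 below.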
20586 
20587     for (const SDValue &Op : N->ops()) {
20588       EVT OpVT = Op.getValueType();
20589       unsigned NumElts = OpVT.getVectorNumElements();
20590 
20591       if (ISD::UNDEF == Op.getOpcode())
20592         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20593 
20594       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20595         if (SVT.isFloatingPoint()) {
20596           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20597           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20598         } else {
20599           for (unsigned i = 0; i != NumElts; ++i)
20600             Opnds.push_back(
20601                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20602         }
20603       }
20604     }
20605 
20606     assert(VT.getVectorNumElements() == Opnds.size() &&
20607            "Concat vector type mismatch");
20608     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20609   }
20610 
20611   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20612   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20613   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20614     return V;
20615 
20616   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20617     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20618     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20619       return V;
20620 
20621     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20622     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20623       return V;
20624   }
20625 
20626   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20627     return V;
20628 
20629   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20630   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
20632   // at the exact same location.
20633   //
20634   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
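  // e.g. with v8i32 X split into two v4i32 parts:
  //   concat (extract_subvector X, 0), (extract_subvector X, 4) --> X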
20635   SDValue SingleSource = SDValue();
20636   unsigned PartNumElem =
20637       N->getOperand(0).getValueType().getVectorMinNumElements();
20638 
20639   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20640     SDValue Op = N->getOperand(i);
20641 
20642     if (Op.isUndef())
20643       continue;
20644 
20645     // Check if this is the identity extract:
20646     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20647       return SDValue();
20648 
20649     // Find the single incoming vector for the extract_subvector.
20650     if (SingleSource.getNode()) {
20651       if (Op.getOperand(0) != SingleSource)
20652         return SDValue();
20653     } else {
20654       SingleSource = Op.getOperand(0);
20655 
20656       // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
20659       if (SingleSource.getValueType() != N->getValueType(0))
20660         return SDValue();
20661     }
20662 
20663     // Check that we are reading from the identity index.
20664     unsigned IdentityIndex = i * PartNumElem;
20665     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20666       return SDValue();
20667   }
20668 
20669   if (SingleSource.getNode())
20670     return SingleSource;
20671 
20672   return SDValue();
20673 }
20674 
20675 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20676 // if the subvector can be sourced for free.
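// For example, with SubVT = v4f32 and Index = 4:
//   (insert_subvector ?, X, 4)  --> X
//   (concat_vectors A, B, C)    --> B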
20677 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20678   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20679       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20680     return V.getOperand(1);
20681   }
20682   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20683   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20684       V.getOperand(0).getValueType() == SubVT &&
20685       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20686     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20687     return V.getOperand(SubIdx);
20688   }
20689   return SDValue();
20690 }
20691 
20692 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20693                                               SelectionDAG &DAG,
20694                                               bool LegalOperations) {
20695   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20696   SDValue BinOp = Extract->getOperand(0);
20697   unsigned BinOpcode = BinOp.getOpcode();
20698   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20699     return SDValue();
20700 
20701   EVT VecVT = BinOp.getValueType();
20702   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20703   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20704     return SDValue();
20705 
20706   SDValue Index = Extract->getOperand(1);
20707   EVT SubVT = Extract->getValueType(0);
20708   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20709     return SDValue();
20710 
20711   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20712   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20713 
20714   // TODO: We could handle the case where only 1 operand is being inserted by
20715   //       creating an extract of the other operand, but that requires checking
20716   //       number of uses and/or costs.
20717   if (!Sub0 || !Sub1)
20718     return SDValue();
20719 
20720   // We are inserting both operands of the wide binop only to extract back
20721   // to the narrow vector size. Eliminate all of the insert/extract:
20722   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20723   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20724                      BinOp->getFlags());
20725 }
20726 
20727 /// If we are extracting a subvector produced by a wide binary operator try
20728 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20729 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20730                                           bool LegalOperations) {
20731   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20732   // some of these bailouts with other transforms.
20733 
20734   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20735     return V;
20736 
20737   // The extract index must be a constant, so we can map it to a concat operand.
20738   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20739   if (!ExtractIndexC)
20740     return SDValue();
20741 
20742   // We are looking for an optionally bitcasted wide vector binary operator
20743   // feeding an extract subvector.
20744   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20745   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20746   unsigned BOpcode = BinOp.getOpcode();
20747   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20748     return SDValue();
20749 
20750   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20751   // reduced to the unary fneg when it is visited, and we probably want to deal
20752   // with fneg in a target-specific way.
20753   if (BOpcode == ISD::FSUB) {
20754     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20755     if (C && C->getValueAPF().isNegZero())
20756       return SDValue();
20757   }
20758 
20759   // The binop must be a vector type, so we can extract some fraction of it.
20760   EVT WideBVT = BinOp.getValueType();
20761   // The optimisations below currently assume we are dealing with fixed length
20762   // vectors. It is possible to add support for scalable vectors, but at the
20763   // moment we've done no analysis to prove whether they are profitable or not.
20764   if (!WideBVT.isFixedLengthVector())
20765     return SDValue();
20766 
20767   EVT VT = Extract->getValueType(0);
20768   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20769   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20770          "Extract index is not a multiple of the vector length.");
20771 
20772   // Bail out if this is not a proper multiple width extraction.
20773   unsigned WideWidth = WideBVT.getSizeInBits();
20774   unsigned NarrowWidth = VT.getSizeInBits();
20775   if (WideWidth % NarrowWidth != 0)
20776     return SDValue();
20777 
20778   // Bail out if we are extracting a fraction of a single operation. This can
20779   // occur because we potentially looked through a bitcast of the binop.
20780   unsigned NarrowingRatio = WideWidth / NarrowWidth;
20781   unsigned WideNumElts = WideBVT.getVectorNumElements();
20782   if (WideNumElts % NarrowingRatio != 0)
20783     return SDValue();
20784 
20785   // Bail out if the target does not support a narrower version of the binop.
20786   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20787                                    WideNumElts / NarrowingRatio);
20788   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20789     return SDValue();
20790 
20791   // If extraction is cheap, we don't need to look at the binop operands
20792   // for concat ops. The narrow binop alone makes this transform profitable.
20793   // We can't just reuse the original extract index operand because we may have
20794   // bitcasted.
20795   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20796   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20797   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20798       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20799     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20800     SDLoc DL(Extract);
20801     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20802     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20803                             BinOp.getOperand(0), NewExtIndex);
20804     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20805                             BinOp.getOperand(1), NewExtIndex);
20806     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20807                                       BinOp.getNode()->getFlags());
20808     return DAG.getBitcast(VT, NarrowBinOp);
20809   }
20810 
20811   // Only handle the case where we are doubling and then halving. A larger ratio
20812   // may require more than two narrow binops to replace the wide binop.
20813   if (NarrowingRatio != 2)
20814     return SDValue();
20815 
20816   // TODO: The motivating case for this transform is an x86 AVX1 target. That
20817   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20818   // flavors, but no other 256-bit integer support. This could be extended to
20819   // handle any binop, but that may require fixing/adding other folds to avoid
20820   // codegen regressions.
20821   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20822     return SDValue();
20823 
20824   // We need at least one concatenation operation of a binop operand to make
20825   // this transform worthwhile. The concat must double the input vector sizes.
20826   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20827     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20828       return V.getOperand(ConcatOpNum);
20829     return SDValue();
20830   };
20831   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20832   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20833 
20834   if (SubVecL || SubVecR) {
20835     // If a binop operand was not the result of a concat, we must extract a
20836     // half-sized operand for our new narrow binop:
20837     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20838     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20839     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20840     SDLoc DL(Extract);
20841     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20842     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20843                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20844                                       BinOp.getOperand(0), IndexC);
20845 
20846     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20847                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20848                                       BinOp.getOperand(1), IndexC);
20849 
20850     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20851     return DAG.getBitcast(VT, NarrowBinOp);
20852   }
20853 
20854   return SDValue();
20855 }
20856 
20857 /// If we are extracting a subvector from a wide vector load, convert to a
20858 /// narrow load to eliminate the extraction:
20859 /// (extract_subvector (load wide vector)) --> (load narrow vector)
20860 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20861   // TODO: Add support for big-endian. The offset calculation must be adjusted.
20862   if (DAG.getDataLayout().isBigEndian())
20863     return SDValue();
20864 
20865   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20866   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
20867     return SDValue();
20868 
  EVT VT = Extract->getValueType(0);
20871 
20872   // We can only create byte sized loads.
20873   if (!VT.isByteSized())
20874     return SDValue();
20875 
20876   unsigned Index = Extract->getConstantOperandVal(1);
20877   unsigned NumElts = VT.getVectorMinNumElements();
20878 
20879   // The definition of EXTRACT_SUBVECTOR states that the index must be a
20880   // multiple of the minimum number of elements in the result type.
20881   assert(Index % NumElts == 0 && "The extract subvector index is not a "
20882                                  "multiple of the result's element count");
20883 
20884   // It's fine to use TypeSize here as we know the offset will not be negative.
20885   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
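  // e.g. extracting elements [4,8) as v4i32 from a wider load yields a fixed
  // 16-byte offset from the original base pointer.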
20886 
20887   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Allow targets to opt out of narrowing the load width.
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();
20890 
20891   // The narrow load will be offset from the base address of the old load if
20892   // we are extracting from something besides index 0 (little-endian).
20893   SDLoc DL(Extract);
20894 
20895   // TODO: Use "BaseIndexOffset" to make this more effective.
20896   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20897 
20898   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20899   MachineFunction &MF = DAG.getMachineFunction();
20900   MachineMemOperand *MMO;
20901   if (Offset.isScalable()) {
20902     MachinePointerInfo MPI =
20903         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20904     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20905   } else
20906     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20907                                   StoreSize);
20908 
20909   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20910   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20911   return NewLd;
20912 }
20913 
/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
///                               EXTRACT_SUBVECTOR(Op?, ?),
///                               Mask')
20918 /// iff it is legal and profitable to do so. Notably, the trimmed mask
20919 /// (containing only the elements that are extracted)
20920 /// must reference at most two subvectors.
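/// For example, extracting the low v4i32 half of a v8i32 shuffle:
///   extract_subvector (shuffle X, Y, <0,8,1,9, 4,12,5,13>), 0
///     --> shuffle (extract_subvector X, 0), (extract_subvector Y, 0),
///                 <0,4,1,5>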
20921 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
20922                                                      SelectionDAG &DAG,
20923                                                      const TargetLowering &TLI,
20924                                                      bool LegalOperations) {
20925   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20926          "Must only be called on EXTRACT_SUBVECTOR's");
20927 
20928   SDValue N0 = N->getOperand(0);
20929 
20930   // Only deal with non-scalable vectors.
20931   EVT NarrowVT = N->getValueType(0);
20932   EVT WideVT = N0.getValueType();
20933   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
20934     return SDValue();
20935 
20936   // The operand must be a shufflevector.
20937   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
20938   if (!WideShuffleVector)
20939     return SDValue();
20940 
  // The old shuffle needs to go away.
20942   if (!WideShuffleVector->hasOneUse())
20943     return SDValue();
20944 
20945   // And the narrow shufflevector that we'll form must be legal.
20946   if (LegalOperations &&
20947       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
20948     return SDValue();
20949 
20950   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
20951   int NumEltsExtracted = NarrowVT.getVectorNumElements();
20952   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
20953          "Extract index is not a multiple of the output vector length.");
20954 
20955   int WideNumElts = WideVT.getVectorNumElements();
20956 
20957   SmallVector<int, 16> NewMask;
20958   NewMask.reserve(NumEltsExtracted);
20959   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
20960       DemandedSubvectors;
20961 
  // Try to decode the wide mask into a narrow mask from at most two subvectors.
20963   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
20964                                                   NumEltsExtracted)) {
20965     assert((M >= -1) && (M < (2 * WideNumElts)) &&
20966            "Out-of-bounds shuffle mask?");
20967 
20968     if (M < 0) {
20969       // Does not depend on operands, does not require adjustment.
20970       NewMask.emplace_back(M);
20971       continue;
20972     }
20973 
20974     // From which operand of the shuffle does this shuffle mask element pick?
20975     int WideShufOpIdx = M / WideNumElts;
20976     // Which element of that operand is picked?
20977     int OpEltIdx = M % WideNumElts;
20978 
20979     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
20980            "Shuffle mask vector decomposition failure.");
20981 
20982     // And which NumEltsExtracted-sized subvector of that operand is that?
20983     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
20984     // And which element within that subvector of that operand is that?
20985     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
20986 
20987     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
20988            "Shuffle mask subvector decomposition failure.");
20989 
20990     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
20991             WideShufOpIdx * WideNumElts) == M &&
20992            "Shuffle mask full decomposition failure.");
20993 
20994     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
20995 
20996     if (Op.isUndef()) {
20997       // Picking from an undef operand. Let's adjust mask instead.
20998       NewMask.emplace_back(-1);
20999       continue;
21000     }
21001 
21002     // Profitability check: only deal with extractions from the first subvector.
21003     if (OpSubvecIdx != 0)
21004       return SDValue();
21005 
21006     const std::pair<SDValue, int> DemandedSubvector =
21007         std::make_pair(Op, OpSubvecIdx);
21008 
21009     if (DemandedSubvectors.insert(DemandedSubvector)) {
21010       if (DemandedSubvectors.size() > 2)
21011         return SDValue(); // We can't handle more than two subvectors.
21012       // How many elements into the WideVT does this subvector start?
21013       int Index = NumEltsExtracted * OpSubvecIdx;
21014       // Bail out if the extraction isn't going to be cheap.
21015       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21016         return SDValue();
21017     }
21018 
21019     // Ok, but from which operand of the new shuffle will this element pick?
21020     int NewOpIdx =
21021         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
21022     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21023 
21024     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21025     NewMask.emplace_back(AdjM);
21026   }
21027   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21028   assert(DemandedSubvectors.size() <= 2 &&
21029          "Should have ended up demanding at most two subvectors.");
21030 
21031   // Did we discover that the shuffle does not actually depend on operands?
21032   if (DemandedSubvectors.empty())
21033     return DAG.getUNDEF(NarrowVT);
21034 
  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
  // operands/indices, so there is no point in checking its legality.
21037 
21038   // Do not turn a legal shuffle into an illegal one.
21039   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21040       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
21041     return SDValue();
21042 
21043   SDLoc DL(N);
21044 
21045   SmallVector<SDValue, 2> NewOps;
21046   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21047            &DemandedSubvector : DemandedSubvectors) {
21048     // How many elements into the WideVT does this subvector start?
21049     int Index = NumEltsExtracted * DemandedSubvector.second;
21050     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21051     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21052                                     DemandedSubvector.first, IndexC));
21053   }
21054   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21055          "Should end up with either one or two ops");
21056 
21057   // If we ended up with only one operand, pad with an undef.
21058   if (NewOps.size() == 1)
21059     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21060 
21061   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21062 }
21063 
21064 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21065   EVT NVT = N->getValueType(0);
21066   SDValue V = N->getOperand(0);
21067   uint64_t ExtIdx = N->getConstantOperandVal(1);
21068 
21069   // Extract from UNDEF is UNDEF.
21070   if (V.isUndef())
21071     return DAG.getUNDEF(NVT);
21072 
21073   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
21074     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21075       return NarrowLoad;
21076 
21077   // Combine an extract of an extract into a single extract_subvector.
21078   // ext (ext X, C), 0 --> ext X, C
21079   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21080     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21081                                     V.getConstantOperandVal(1)) &&
21082         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
21083       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21084                          V.getOperand(1));
21085     }
21086   }
21087 
21088   // Try to move vector bitcast after extract_subv by scaling extraction index:
21089   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
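  // e.g. v2i64 extract_subv (v4i64 bitcast (v8i32 X)), 2
  //        --> v2i64 bitcast (v4i32 extract_subv X, 4)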
21090   if (V.getOpcode() == ISD::BITCAST &&
21091       V.getOperand(0).getValueType().isVector() &&
21092       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21093     SDValue SrcOp = V.getOperand(0);
21094     EVT SrcVT = SrcOp.getValueType();
21095     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21096     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
21097     if ((SrcNumElts % DestNumElts) == 0) {
21098       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21099       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21100       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21101                                       NewExtEC);
21102       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21103         SDLoc DL(N);
21104         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
21105         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21106                                          V.getOperand(0), NewIndex);
21107         return DAG.getBitcast(NVT, NewExtract);
21108       }
21109     }
21110     if ((DestNumElts % SrcNumElts) == 0) {
21111       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21112       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21113         ElementCount NewExtEC =
21114             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21115         EVT ScalarVT = SrcVT.getScalarType();
21116         if ((ExtIdx % DestSrcRatio) == 0) {
21117           SDLoc DL(N);
21118           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21119           EVT NewExtVT =
21120               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
21121           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21122             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21123             SDValue NewExtract =
21124                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21125                             V.getOperand(0), NewIndex);
21126             return DAG.getBitcast(NVT, NewExtract);
21127           }
21128           if (NewExtEC.isScalar() &&
21129               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
21130             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21131             SDValue NewExtract =
21132                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
21133                             V.getOperand(0), NewIndex);
21134             return DAG.getBitcast(NVT, NewExtract);
21135           }
21136         }
21137       }
21138     }
21139   }
21140 
21141   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21142     unsigned ExtNumElts = NVT.getVectorMinNumElements();
21143     EVT ConcatSrcVT = V.getOperand(0).getValueType();
21144     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21145            "Concat and extract subvector do not change element type");
21146     assert((ExtIdx % ExtNumElts) == 0 &&
21147            "Extract index is not a multiple of the input vector length.");
21148 
21149     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21150     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21151 
21152     // If the concatenated source types match this extract, it's a direct
21153     // simplification:
21154     // extract_subvec (concat V1, V2, ...), i --> Vi
21155     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21156       return V.getOperand(ConcatOpIdx);
21157 
21158     // If the concatenated source vectors are a multiple length of this extract,
21159     // then extract a fraction of one of those source vectors directly from a
21160     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec (v8i8 Y), 6
21163     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21164         ConcatSrcNumElts % ExtNumElts == 0) {
21165       SDLoc DL(N);
21166       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21167       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21168              "Trying to extract from >1 concat operand?");
21169       assert(NewExtIdx % ExtNumElts == 0 &&
21170              "Extract index is not a multiple of the input vector length.");
21171       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21172       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21173                          V.getOperand(ConcatOpIdx), NewIndexC);
21174     }
21175   }
21176 
21177   if (SDValue V =
21178           foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21179     return V;
21180 
21181   V = peekThroughBitcasts(V);
21182 
  // If the input is a build vector, try to make a smaller build vector.
21184   if (V.getOpcode() == ISD::BUILD_VECTOR) {
21185     EVT InVT = V.getValueType();
21186     unsigned ExtractSize = NVT.getSizeInBits();
21187     unsigned EltSize = InVT.getScalarSizeInBits();
21188     // Only do this if we won't split any elements.
21189     if (ExtractSize % EltSize == 0) {
21190       unsigned NumElems = ExtractSize / EltSize;
21191       EVT EltVT = InVT.getVectorElementType();
21192       EVT ExtractVT =
21193           NumElems == 1 ? EltVT
21194                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21195       if ((Level < AfterLegalizeDAG ||
21196            (NumElems == 1 ||
21197             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21198           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21199         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21200 
21201         if (NumElems == 1) {
21202           SDValue Src = V->getOperand(IdxVal);
21203           if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
21205           return DAG.getBitcast(NVT, Src);
21206         }
21207 
21208         // Extract the pieces from the original build_vector.
21209         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21210                                               V->ops().slice(IdxVal, NumElems));
21211         return DAG.getBitcast(NVT, BuildVec);
21212       }
21213     }
21214   }
21215 
21216   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are of the same size.
21219     EVT SmallVT = V.getOperand(1).getValueType();
21220     if (!NVT.bitsEq(SmallVT))
21221       return SDValue();
21222 
21223     // Combine:
21224     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21225     // Into:
21226     //    indices are equal or bit offsets are equal => V1
21227     //    otherwise => (extract_subvec V1, ExtIdx)
21228     uint64_t InsIdx = V.getConstantOperandVal(2);
21229     if (InsIdx * SmallVT.getScalarSizeInBits() ==
21230         ExtIdx * NVT.getScalarSizeInBits()) {
21231       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21232         return SDValue();
21233 
21234       return DAG.getBitcast(NVT, V.getOperand(1));
21235     }
21236     return DAG.getNode(
21237         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21238         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21239         N->getOperand(1));
21240   }
21241 
21242   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21243     return NarrowBOp;
21244 
21245   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21246     return SDValue(N, 0);
21247 
21248   return SDValue();
21249 }
21250 
21251 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21252 /// followed by concatenation. Narrow vector ops may have better performance
21253 /// than wide ops, and this can unlock further narrowing of other vector ops.
21254 /// Targets can invert this transform later if it is not profitable.
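/// For example, with v4i32 halves:
///   shuffle (concat X, undef), (concat Y, undef), <0,1,8,9,2,3,10,11>
///     --> concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <2,3,6,7>)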
21255 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21256                                          SelectionDAG &DAG) {
21257   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21258   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21259       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21260       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21261     return SDValue();
21262 
21263   // Split the wide shuffle mask into halves. Any mask element that is accessing
21264   // operand 1 is offset down to account for narrowing of the vectors.
21265   ArrayRef<int> Mask = Shuf->getMask();
21266   EVT VT = Shuf->getValueType(0);
21267   unsigned NumElts = VT.getVectorNumElements();
21268   unsigned HalfNumElts = NumElts / 2;
21269   SmallVector<int, 16> Mask0(HalfNumElts, -1);
21270   SmallVector<int, 16> Mask1(HalfNumElts, -1);
21271   for (unsigned i = 0; i != NumElts; ++i) {
21272     if (Mask[i] == -1)
21273       continue;
21274     // If we reference the upper (undef) subvector then the element is undef.
21275     if ((Mask[i] % NumElts) >= HalfNumElts)
21276       continue;
21277     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21278     if (i < HalfNumElts)
21279       Mask0[i] = M;
21280     else
21281       Mask1[i - HalfNumElts] = M;
21282   }
21283 
21284   // Ask the target if this is a valid transform.
21285   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21286   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21287                                 HalfNumElts);
21288   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21289       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21290     return SDValue();
21291 
21292   // shuffle (concat X, undef), (concat Y, undef), Mask -->
21293   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21294   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21295   SDLoc DL(Shuf);
21296   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21297   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21298   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21299 }
21300 
21301 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or to turn a shuffle of a single concat into a simpler shuffle followed by
// a concat.
21303 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21304   EVT VT = N->getValueType(0);
21305   unsigned NumElts = VT.getVectorNumElements();
21306 
21307   SDValue N0 = N->getOperand(0);
21308   SDValue N1 = N->getOperand(1);
21309   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21310   ArrayRef<int> Mask = SVN->getMask();
21311 
21312   SmallVector<SDValue, 4> Ops;
21313   EVT ConcatVT = N0.getOperand(0).getValueType();
21314   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21315   unsigned NumConcats = NumElts / NumElemsPerConcat;
21316 
21317   auto IsUndefMaskElt = [](int i) { return i == -1; };
21318 
21319   // Special case: shuffle(concat(A,B)) can be more efficiently represented
21320   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21321   // half vector elements.
21322   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21323       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21324                    IsUndefMaskElt)) {
21325     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21326                               N0.getOperand(1),
21327                               Mask.slice(0, NumElemsPerConcat));
21328     N1 = DAG.getUNDEF(ConcatVT);
21329     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21330   }
21331 
21332   // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector. For example:
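  //   shuffle (concat A, B), (concat C, D), <4,5,6,7,12,13,14,15>
  //     --> concat B, D
  // where A, B, C and D are the v4i32 concat operands.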
21334   for (unsigned I = 0; I != NumConcats; ++I) {
21335     unsigned Begin = I * NumElemsPerConcat;
21336     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21337 
21338     // Make sure we're dealing with a copy.
21339     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21340       Ops.push_back(DAG.getUNDEF(ConcatVT));
21341       continue;
21342     }
21343 
21344     int OpIdx = -1;
21345     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21346       if (IsUndefMaskElt(SubMask[i]))
21347         continue;
21348       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21349         return SDValue();
21350       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21351       if (0 <= OpIdx && EltOpIdx != OpIdx)
21352         return SDValue();
21353       OpIdx = EltOpIdx;
21354     }
21355     assert(0 <= OpIdx && "Unknown concat_vectors op");
21356 
21357     if (OpIdx < (int)N0.getNumOperands())
21358       Ops.push_back(N0.getOperand(OpIdx));
21359     else
21360       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21361   }
21362 
21363   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21364 }
21365 
21366 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21367 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21368 //
21369 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21370 // a simplification in some sense, but it isn't appropriate in general: some
21371 // BUILD_VECTORs are substantially cheaper than others. The general case
21372 // of a BUILD_VECTOR requires inserting each element individually (or
21373 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21374 // all constants is a single constant pool load.  A BUILD_VECTOR where each
21375 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
21376 // are undef lowers to a small number of element insertions.
21377 //
21378 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21379 // We don't fold shuffles where one side is a non-zero constant, and we don't
21380 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21381 // non-constant operands. This seems to work out reasonably well in practice.
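// e.g. shuffle (build_vector A, B), (build_vector C, D), <0,3>
//        --> build_vector A, D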
21382 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
21383                                        SelectionDAG &DAG,
21384                                        const TargetLowering &TLI) {
21385   EVT VT = SVN->getValueType(0);
21386   unsigned NumElts = VT.getVectorNumElements();
21387   SDValue N0 = SVN->getOperand(0);
21388   SDValue N1 = SVN->getOperand(1);
21389 
21390   if (!N0->hasOneUse())
21391     return SDValue();
21392 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
21394   // discussed above.
21395   if (!N1.isUndef()) {
21396     if (!N1->hasOneUse())
21397       return SDValue();
21398 
21399     bool N0AnyConst = isAnyConstantBuildVector(N0);
21400     bool N1AnyConst = isAnyConstantBuildVector(N1);
21401     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21402       return SDValue();
21403     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21404       return SDValue();
21405   }
21406 
21407   // If both inputs are splats of the same value then we can safely merge this
21408   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21409   bool IsSplat = false;
21410   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21411   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21412   if (BV0 && BV1)
21413     if (SDValue Splat0 = BV0->getSplatValue())
21414       IsSplat = (Splat0 == BV1->getSplatValue());
21415 
21416   SmallVector<SDValue, 8> Ops;
21417   SmallSet<SDValue, 16> DuplicateOps;
21418   for (int M : SVN->getMask()) {
21419     SDValue Op = DAG.getUNDEF(VT.getScalarType());
21420     if (M >= 0) {
21421       int Idx = M < (int)NumElts ? M : M - NumElts;
21422       SDValue &S = (M < (int)NumElts ? N0 : N1);
21423       if (S.getOpcode() == ISD::BUILD_VECTOR) {
21424         Op = S.getOperand(Idx);
21425       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21426         SDValue Op0 = S.getOperand(0);
21427         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21428       } else {
21429         // Operand can't be combined - bail out.
21430         return SDValue();
21431       }
21432     }
21433 
21434     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21435     // generating a splat; semantically, this is fine, but it's likely to
21436     // generate low-quality code if the target can't reconstruct an appropriate
21437     // shuffle.
21438     if (!Op.isUndef() && !isIntOrFPConstant(Op))
21439       if (!IsSplat && !DuplicateOps.insert(Op).second)
21440         return SDValue();
21441 
21442     Ops.push_back(Op);
21443   }
21444 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
21446   // maximum type and extend them all.
21447   EVT SVT = VT.getScalarType();
21448   if (SVT.isInteger())
21449     for (SDValue &Op : Ops)
21450       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21451   if (SVT != VT.getScalarType())
21452     for (SDValue &Op : Ops)
21453       Op = TLI.isZExtFree(Op.getValueType(), SVT)
21454                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21455                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
21456   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21457 }
21458 
21459 // Match shuffles that can be converted to any_vector_extend_in_reg.
21460 // This is often generated during legalization.
21461 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21462 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21463 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
21464                                             SelectionDAG &DAG,
21465                                             const TargetLowering &TLI,
21466                                             bool LegalOperations) {
21467   EVT VT = SVN->getValueType(0);
21468   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21469 
21470   // TODO Add support for big-endian when we have a test case.
21471   if (!VT.isInteger() || IsBigEndian)
21472     return SDValue();
21473 
21474   unsigned NumElts = VT.getVectorNumElements();
21475   unsigned EltSizeInBits = VT.getScalarSizeInBits();
21476   ArrayRef<int> Mask = SVN->getMask();
21477   SDValue N0 = SVN->getOperand(0);
21478 
21479   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21480   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21481     for (unsigned i = 0; i != NumElts; ++i) {
21482       if (Mask[i] < 0)
21483         continue;
21484       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21485         continue;
21486       return false;
21487     }
21488     return true;
21489   };
21490 
  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
21492   // power-of-2 extensions as they are the most likely.
21493   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21494     // Check for non power of 2 vector sizes
21495     if (NumElts % Scale != 0)
21496       continue;
21497     if (!isAnyExtend(Scale))
21498       continue;
21499 
21500     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21501     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21502     // Never create an illegal type. Only create unsupported operations if we
21503     // are pre-legalization.
21504     if (TLI.isTypeLegal(OutVT))
21505       if (!LegalOperations ||
21506           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
21507         return DAG.getBitcast(VT,
21508                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
21509                                           SDLoc(SVN), OutVT, N0));
21510   }
21511 
21512   return SDValue();
21513 }
21514 
21515 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21516 // each source element of a large type into the lowest elements of a smaller
21517 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we
// should be able to remove it.
21520 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
21521                                         SelectionDAG &DAG) {
21522   EVT VT = SVN->getValueType(0);
21523   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21524 
21525   // TODO Add support for big-endian when we have a test case.
21526   if (!VT.isInteger() || IsBigEndian)
21527     return SDValue();
21528 
21529   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21530 
21531   unsigned Opcode = N0.getOpcode();
21532   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21533       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21534       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
21535     return SDValue();
21536 
21537   SDValue N00 = N0.getOperand(0);
21538   ArrayRef<int> Mask = SVN->getMask();
21539   unsigned NumElts = VT.getVectorNumElements();
21540   unsigned EltSizeInBits = VT.getScalarSizeInBits();
21541   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21542   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21543 
21544   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21545     return SDValue();
21546   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21547 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21549   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21550   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21551   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21552     for (unsigned i = 0; i != NumElts; ++i) {
21553       if (Mask[i] < 0)
21554         continue;
21555       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21556         continue;
21557       return false;
21558     }
21559     return true;
21560   };
21561 
21562   // At the moment we just handle the case where we've truncated back to the
21563   // same size as before the extension.
21564   // TODO: handle more extension/truncation cases as cases arise.
21565   if (EltSizeInBits != ExtSrcSizeInBits)
21566     return SDValue();
21567 
21568   // We can remove *extend_vector_inreg only if the truncation happens at
21569   // the same scale as the extension.
21570   if (isTruncate(ExtScale))
21571     return DAG.getBitcast(VT, N00);
21572 
21573   return SDValue();
21574 }
21575 
21576 // Combine shuffles of splat-shuffles of the form:
21577 // shuffle (shuffle V, undef, splat-mask), undef, M
21578 // If splat-mask contains undef elements, we need to be careful about
21579 // introducing undef's in the folded mask which are not the result of composing
21580 // the masks of the shuffles.
21581 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21582                                         SelectionDAG &DAG) {
21583   if (!Shuf->getOperand(1).isUndef())
21584     return SDValue();
21585   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21586   if (!Splat || !Splat->isSplat())
21587     return SDValue();
21588 
21589   ArrayRef<int> ShufMask = Shuf->getMask();
21590   ArrayRef<int> SplatMask = Splat->getMask();
21591   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21592 
21593   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21594   // every undef mask element in the splat-shuffle has a corresponding undef
21595   // element in the user-shuffle's mask or if the composition of mask elements
21596   // would result in undef.
21597   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21598   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21599   //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing to the users of the shuffle an undef element at index 1
  //   that was not there before the combine.
21602   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21603   //   In this case the composition of masks yields SplatMask, so it's ok to
21604   //   simplify to the splat-shuffle.
21605   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21606   //   In this case the composed mask includes all undef elements of SplatMask
21607   //   and in addition sets element zero to undef. It is safe to simplify to
21608   //   the splat-shuffle.
21609   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21610                                        ArrayRef<int> SplatMask) {
21611     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21612       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21613           SplatMask[UserMask[i]] != -1)
21614         return false;
21615     return true;
21616   };
21617   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21618     return Shuf->getOperand(0);
21619 
21620   // Create a new shuffle with a mask that is composed of the two shuffles'
21621   // masks.
21622   SmallVector<int, 32> NewMask;
21623   for (int Idx : ShufMask)
21624     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21625 
21626   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21627                               Splat->getOperand(0), Splat->getOperand(1),
21628                               NewMask);
21629 }
21630 
21631 /// Combine shuffle of shuffle of the form:
21632 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
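      /// e.g. shuf (shuf X, undef, [2,u,2,u]), undef, [0,2,u,2] combines to the
      /// splat mask [2,2,u,2], i.e. a splat of element 2 of X.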
21633 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21634                                      SelectionDAG &DAG) {
21635   if (!OuterShuf->getOperand(1).isUndef())
21636     return SDValue();
21637   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21638   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21639     return SDValue();
21640 
21641   ArrayRef<int> OuterMask = OuterShuf->getMask();
21642   ArrayRef<int> InnerMask = InnerShuf->getMask();
21643   unsigned NumElts = OuterMask.size();
21644   assert(NumElts == InnerMask.size() && "Mask length mismatch");
21645   SmallVector<int, 32> CombinedMask(NumElts, -1);
21646   int SplatIndex = -1;
21647   for (unsigned i = 0; i != NumElts; ++i) {
21648     // Undef lanes remain undef.
21649     int OuterMaskElt = OuterMask[i];
21650     if (OuterMaskElt == -1)
21651       continue;
21652 
21653     // Peek through the shuffle masks to get the underlying source element.
21654     int InnerMaskElt = InnerMask[OuterMaskElt];
21655     if (InnerMaskElt == -1)
21656       continue;
21657 
21658     // Initialize the splatted element.
21659     if (SplatIndex == -1)
21660       SplatIndex = InnerMaskElt;
21661 
21662     // Non-matching index - this is not a splat.
21663     if (SplatIndex != InnerMaskElt)
21664       return SDValue();
21665 
21666     CombinedMask[i] = InnerMaskElt;
21667   }
21668   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21669           getSplatIndex(CombinedMask) != -1) &&
21670          "Expected a splat mask");
21671 
21672   // TODO: The transform may be a win even if the mask is not legal.
21673   EVT VT = OuterShuf->getValueType(0);
21674   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21675   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21676     return SDValue();
21677 
21678   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21679                               InnerShuf->getOperand(1), CombinedMask);
21680 }
21681 
21682 /// If the shuffle mask is taking exactly one element from the first vector
21683 /// operand and passing through all other elements from the second vector
21684 /// operand, return the index of the mask element that is choosing an element
21685 /// from the first operand. Otherwise, return -1.
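      /// e.g. with MaskSize == 4, Mask=[4,5,2,7] returns 2 (only lane 2 chooses
      /// from operand 0), while Mask=[4,1,2,7] returns -1 (lanes 1 and 2 both
      /// choose from operand 0).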
21686 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21687   int MaskSize = Mask.size();
21688   int EltFromOp0 = -1;
21689   // TODO: This does not match if there are undef elements in the shuffle mask.
21690   // Should we ignore undefs in the shuffle mask instead? The trade-off is
21691   // removing an instruction (a shuffle), but losing the knowledge that some
21692   // vector lanes are not needed.
21693   for (int i = 0; i != MaskSize; ++i) {
21694     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21695       // We're looking for a shuffle of exactly one element from operand 0.
21696       if (EltFromOp0 != -1)
21697         return -1;
21698       EltFromOp0 = i;
21699     } else if (Mask[i] != i + MaskSize) {
21700       // Nothing from operand 1 can change lanes.
21701       return -1;
21702     }
21703   }
21704   return EltFromOp0;
21705 }
21706 
21707 /// If a shuffle inserts exactly one element from a source vector operand into
21708 /// another vector operand and we can access the specified element as a scalar,
21709 /// then we can eliminate the shuffle.
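      /// e.g. shuffle (insertelt v1, x, 0), v2, <4,5,0,7>
      ///        --> insertelt v2, x, 2
      /// Lane 2 of the shuffle reads lane 0 of the insert (which is x) and all
      /// other lanes pass through v2, so the shuffle is redundant.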
21710 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21711                                       SelectionDAG &DAG) {
21712   // First, check if we are taking one element of a vector and shuffling that
21713   // element into another vector.
21714   ArrayRef<int> Mask = Shuf->getMask();
21715   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21716   SDValue Op0 = Shuf->getOperand(0);
21717   SDValue Op1 = Shuf->getOperand(1);
21718   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21719   if (ShufOp0Index == -1) {
21720     // Commute mask and check again.
21721     ShuffleVectorSDNode::commuteMask(CommutedMask);
21722     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21723     if (ShufOp0Index == -1)
21724       return SDValue();
21725     // Commute operands to match the commuted shuffle mask.
21726     std::swap(Op0, Op1);
21727     Mask = CommutedMask;
21728   }
21729 
21730   // The shuffle inserts exactly one element from operand 0 into operand 1.
21731   // Now see if we can access that element as a scalar via a real insert element
21732   // instruction.
21733   // TODO: We can try harder to locate the element as a scalar. Examples: it
21734   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21735   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21736          "Shuffle mask value must be from operand 0");
21737   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21738     return SDValue();
21739 
21740   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21741   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21742     return SDValue();
21743 
21744   // There's an existing insertelement with constant insertion index, so we
21745   // don't need to check the legality/profitability of a replacement operation
21746   // that differs at most in the constant value. The target should be able to
21747   // lower any of those in a similar way. If not, legalization will expand this
21748   // to a scalar-to-vector plus shuffle.
21749   //
21750   // Note that the shuffle may move the scalar from the position that the insert
21751   // element used. Therefore, our new insert element occurs at the shuffle's
21752   // mask index value, not the insert's index value.
21753   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21754   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21755   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21756                      Op1, Op0.getOperand(1), NewInsIndex);
21757 }
21758 
21759 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21760 /// completely. This has the potential to lose undef knowledge because the first
21761 /// shuffle may not have an undef mask element where the second one does. So
21762 /// only call this after doing simplifications based on demanded elements.
21763 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21764   // shuf (shuf0 X, Y, Mask0), undef, Mask
21765   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21766   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21767     return SDValue();
21768 
21769   ArrayRef<int> Mask = Shuf->getMask();
21770   ArrayRef<int> Mask0 = Shuf0->getMask();
21771   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21772     // Ignore undef elements.
21773     if (Mask[i] == -1)
21774       continue;
21775     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21776 
21777     // Is the element of the shuffle operand chosen by this shuffle the same as
21778     // the element chosen by the shuffle operand itself?
21779     if (Mask0[Mask[i]] != Mask0[i])
21780       return SDValue();
21781   }
21782   // Every element of this shuffle is identical to the result of the previous
21783   // shuffle, so we can replace this value.
21784   return Shuf->getOperand(0);
21785 }
21786 
21787 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21788   EVT VT = N->getValueType(0);
21789   unsigned NumElts = VT.getVectorNumElements();
21790 
21791   SDValue N0 = N->getOperand(0);
21792   SDValue N1 = N->getOperand(1);
21793 
21794   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21795 
21796   // Canonicalize shuffle undef, undef -> undef
21797   if (N0.isUndef() && N1.isUndef())
21798     return DAG.getUNDEF(VT);
21799 
21800   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21801 
21802   // Canonicalize shuffle v, v -> v, undef
21803   if (N0 == N1)
21804     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
21805                                 createUnaryMask(SVN->getMask(), NumElts));
21806 
21807   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
21808   if (N0.isUndef())
21809     return DAG.getCommutedVectorShuffle(*SVN);
21810 
21811   // Remove references to rhs if it is undef
21812   if (N1.isUndef()) {
21813     bool Changed = false;
21814     SmallVector<int, 8> NewMask;
21815     for (unsigned i = 0; i != NumElts; ++i) {
21816       int Idx = SVN->getMaskElt(i);
21817       if (Idx >= (int)NumElts) {
21818         Idx = -1;
21819         Changed = true;
21820       }
21821       NewMask.push_back(Idx);
21822     }
21823     if (Changed)
21824       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21825   }
21826 
21827   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21828     return InsElt;
21829 
21830   // A shuffle of a single vector that is a splatted value can always be folded.
21831   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21832     return V;
21833 
21834   if (SDValue V = formSplatFromShuffles(SVN, DAG))
21835     return V;
21836 
21837   // If it is a splat, check if the argument vector is another splat or a
21838   // build_vector.
21839   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21840     int SplatIndex = SVN->getSplatIndex();
21841     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21842         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21843       // splat (vector_bo L, R), Index -->
21844       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21845       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21846       SDLoc DL(N);
21847       EVT EltVT = VT.getScalarType();
21848       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21849       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21850       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21851       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21852                                   N0.getNode()->getFlags());
21853       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21854       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21855       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21856     }
21857 
21858     // If this is a bit convert that changes the element type of the vector but
21859     // not the number of vector elements, look through it.  Be careful not to
21860   // look through conversions that change things like v4f32 to v2f64.
21861     SDNode *V = N0.getNode();
21862     if (V->getOpcode() == ISD::BITCAST) {
21863       SDValue ConvInput = V->getOperand(0);
21864       if (ConvInput.getValueType().isVector() &&
21865           ConvInput.getValueType().getVectorNumElements() == NumElts)
21866         V = ConvInput.getNode();
21867     }
21868 
21869     if (V->getOpcode() == ISD::BUILD_VECTOR) {
21870       assert(V->getNumOperands() == NumElts &&
21871              "BUILD_VECTOR has wrong number of operands");
21872       SDValue Base;
21873       bool AllSame = true;
21874       for (unsigned i = 0; i != NumElts; ++i) {
21875         if (!V->getOperand(i).isUndef()) {
21876           Base = V->getOperand(i);
21877           break;
21878         }
21879       }
21880       // Splat of <u, u, u, u>, return <u, u, u, u>
21881       if (!Base.getNode())
21882         return N0;
21883       for (unsigned i = 0; i != NumElts; ++i) {
21884         if (V->getOperand(i) != Base) {
21885           AllSame = false;
21886           break;
21887         }
21888       }
21889       // Splat of <x, x, x, x>, return <x, x, x, x>
21890       if (AllSame)
21891         return N0;
21892 
21893       // Canonicalize any other splat as a build_vector.
21894       SDValue Splatted = V->getOperand(SplatIndex);
21895       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21896       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21897 
21898       // We may have jumped through bitcasts, so the type of the
21899       // BUILD_VECTOR may not match the type of the shuffle.
21900       if (V->getValueType(0) != VT)
21901         NewBV = DAG.getBitcast(VT, NewBV);
21902       return NewBV;
21903     }
21904   }
21905 
21906   // Simplify source operands based on shuffle mask.
21907   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21908     return SDValue(N, 0);
21909 
21910   // This is intentionally placed after demanded elements simplification because
21911   // it could eliminate knowledge of undef elements created by this shuffle.
21912   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21913     return ShufOp;
21914 
21915   // Match shuffles that can be converted to any_vector_extend_in_reg.
21916   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21917     return V;
21918 
21919   // Combine "truncate_vector_in_reg" style shuffles.
21920   if (SDValue V = combineTruncationShuffle(SVN, DAG))
21921     return V;
21922 
21923   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21924       Level < AfterLegalizeVectorOps &&
21925       (N1.isUndef() ||
21926       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21927        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21928     if (SDValue V = partitionShuffleOfConcats(N, DAG))
21929       return V;
21930   }
21931 
21932   // A shuffle of a concat of the same narrow vector can be reduced to use
21933   // only low-half elements of a concat with undef:
21934   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
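        // e.g. shuf (concat X, X), undef, <0,2,1,3>
        //        --> shuf (concat X, undef), undef, <0,0,1,1>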
21935   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21936       N0.getNumOperands() == 2 &&
21937       N0.getOperand(0) == N0.getOperand(1)) {
21938     int HalfNumElts = (int)NumElts / 2;
21939     SmallVector<int, 8> NewMask;
21940     for (unsigned i = 0; i != NumElts; ++i) {
21941       int Idx = SVN->getMaskElt(i);
21942       if (Idx >= HalfNumElts) {
21943         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21944         Idx -= HalfNumElts;
21945       }
21946       NewMask.push_back(Idx);
21947     }
21948     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21949       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21950       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21951                                    N0.getOperand(0), UndefVec);
21952       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21953     }
21954   }
21955 
21956   // See if we can replace a shuffle with an insert_subvector.
21957   // e.g. v2i32 into v8i32:
21958   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21959   // --> insert_subvector(lhs,rhs1,4).
21960   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21961       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21962     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21963       // Ensure RHS subvectors are legal.
21964       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21965       EVT SubVT = RHS.getOperand(0).getValueType();
21966       int NumSubVecs = RHS.getNumOperands();
21967       int NumSubElts = SubVT.getVectorNumElements();
21968       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21969       if (!TLI.isTypeLegal(SubVT))
21970         return SDValue();
21971 
21972     // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21973       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21974         return SDValue();
21975 
21976       // Search [NumSubElts] spans for RHS sequence.
21977       // TODO: Can we avoid nested loops to increase performance?
21978       SmallVector<int> InsertionMask(NumElts);
21979       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21980         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21981           // Reset mask to identity.
21982           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21983 
21984           // Add subvector insertion.
21985           std::iota(InsertionMask.begin() + SubIdx,
21986                     InsertionMask.begin() + SubIdx + NumSubElts,
21987                     NumElts + (SubVec * NumSubElts));
21988 
21989           // See if the shuffle mask matches the reference insertion mask.
21990           bool MatchingShuffle = true;
21991           for (int i = 0; i != (int)NumElts; ++i) {
21992             int ExpectIdx = InsertionMask[i];
21993             int ActualIdx = Mask[i];
21994             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21995               MatchingShuffle = false;
21996               break;
21997             }
21998           }
21999 
22000           if (MatchingShuffle)
22001             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22002                                RHS.getOperand(SubVec),
22003                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
22004         }
22005       }
22006       return SDValue();
22007     };
22008     ArrayRef<int> Mask = SVN->getMask();
22009     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22010       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22011         return InsertN1;
22012     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22013       SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22014       ShuffleVectorSDNode::commuteMask(CommuteMask);
22015       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
22016         return InsertN0;
22017     }
22018   }
22019 
22020   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22021   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22022   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22023     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22024       return Res;
22025 
22026   // If this shuffle only has a single input that is a bitcasted shuffle,
22027   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22028   // back to their original types.
22029   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22030       N1.isUndef() && Level < AfterLegalizeVectorOps &&
22031       TLI.isTypeLegal(VT)) {
22032 
22033     SDValue BC0 = peekThroughOneUseBitcasts(N0);
22034     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22035       EVT SVT = VT.getScalarType();
22036       EVT InnerVT = BC0->getValueType(0);
22037       EVT InnerSVT = InnerVT.getScalarType();
22038 
22039       // Determine which shuffle works with the smaller scalar type.
22040       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22041       EVT ScaleSVT = ScaleVT.getScalarType();
22042 
22043       if (TLI.isTypeLegal(ScaleVT) &&
22044           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22045           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22046         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22047         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22048 
22049         // Scale the shuffle masks to the smaller scalar type.
22050         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22051         SmallVector<int, 8> InnerMask;
22052         SmallVector<int, 8> OuterMask;
22053         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22054         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22055 
22056         // Merge the shuffle masks.
22057         SmallVector<int, 8> NewMask;
22058         for (int M : OuterMask)
22059           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22060 
22061         // Test for shuffle mask legality over both commutations.
22062         SDValue SV0 = BC0->getOperand(0);
22063         SDValue SV1 = BC0->getOperand(1);
22064         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22065         if (!LegalMask) {
22066           std::swap(SV0, SV1);
22067           ShuffleVectorSDNode::commuteMask(NewMask);
22068           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22069         }
22070 
22071         if (LegalMask) {
22072           SV0 = DAG.getBitcast(ScaleVT, SV0);
22073           SV1 = DAG.getBitcast(ScaleVT, SV1);
22074           return DAG.getBitcast(
22075               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22076         }
22077       }
22078     }
22079   }
22080 
22081   // Compute the combined shuffle mask for a shuffle with SV0 as the first
22082   // operand, and SV1 as the second operand.
22083   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22084   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
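        // e.g. (Commute = false, NumElts = 4): merging
        //   shuffle(shuffle(A, B, [0,4,1,5]), B, [0,2,4,6])
        // yields SV0 = A, SV1 = B, Mask = [0,1,4,6]: lanes 0,1 look through the
        // inner shuffle to A[0],A[1] and lanes 2,3 read B[0],B[2] from N1.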
22085   auto MergeInnerShuffle =
22086       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22087                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
22088                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22089                      SmallVectorImpl<int> &Mask) -> bool {
22090     // Don't try to fold splats; they're likely to simplify somehow, or they
22091     // might be free.
22092     if (OtherSVN->isSplat())
22093       return false;
22094 
22095     SV0 = SV1 = SDValue();
22096     Mask.clear();
22097 
22098     for (unsigned i = 0; i != NumElts; ++i) {
22099       int Idx = SVN->getMaskElt(i);
22100       if (Idx < 0) {
22101         // Propagate Undef.
22102         Mask.push_back(Idx);
22103         continue;
22104       }
22105 
22106       if (Commute)
22107         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22108 
22109       SDValue CurrentVec;
22110       if (Idx < (int)NumElts) {
22111         // This shuffle index refers to the inner shuffle N0. Lookup the inner
22112         // shuffle mask to identify which vector is actually referenced.
22113         Idx = OtherSVN->getMaskElt(Idx);
22114         if (Idx < 0) {
22115           // Propagate Undef.
22116           Mask.push_back(Idx);
22117           continue;
22118         }
22119         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22120                                           : OtherSVN->getOperand(1);
22121       } else {
22122         // This shuffle index references an element within N1.
22123         CurrentVec = N1;
22124       }
22125 
22126       // Simple case where 'CurrentVec' is UNDEF.
22127       if (CurrentVec.isUndef()) {
22128         Mask.push_back(-1);
22129         continue;
22130       }
22131 
22132       // Canonicalize the shuffle index. We don't know yet if CurrentVec
22133       // will be the first or second operand of the combined shuffle.
22134       Idx = Idx % NumElts;
22135       if (!SV0.getNode() || SV0 == CurrentVec) {
22136         // Ok. CurrentVec is the left hand side.
22137         // Update the mask accordingly.
22138         SV0 = CurrentVec;
22139         Mask.push_back(Idx);
22140         continue;
22141       }
22142       if (!SV1.getNode() || SV1 == CurrentVec) {
22143         // Ok. CurrentVec is the right hand side.
22144         // Update the mask accordingly.
22145         SV1 = CurrentVec;
22146         Mask.push_back(Idx + NumElts);
22147         continue;
22148       }
22149 
22150       // Last chance - see if the vector is another shuffle and if it
22151       // uses one of the existing candidate shuffle ops.
22152       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22153         int InnerIdx = CurrentSVN->getMaskElt(Idx);
22154         if (InnerIdx < 0) {
22155           Mask.push_back(-1);
22156           continue;
22157         }
22158         SDValue InnerVec = (InnerIdx < (int)NumElts)
22159                                ? CurrentSVN->getOperand(0)
22160                                : CurrentSVN->getOperand(1);
22161         if (InnerVec.isUndef()) {
22162           Mask.push_back(-1);
22163           continue;
22164         }
22165         InnerIdx %= NumElts;
22166         if (InnerVec == SV0) {
22167           Mask.push_back(InnerIdx);
22168           continue;
22169         }
22170         if (InnerVec == SV1) {
22171           Mask.push_back(InnerIdx + NumElts);
22172           continue;
22173         }
22174       }
22175 
22176       // Bail out if we cannot convert the shuffle pair into a single shuffle.
22177       return false;
22178     }
22179 
22180     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22181       return true;
22182 
22183     // Avoid introducing shuffles with illegal mask.
22184     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22185     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22186     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22187     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22188     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22189     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22190     if (TLI.isShuffleMaskLegal(Mask, VT))
22191       return true;
22192 
22193     std::swap(SV0, SV1);
22194     ShuffleVectorSDNode::commuteMask(Mask);
22195     return TLI.isShuffleMaskLegal(Mask, VT);
22196   };
22197 
22198   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22199     // Canonicalize shuffles according to rules:
22200     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22201     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22202     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22203     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22204         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22205       // The incoming shuffle must be of the same type as the result of the
22206       // current shuffle.
22207       assert(N1->getOperand(0).getValueType() == VT &&
22208              "Shuffle types don't match");
22209 
22210       SDValue SV0 = N1->getOperand(0);
22211       SDValue SV1 = N1->getOperand(1);
22212       bool HasSameOp0 = N0 == SV0;
22213       bool IsSV1Undef = SV1.isUndef();
22214       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22215         // Commute the operands of this shuffle so merging below will trigger.
22216         return DAG.getCommutedVectorShuffle(*SVN);
22217     }
22218 
22219     // Canonicalize splat shuffles to the RHS to improve merging below.
22220     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22221     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22222         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22223         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22224         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22225       return DAG.getCommutedVectorShuffle(*SVN);
22226     }
22227 
22228     // Try to fold according to rules:
22229     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22230     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22231     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22232     // Don't try to fold shuffles with illegal type.
22233     // Only fold if this shuffle is the only user of the other shuffle.
22234     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
22235     for (int i = 0; i != 2; ++i) {
22236       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22237           N->isOnlyUserOf(N->getOperand(i).getNode())) {
22238         // The incoming shuffle must be of the same type as the result of the
22239         // current shuffle.
22240         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22241         assert(OtherSV->getOperand(0).getValueType() == VT &&
22242                "Shuffle types don't match");
22243 
22244         SDValue SV0, SV1;
22245         SmallVector<int, 4> Mask;
22246         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22247                               SV0, SV1, Mask)) {
22248           // Check if all indices in Mask are Undef. If so, propagate Undef.
22249           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22250             return DAG.getUNDEF(VT);
22251 
22252           return DAG.getVectorShuffle(VT, SDLoc(N),
22253                                       SV0 ? SV0 : DAG.getUNDEF(VT),
22254                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22255         }
22256       }
22257     }
22258 
22259     // Merge shuffles through binops if we are able to merge them with at
22260     // least one other shuffle.
22261     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22262     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22263     unsigned SrcOpcode = N0.getOpcode();
22264     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22265         (N1.isUndef() ||
22266          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22267       // Get binop source ops, or just pass on the undef.
22268       SDValue Op00 = N0.getOperand(0);
22269       SDValue Op01 = N0.getOperand(1);
22270       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22271       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22272       // TODO: We might be able to relax the VT check but we don't currently
22273       // have any isBinOp() that has different result/ops VTs so play safe until
22274       // we have test coverage.
22275       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22276           Op01.getValueType() == VT && Op11.getValueType() == VT &&
22277           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22278            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22279            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22280            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22281         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22282                                         SmallVectorImpl<int> &Mask, bool LeftOp,
22283                                         bool Commute) {
22284           SDValue InnerN = Commute ? N1 : N0;
22285           SDValue Op0 = LeftOp ? Op00 : Op01;
22286           SDValue Op1 = LeftOp ? Op10 : Op11;
22287           if (Commute)
22288             std::swap(Op0, Op1);
22289           // Only accept the merged shuffle if we don't introduce undef elements,
22290           // or the inner shuffle already contained undef elements.
22291           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22292           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22293                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22294                                    Mask) &&
22295                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22296                   llvm::none_of(Mask, [](int M) { return M < 0; }));
22297         };
22298 
22299         // Ensure we don't increase the number of shuffles - we must merge a
22300         // shuffle from at least one of the LHS and RHS ops.
22301         bool MergedLeft = false;
22302         SDValue LeftSV0, LeftSV1;
22303         SmallVector<int, 4> LeftMask;
22304         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22305             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22306           MergedLeft = true;
22307         } else {
22308           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22309           LeftSV0 = Op00, LeftSV1 = Op10;
22310         }
22311 
22312         bool MergedRight = false;
22313         SDValue RightSV0, RightSV1;
22314         SmallVector<int, 4> RightMask;
22315         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22316             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22317           MergedRight = true;
22318         } else {
22319           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22320           RightSV0 = Op01, RightSV1 = Op11;
22321         }
22322 
22323         if (MergedLeft || MergedRight) {
22324           SDLoc DL(N);
22325           SDValue LHS = DAG.getVectorShuffle(
22326               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22327               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22328           SDValue RHS = DAG.getVectorShuffle(
22329               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22330               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22331           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22332         }
22333       }
22334     }
22335   }
22336 
22337   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22338     return V;
22339 
22340   return SDValue();
22341 }
22342 
22343 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22344   SDValue InVal = N->getOperand(0);
22345   EVT VT = N->getValueType(0);
22346 
22347   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22348   // with a VECTOR_SHUFFLE and possible truncate.
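        // e.g. (v4i32 scalar_to_vector (extract_vector_elt V:v4i32, 2))
        //        --> vector_shuffle V, undef, <2,u,u,u>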
22349   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22350       VT.isFixedLengthVector() &&
22351       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22352     SDValue InVec = InVal->getOperand(0);
22353     SDValue EltNo = InVal->getOperand(1);
22354     auto InVecT = InVec.getValueType();
22355     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22356       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22357       int Elt = C0->getZExtValue();
22358       NewMask[0] = Elt;
22359       // If we have an implicit truncate, do the truncate here as long as it's
22360       // legal; if it's not legal, give up on this combine.
22361       if (VT.getScalarType() != InVal.getValueType() &&
22362           InVal.getValueType().isScalarInteger() &&
22363           isTypeLegal(VT.getScalarType())) {
22364         SDValue Val =
22365             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22366         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22367       }
22368       if (VT.getScalarType() == InVecT.getScalarType() &&
22369           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22370         SDValue LegalShuffle =
22371           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22372                                       DAG.getUNDEF(InVecT), NewMask, DAG);
22373         if (LegalShuffle) {
22374           // If the initial vector is the correct size this shuffle is a
22375           // valid result.
22376           if (VT == InVecT)
22377             return LegalShuffle;
22378           // If not we must truncate the vector.
22379           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22380             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22381             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22382                                          InVecT.getVectorElementType(),
22383                                          VT.getVectorNumElements());
22384             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22385                                LegalShuffle, ZeroIdx);
22386           }
22387         }
22388       }
22389     }
22390   }
22391 
22392   return SDValue();
22393 }
22394 
22395 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22396   EVT VT = N->getValueType(0);
22397   SDValue N0 = N->getOperand(0);
22398   SDValue N1 = N->getOperand(1);
22399   SDValue N2 = N->getOperand(2);
22400   uint64_t InsIdx = N->getConstantOperandVal(2);
22401 
22402   // If inserting an UNDEF, just return the original vector.
22403   if (N1.isUndef())
22404     return N0;
22405 
22406   // If this is an insert of an extracted vector into an undef vector, we can
22407   // just use the input to the extract.
22408   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22409       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22410     return N1.getOperand(0);
22411 
22412   // If we are inserting a bitcast value into an undef, with the same
22413   // number of elements, just use the bitcast input of the extract.
22414   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22415   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22416   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22417       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22418       N1.getOperand(0).getOperand(1) == N2 &&
22419       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22420           VT.getVectorElementCount() &&
22421       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22422           VT.getSizeInBits()) {
22423     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22424   }
22425 
22426   // If both N0 and N1 are bitcast values on which insert_subvector
22427   // would make sense, pull the bitcast through.
22428   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22429   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22430   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22431     SDValue CN0 = N0.getOperand(0);
22432     SDValue CN1 = N1.getOperand(0);
22433     EVT CN0VT = CN0.getValueType();
22434     EVT CN1VT = CN1.getValueType();
22435     if (CN0VT.isVector() && CN1VT.isVector() &&
22436         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22437         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22438       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22439                                       CN0.getValueType(), CN0, CN1, N2);
22440       return DAG.getBitcast(VT, NewINSERT);
22441     }
22442   }
22443 
22444   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22445   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22446   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22447   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22448       N0.getOperand(1).getValueType() == N1.getValueType() &&
22449       N0.getOperand(2) == N2)
22450     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22451                        N1, N2);
22452 
22453   // Eliminate an intermediate insert into an undef vector:
22454   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22455   // insert_subvector undef, X, N2
22456   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22457       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22458     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22459                        N1.getOperand(1), N2);
22460 
22461   // Push subvector bitcasts to the output, adjusting the index as we go.
22462   // insert_subvector(bitcast(v), bitcast(s), c1)
22463   // -> bitcast(insert_subvector(v, s, c2))
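        // e.g. for VT: v4i64 and a subvector bitcast from an i32 source,
        // Scale = 64/32 = 2, so this becomes
        // bitcast(insert_subvector(v8i32 bitcast(v), s, c1 * 2)).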
22464   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22465       N1.getOpcode() == ISD::BITCAST) {
22466     SDValue N0Src = peekThroughBitcasts(N0);
22467     SDValue N1Src = peekThroughBitcasts(N1);
22468     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22469     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22470     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22471         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22472       EVT NewVT;
22473       SDLoc DL(N);
22474       SDValue NewIdx;
22475       LLVMContext &Ctx = *DAG.getContext();
22476       ElementCount NumElts = VT.getVectorElementCount();
22477       unsigned EltSizeInBits = VT.getScalarSizeInBits();
22478       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22479         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22480         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22481         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22482       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22483         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22484         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22485           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22486                                    NumElts.divideCoefficientBy(Scale));
22487           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22488         }
22489       }
22490       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22491         SDValue Res = DAG.getBitcast(NewVT, N0Src);
22492         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22493         return DAG.getBitcast(VT, Res);
22494       }
22495     }
22496   }
22497 
22498   // Canonicalize insert_subvector dag nodes.
22499   // Example:
22500   // (insert_subvector (insert_subvector A, S1, Idx1), S0, Idx0), Idx0 < Idx1
22501   // -> (insert_subvector (insert_subvector A, S0, Idx0), S1, Idx1)
22502   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22503       N1.getValueType() == N0.getOperand(1).getValueType()) {
22504     unsigned OtherIdx = N0.getConstantOperandVal(2);
22505     if (InsIdx < OtherIdx) {
22506       // Swap nodes.
22507       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22508                                   N0.getOperand(0), N1, N2);
22509       AddToWorklist(NewOp.getNode());
22510       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22511                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22512     }
22513   }
22514 
22515   // If the input vector is a concatenation, and the insert replaces
22516   // one of the pieces, we can optimize into a single concat_vectors.
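        // e.g. with v2i32 pieces, insert_subvector (concat A, B, C, D), X, 4
        //        --> concat A, B, X, D  (Factor = 2, so X replaces piece 4/2 = 2)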
22517   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22518       N0.getOperand(0).getValueType() == N1.getValueType() &&
22519       N0.getOperand(0).getValueType().isScalableVector() ==
22520           N1.getValueType().isScalableVector()) {
22521     unsigned Factor = N1.getValueType().getVectorMinNumElements();
22522     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22523     Ops[InsIdx / Factor] = N1;
22524     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22525   }
22526 
22527   // Simplify source operands based on insertion.
22528   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22529     return SDValue(N, 0);
22530 
22531   return SDValue();
22532 }
22533 
22534 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22535   SDValue N0 = N->getOperand(0);
22536 
22537   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22538   if (N0->getOpcode() == ISD::FP16_TO_FP)
22539     return N0->getOperand(0);
22540 
22541   return SDValue();
22542 }
22543 
22544 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22545   SDValue N0 = N->getOperand(0);
22546 
22547   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22548   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22549     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22550     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22551       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22552                          N0.getOperand(0));
22553     }
22554   }
22555 
22556   return SDValue();
22557 }
22558 
22559 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22560   SDValue N0 = N->getOperand(0);
22561   EVT VT = N0.getValueType();
22562   unsigned Opcode = N->getOpcode();
22563 
22564   // VECREDUCE over 1-element vector is just an extract.
22565   if (VT.getVectorElementCount().isScalar()) {
22566     SDLoc dl(N);
22567     SDValue Res =
22568         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22569                     DAG.getVectorIdxConstant(0, dl));
22570     if (Res.getValueType() != N->getValueType(0))
22571       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22572     return Res;
22573   }
22574 
22575   // On a boolean vector an and/or reduction is the same as a umin/umax
22576   // reduction. Convert them if the latter is legal while the former isn't.
22577   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22578     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22579         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22580     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22581         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22582         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22583       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22584   }
22585 
22586   return SDValue();
22587 }
22588 
22589 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22590   // VP operations in which all vector elements are disabled - either by
22591   // determining that the mask is all false or that the EVL is 0 - can be
22592   // eliminated.
22593   bool AreAllEltsDisabled = false;
22594   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22595     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22596   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22597     AreAllEltsDisabled |=
22598         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22599 
22600   // This is the only generic VP combine we support for now.
22601   if (!AreAllEltsDisabled)
22602     return SDValue();
22603 
22604   // Binary operations can be replaced by UNDEF.
22605   if (ISD::isVPBinaryOp(N->getOpcode()))
22606     return DAG.getUNDEF(N->getValueType(0));
22607 
22608   // VP Memory operations can be replaced by either the chain (stores) or the
22609   // chain + undef (loads).
22610   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22611     if (MemSD->writeMem())
22612       return MemSD->getChain();
22613     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22614   }
22615 
22616   // Reduction operations return the start operand when no elements are active.
22617   if (ISD::isVPReduction(N->getOpcode()))
22618     return N->getOperand(0);
22619 
22620   return SDValue();
22621 }
22622 
22623 /// Returns a vector_shuffle if it is able to transform an AND to a
22624 /// vector_shuffle with the destination vector and a zero vector.
22625 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22626 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
22627 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22628   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22629 
22630   EVT VT = N->getValueType(0);
22631   SDValue LHS = N->getOperand(0);
22632   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22633   SDLoc DL(N);
22634 
22635   // Make sure we're not running after operation legalization where it
22636   // may have custom lowered the vector shuffles.
22637   if (LegalOperations)
22638     return SDValue();
22639 
22640   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22641     return SDValue();
22642 
22643   EVT RVT = RHS.getValueType();
22644   unsigned NumElts = RHS.getNumOperands();
22645 
22646   // Attempt to create a valid clear mask, splitting the mask into
22647   // sub elements and checking to see if each is all zeros or all ones -
22648   // suitable for shuffle masking.
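        // e.g. (little-endian) and v2i64 X, <0x00000000FFFFFFFF, 0xFFFFFFFF00000000>
        // splits at i32 granularity (Split = 2) into the clear mask <0,5,6,3>:
        // keep lanes 0 and 3 of (v4i32 bitcast X), take lanes 1 and 2 from zero.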
22649   auto BuildClearMask = [&](int Split) {
22650     int NumSubElts = NumElts * Split;
22651     int NumSubBits = RVT.getScalarSizeInBits() / Split;
22652 
22653     SmallVector<int, 8> Indices;
22654     for (int i = 0; i != NumSubElts; ++i) {
22655       int EltIdx = i / Split;
22656       int SubIdx = i % Split;
22657       SDValue Elt = RHS.getOperand(EltIdx);
22658       // X & undef --> 0 (not undef). So this lane must be converted to choose
22659       // from the zero constant vector (same as if the element had all 0-bits).
22660       if (Elt.isUndef()) {
22661         Indices.push_back(i + NumSubElts);
22662         continue;
22663       }
22664 
22665       APInt Bits;
22666       if (isa<ConstantSDNode>(Elt))
22667         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22668       else if (isa<ConstantFPSDNode>(Elt))
22669         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22670       else
22671         return SDValue();
22672 
22673       // Extract the sub element from the constant bit mask.
22674       if (DAG.getDataLayout().isBigEndian())
22675         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22676       else
22677         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22678 
22679       if (Bits.isAllOnes())
22680         Indices.push_back(i);
22681       else if (Bits == 0)
22682         Indices.push_back(i + NumSubElts);
22683       else
22684         return SDValue();
22685     }
22686 
22687     // Let's see if the target supports this vector_shuffle.
22688     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22689     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22690     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22691       return SDValue();
22692 
22693     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22694     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22695                                                    DAG.getBitcast(ClearVT, LHS),
22696                                                    Zero, Indices));
22697   };
22698 
22699   // Determine maximum split level (byte level masking).
22700   int MaxSplit = 1;
22701   if (RVT.getScalarSizeInBits() % 8 == 0)
22702     MaxSplit = RVT.getScalarSizeInBits() / 8;
22703 
22704   for (int Split = 1; Split <= MaxSplit; ++Split)
22705     if (RVT.getScalarSizeInBits() % Split == 0)
22706       if (SDValue S = BuildClearMask(Split))
22707         return S;
22708 
22709   return SDValue();
22710 }
22711 
22712 /// If a vector binop is performed on splat values, it may be profitable to
22713 /// extract, scalarize, and insert/splat.
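      /// e.g. add (splat X, 2), (splat Y, 2)
      ///        --> splat (add (extelt X, 2), (extelt Y, 2)), 2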
22714 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
22715                                       const SDLoc &DL) {
22716   SDValue N0 = N->getOperand(0);
22717   SDValue N1 = N->getOperand(1);
22718   unsigned Opcode = N->getOpcode();
22719   EVT VT = N->getValueType(0);
22720   EVT EltVT = VT.getVectorElementType();
22721   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22722 
22723   // TODO: Remove/replace the extract cost check? If the elements are available
22724   //       as scalars, then there may be no extract cost. Should we ask if
22725   //       inserting a scalar back into a vector is cheap instead?
22726   int Index0, Index1;
22727   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22728   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22729   if (!Src0 || !Src1 || Index0 != Index1 ||
22730       Src0.getValueType().getVectorElementType() != EltVT ||
22731       Src1.getValueType().getVectorElementType() != EltVT ||
22732       !TLI.isExtractVecEltCheap(VT, Index0) ||
22733       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22734     return SDValue();
22735 
22736   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22737   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22738   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22739   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22740 
22741   // If all lanes but 1 are undefined, no need to splat the scalar result.
22742   // TODO: Keep track of undefs and use that info in the general case.
22743   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22744       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22745       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22746     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22747     // build_vec ..undef, (bo X, Y), undef...
22748     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22749     Ops[Index0] = ScalarBO;
22750     return DAG.getBuildVector(VT, DL, Ops);
22751   }
22752 
22753   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22754   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22755   return DAG.getBuildVector(VT, DL, Ops);
22756 }
22757 
22758 /// Visit a binary vector operation, like ADD.
22759 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
22760   EVT VT = N->getValueType(0);
22761   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
22762 
22763   SDValue LHS = N->getOperand(0);
22764   SDValue RHS = N->getOperand(1);
22765   unsigned Opcode = N->getOpcode();
22766   SDNodeFlags Flags = N->getFlags();
22767 
22768   // Move unary shuffles with identical masks after a vector binop:
22769   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22770   //   --> shuffle (VBinOp A, B), Undef, Mask
22771   // This does not require type legality checks because we are creating the
22772   // same types of operations that are in the original sequence. We do have to
22773   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22774   // though. This code is adapted from the identical transform in instcombine.
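        // e.g. add (shuffle A, undef, <1,1,3,3>), (shuffle B, undef, <1,1,3,3>)
        //        --> shuffle (add A, B), undef, <1,1,3,3>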
22775   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22776       Opcode != ISD::UREM && Opcode != ISD::SREM &&
22777       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22778     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22779     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22780     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22781         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22782         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22783       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22784                                      RHS.getOperand(0), Flags);
22785       SDValue UndefV = LHS.getOperand(1);
22786       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22787     }
22788 
22789     // Try to sink a splat shuffle after a binop with a uniform constant.
22790     // This is limited to cases where neither the shuffle nor the constant have
22791     // undefined elements because that could be poison-unsafe or inhibit
22792     // demanded elements analysis. It is further limited to not change a splat
22793     // of an inserted scalar because that may be optimized better by
22794     // load-folding or other target-specific behaviors.
22795     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22796         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22797         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22798       // binop (splat X), (splat C) --> splat (binop X, C)
22799       SDValue X = Shuf0->getOperand(0);
22800       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22801       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22802                                   Shuf0->getMask());
22803     }
22804     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22805         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22806         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22807       // binop (splat C), (splat X) --> splat (binop C, X)
22808       SDValue X = Shuf1->getOperand(0);
22809       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22810       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22811                                   Shuf1->getMask());
22812     }
22813   }
22814 
22815   // The following pattern is likely to emerge with vector reduction ops. Moving
22816   // the binary operation ahead of insertion may allow using a narrower vector
22817   // instruction that has better performance than the wide version of the op:
22818   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22819   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22820       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22821       LHS.getOperand(2) == RHS.getOperand(2) &&
22822       (LHS.hasOneUse() || RHS.hasOneUse())) {
22823     SDValue X = LHS.getOperand(1);
22824     SDValue Y = RHS.getOperand(1);
22825     SDValue Z = LHS.getOperand(2);
22826     EVT NarrowVT = X.getValueType();
22827     if (NarrowVT == Y.getValueType() &&
22828         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22829                                               LegalOperations)) {
22830       // (binop undef, undef) may not return undef, so compute that result.
22831       SDValue VecC =
22832           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22833       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22834       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22835     }
22836   }
22837 
22838   // Make sure all but the first op are undef or constant.
22839   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22840     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22841            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22842              return Op.isUndef() ||
22843                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22844            });
22845   };
22846 
22847   // The following pattern is likely to emerge with vector reduction ops. Moving
22848   // the binary operation ahead of the concat may allow using a narrower vector
22849   // instruction that has better performance than the wide version of the op:
22850   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22851   //   concat (VBinOp X, Y), VecC
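        // For illustration, a hypothetical instance with VT = v8i32 and X, Y of
        // type v4i32:
        //   add (concat X, undef), (concat Y, undef)
        //     --> concat (add X, Y), (add undef, undef)
        // The loop below emits one narrow binop per operand pair; the pairs of
        // undef/constant operands simply constant-fold.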
22852   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22853       (LHS.hasOneUse() || RHS.hasOneUse())) {
22854     EVT NarrowVT = LHS.getOperand(0).getValueType();
22855     if (NarrowVT == RHS.getOperand(0).getValueType() &&
22856         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22857       unsigned NumOperands = LHS.getNumOperands();
22858       SmallVector<SDValue, 4> ConcatOps;
22859       for (unsigned i = 0; i != NumOperands; ++i) {
22860         // This constant-folds for operands 1 and up (undef/constant inputs).
22861         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22862                                         RHS.getOperand(i)));
22863       }
22864 
22865       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22866     }
22867   }
22868 
22869   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
22870     return V;
22871 
22872   return SDValue();
22873 }
22874 
22875 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22876                                     SDValue N2) {
22877   assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
22878 
22879   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22880                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
22881 
22882   // If SimplifySelectCC returned a simplified select_cc node, break it down
22883   // into a new SETCC node and a new SELECT node, then return the SELECT node,
22884   // since we were called with a SELECT node.
22885   if (SCC.getNode()) {
22886     // Check to see if we got a select_cc back (to turn into setcc/select).
22887     // Otherwise, just return whatever node we got back, like fabs.
22888     if (SCC.getOpcode() == ISD::SELECT_CC) {
22889       const SDNodeFlags Flags = N0.getNode()->getFlags();
22890       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22891                                   N0.getValueType(),
22892                                   SCC.getOperand(0), SCC.getOperand(1),
22893                                   SCC.getOperand(4), Flags);
22894       AddToWorklist(SETCC.getNode());
22895       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22896                                          SCC.getOperand(2), SCC.getOperand(3));
22897       SelectNode->setFlags(Flags);
22898       return SelectNode;
22899     }
22900 
22901     return SCC;
22902   }
22903   return SDValue();
22904 }
22905 
22906 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22907 /// being selected between, see if we can simplify the select.  Callers of this
22908 /// should assume that TheSelect is deleted if this returns true.  As such, they
22909 /// should return the appropriate thing (e.g. the node) back to the top-level of
22910 /// the DAG combiner loop to avoid it being looked at.
22911 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22912                                     SDValue RHS) {
22913   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22914   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22915   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22916     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22917       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22918       SDValue Sqrt = RHS;
22919       ISD::CondCode CC;
22920       SDValue CmpLHS;
22921       const ConstantFPSDNode *Zero = nullptr;
22922 
22923       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22924         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22925         CmpLHS = TheSelect->getOperand(0);
22926         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22927       } else {
22928         // SELECT or VSELECT
22929         SDValue Cmp = TheSelect->getOperand(0);
22930         if (Cmp.getOpcode() == ISD::SETCC) {
22931           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22932           CmpLHS = Cmp.getOperand(0);
22933           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22934         }
22935       }
22936       if (Zero && Zero->isZero() &&
22937           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22938           CC == ISD::SETULT || CC == ISD::SETLT)) {
22939         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22940         CombineTo(TheSelect, Sqrt);
22941         return true;
22942       }
22943     }
22944   }
22945   // Cannot simplify select with vector condition
22946   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22947 
22948   // If this is a select from two identical things, try to pull the operation
22949   // through the select.
22950   if (LHS.getOpcode() != RHS.getOpcode() ||
22951       !LHS.hasOneUse() || !RHS.hasOneUse())
22952     return false;
22953 
22954   // If this is a load and the token chain is identical, replace the select
22955   // of two loads with a load through a select of the address to load from.
22956   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22957   // constants have been dropped into the constant pool.
22958   if (LHS.getOpcode() == ISD::LOAD) {
22959     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22960     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22961 
22962     // Token chains must be identical.
22963     if (LHS.getOperand(0) != RHS.getOperand(0) ||
22964         // Do not let this transformation reduce the number of volatile loads.
22965         // Be conservative for atomics for the moment
22966         // TODO: This does appear to be legal for unordered atomics (see D66309)
22967         !LLD->isSimple() || !RLD->isSimple() ||
22968         // FIXME: If either is a pre/post inc/dec load,
22969         // we'd need to split out the address adjustment.
22970         LLD->isIndexed() || RLD->isIndexed() ||
22971         // If this is an EXTLOAD, the VT's must match.
22972         LLD->getMemoryVT() != RLD->getMemoryVT() ||
22973         // If this is an EXTLOAD, the kind of extension must match.
22974         (LLD->getExtensionType() != RLD->getExtensionType() &&
22975          // The only exception is if one of the extensions is anyext.
22976          LLD->getExtensionType() != ISD::EXTLOAD &&
22977          RLD->getExtensionType() != ISD::EXTLOAD) ||
22978         // FIXME: this discards src value information.  This is
22979         // over-conservative. It would be beneficial to be able to remember
22980         // both potential memory locations.  Since we are discarding
22981         // src value info, don't do the transformation if the memory
22982         // locations are not in the default address space.
22983         LLD->getPointerInfo().getAddrSpace() != 0 ||
22984         RLD->getPointerInfo().getAddrSpace() != 0 ||
22985         // We can't produce a CMOV of a TargetFrameIndex since we won't
22986         // generate the required address computation.
22987         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22988         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22989         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22990                                       LLD->getBasePtr().getValueType()))
22991       return false;
22992 
22993     // The loads must not depend on one another.
22994     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22995       return false;
22996 
22997     // Check that the select condition doesn't reach either load.  If so,
22998     // folding this will induce a cycle into the DAG.  If not, this is safe to
22999     // xform, so create a select of the addresses.
23000 
23001     SmallPtrSet<const SDNode *, 32> Visited;
23002     SmallVector<const SDNode *, 16> Worklist;
23003 
23004     // Always fail if LLD and RLD are not independent. TheSelect is a
23005     // predecessor to all Nodes in question so we need not search past it.
23006 
23007     Visited.insert(TheSelect);
23008     Worklist.push_back(LLD);
23009     Worklist.push_back(RLD);
23010 
23011     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23012         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23013       return false;
23014 
23015     SDValue Addr;
23016     if (TheSelect->getOpcode() == ISD::SELECT) {
23017       // We cannot do this optimization if any pair of {RLD, LLD} is a
23018       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23019       // Loads, we only need to check if CondNode is a successor to one of the
23020       // loads. We can further avoid this if there's no use of their chain
23021       // value.
23022       SDNode *CondNode = TheSelect->getOperand(0).getNode();
23023       Worklist.push_back(CondNode);
23024 
23025       if ((LLD->hasAnyUseOfValue(1) &&
23026            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23027           (RLD->hasAnyUseOfValue(1) &&
23028            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23029         return false;
23030 
23031       Addr = DAG.getSelect(SDLoc(TheSelect),
23032                            LLD->getBasePtr().getValueType(),
23033                            TheSelect->getOperand(0), LLD->getBasePtr(),
23034                            RLD->getBasePtr());
23035     } else {  // Otherwise SELECT_CC
23036       // We cannot do this optimization if any pair of {RLD, LLD} is a
23037       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23038       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23039       // one of the loads. We can further avoid this if there's no use of their
23040       // chain value.
23041 
23042       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23043       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23044       Worklist.push_back(CondLHS);
23045       Worklist.push_back(CondRHS);
23046 
23047       if ((LLD->hasAnyUseOfValue(1) &&
23048            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23049           (RLD->hasAnyUseOfValue(1) &&
23050            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23051         return false;
23052 
23053       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23054                          LLD->getBasePtr().getValueType(),
23055                          TheSelect->getOperand(0),
23056                          TheSelect->getOperand(1),
23057                          LLD->getBasePtr(), RLD->getBasePtr(),
23058                          TheSelect->getOperand(4));
23059     }
23060 
23061     SDValue Load;
23062     // It is safe to replace the two loads if they have different alignments,
23063     // but the new load must use the minimum (most restrictive) alignment of the
23064     // inputs.
23065     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
23066     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23067     if (!RLD->isInvariant())
23068       MMOFlags &= ~MachineMemOperand::MOInvariant;
23069     if (!RLD->isDereferenceable())
23070       MMOFlags &= ~MachineMemOperand::MODereferenceable;
23071     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23072       // FIXME: Discards pointer and AA info.
23073       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23074                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23075                          MMOFlags);
23076     } else {
23077       // FIXME: Discards pointer and AA info.
23078       Load = DAG.getExtLoad(
23079           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23080                                                   : LLD->getExtensionType(),
23081           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23082           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23083     }
23084 
23085     // Users of the select now use the result of the load.
23086     CombineTo(TheSelect, Load);
23087 
23088     // Users of the old loads now use the new load's chain.  We know the
23089     // old-load value is dead now.
23090     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23091     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23092     return true;
23093   }
23094 
23095   return false;
23096 }
23097 
23098 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23099 /// bitwise 'and'.
23100 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23101                                             SDValue N1, SDValue N2, SDValue N3,
23102                                             ISD::CondCode CC) {
23103   // If this is a select where the false operand is zero and the compare is a
23104   // check of the sign bit, see if we can perform the "gzip trick":
23105   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23106   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
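        // For illustration, with hypothetical i32 values: if X = -5, then
        // (sra X, 31) is all-ones and the AND yields A; if X = 5, it is zero
        // and the AND yields 0 -- exactly "X < 0 ? A : 0".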
23107   EVT XType = N0.getValueType();
23108   EVT AType = N2.getValueType();
23109   if (!isNullConstant(N3) || !XType.bitsGE(AType))
23110     return SDValue();
23111 
23112   // If the comparison is testing for a positive value, we have to invert
23113   // the sign bit mask, so only do that transform if the target has a bitwise
23114   // 'and not' instruction (the invert is free).
23115   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23116     // (X > -1) ? A : 0
23117     // (X >  0) ? X : 0 <-- This is canonical signed max.
23118     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23119       return SDValue();
23120   } else if (CC == ISD::SETLT) {
23121     // (X <  0) ? A : 0
23122     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
23123     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23124       return SDValue();
23125   } else {
23126     return SDValue();
23127   }
23128 
23129   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23130   // constant.
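        // For illustration, a hypothetical i32 case: for A == 8 (bit 3),
        // ShCt == 32 - 3 - 1 == 28, and (srl X, 28) moves the sign bit of X
        // into bit 3, where the AND with 8 can test it directly.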
23131   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23132   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23133   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23134     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23135     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23136       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23137       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23138       AddToWorklist(Shift.getNode());
23139 
23140       if (XType.bitsGT(AType)) {
23141         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23142         AddToWorklist(Shift.getNode());
23143       }
23144 
23145       if (CC == ISD::SETGT)
23146         Shift = DAG.getNOT(DL, Shift, AType);
23147 
23148       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23149     }
23150   }
23151 
23152   unsigned ShCt = XType.getSizeInBits() - 1;
23153   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23154     return SDValue();
23155 
23156   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23157   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23158   AddToWorklist(Shift.getNode());
23159 
23160   if (XType.bitsGT(AType)) {
23161     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23162     AddToWorklist(Shift.getNode());
23163   }
23164 
23165   if (CC == ISD::SETGT)
23166     Shift = DAG.getNOT(DL, Shift, AType);
23167 
23168   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23169 }
23170 
23171 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
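      // For illustration, a hypothetical instance of the first fold below:
      //   select Cond, (add X, Y), (add Z, Y) --> add (select Cond, X, Z), Y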
23172 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23173   SDValue N0 = N->getOperand(0);
23174   SDValue N1 = N->getOperand(1);
23175   SDValue N2 = N->getOperand(2);
23176   EVT VT = N->getValueType(0);
23177   SDLoc DL(N);
23178 
23179   unsigned BinOpc = N1.getOpcode();
23180   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23181     return SDValue();
23182 
23183   // The use checks are intentionally on SDNode because we may be dealing
23184   // with opcodes that produce more than one SDValue.
23185   // TODO: Do we really need to check N0 (the condition operand of the select)?
23186   //       But removing that clause could cause an infinite loop...
23187   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23188     return SDValue();
23189 
23190   // Binops may include opcodes that return multiple values, so all values
23191   // must be created/propagated from the newly created binops below.
23192   SDVTList OpVTs = N1->getVTList();
23193 
23194   // Fold select(cond, binop(x, y), binop(z, y))
23195   //  --> binop(select(cond, x, z), y)
23196   if (N1.getOperand(1) == N2.getOperand(1)) {
23197     SDValue NewSel =
23198         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23199     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23200     NewBinOp->setFlags(N1->getFlags());
23201     NewBinOp->intersectFlagsWith(N2->getFlags());
23202     return NewBinOp;
23203   }
23204 
23205   // Fold select(cond, binop(x, y), binop(x, z))
23206   //  --> binop(x, select(cond, y, z))
23207   // Second op VT might be different (e.g. shift amount type)
23208   if (N1.getOperand(0) == N2.getOperand(0) &&
23209       VT == N1.getOperand(1).getValueType() &&
23210       VT == N2.getOperand(1).getValueType()) {
23211     SDValue NewSel =
23212         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23213     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23214     NewBinOp->setFlags(N1->getFlags());
23215     NewBinOp->intersectFlagsWith(N2->getFlags());
23216     return NewBinOp;
23217   }
23218 
23219   // TODO: Handle isCommutativeBinOp patterns as well?
23220   return SDValue();
23221 }
23222 
23223 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23224 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23225   SDValue N0 = N->getOperand(0);
23226   EVT VT = N->getValueType(0);
23227   bool IsFabs = N->getOpcode() == ISD::FABS;
23228   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23229 
23230   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23231     return SDValue();
23232 
23233   SDValue Int = N0.getOperand(0);
23234   EVT IntVT = Int.getValueType();
23235 
23236   // The operand of the bitcast must be a scalar integer.
23237   if (!IntVT.isInteger() || IntVT.isVector())
23238     return SDValue();
23239 
23240   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23241   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
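        // For illustration, assuming the usual IEEE f32 layout bitcast from
        // i32: the sign mask is 0x80000000, so fneg XORs with 0x80000000 and
        // fabs ANDs with 0x7FFFFFFF.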
23242   APInt SignMask;
23243   if (N0.getValueType().isVector()) {
23244     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23245     // 0x7f...) per element and splat it.
23246     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23247     if (IsFabs)
23248       SignMask = ~SignMask;
23249     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23250   } else {
23251     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23252     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23253     if (IsFabs)
23254       SignMask = ~SignMask;
23255   }
23256   SDLoc DL(N0);
23257   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23258                     DAG.getConstant(SignMask, DL, IntVT));
23259   AddToWorklist(Int.getNode());
23260   return DAG.getBitcast(VT, Int);
23261 }
23262 
23263 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
23264 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23265 /// in it. This may be a win when the constant is not otherwise available
23266 /// because it replaces two constant pool loads with one.
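      /// For illustration: the pool array built below is laid out as { FV, TV },
      /// so selecting offset EltSize when the condition is true loads TV
      /// (element 1), while offset 0 loads FV (element 0).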
23267 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23268     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23269     ISD::CondCode CC) {
23270   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23271     return SDValue();
23272 
23273   // If we are before legalize types, we want the other legalization to happen
23274   // first (for example, to avoid messing with soft float).
23275   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23276   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23277   EVT VT = N2.getValueType();
23278   if (!TV || !FV || !TLI.isTypeLegal(VT))
23279     return SDValue();
23280 
23281   // If a constant can be materialized without loads, this does not make sense.
23282   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23283       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23284       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23285     return SDValue();
23286 
23287   // If both constants have multiple uses, then we won't need to do an extra
23288   // load. The values are likely around in registers for other users.
23289   if (!TV->hasOneUse() && !FV->hasOneUse())
23290     return SDValue();
23291 
23292   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23293                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23294   Type *FPTy = Elts[0]->getType();
23295   const DataLayout &TD = DAG.getDataLayout();
23296 
23297   // Create a ConstantArray of the two constants.
23298   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23299   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23300                                       TD.getPrefTypeAlign(FPTy));
23301   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23302 
23303   // Get offsets to the 0 and 1 elements of the array, so we can select between
23304   // them.
23305   SDValue Zero = DAG.getIntPtrConstant(0, DL);
23306   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23307   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23308   SDValue Cond =
23309       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23310   AddToWorklist(Cond.getNode());
23311   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23312   AddToWorklist(CstOffset.getNode());
23313   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23314   AddToWorklist(CPIdx.getNode());
23315   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23316                      MachinePointerInfo::getConstantPool(
23317                          DAG.getMachineFunction()), Alignment);
23318 }
23319 
23320 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23321 /// where 'cond' is the comparison specified by CC.
23322 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23323                                       SDValue N2, SDValue N3, ISD::CondCode CC,
23324                                       bool NotExtCompare) {
23325   // (x ? y : y) -> y.
23326   if (N2 == N3) return N2;
23327 
23328   EVT CmpOpVT = N0.getValueType();
23329   EVT CmpResVT = getSetCCResultType(CmpOpVT);
23330   EVT VT = N2.getValueType();
23331   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23332   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23333   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23334 
23335   // Determine if the condition we're dealing with is constant.
23336   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23337     AddToWorklist(SCC.getNode());
23338     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23339       // fold select_cc true, x, y -> x
23340       // fold select_cc false, x, y -> y
23341       return !(SCCC->isZero()) ? N2 : N3;
23342     }
23343   }
23344 
23345   if (SDValue V =
23346           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23347     return V;
23348 
23349   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23350     return V;
23351 
23352   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23353   // where y has a single bit set.
23354   // In plain terms: we can turn the SELECT_CC into an AND
23355   // when the condition can be materialized as an all-ones register.  Any
23356   // single bit-test can be materialized as an all-ones register with
23357   // shift-left and shift-right-arith.
23358   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23359       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23360     SDValue AndLHS = N0->getOperand(0);
23361     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23362     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23363       // Shift the tested bit over the sign bit.
23364       const APInt &AndMask = ConstAndRHS->getAPIntValue();
23365       unsigned ShCt = AndMask.getBitWidth() - 1;
23366       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23367         SDValue ShlAmt =
23368           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23369                           getShiftAmountTy(AndLHS.getValueType()));
23370         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23371 
23372         // Now arithmetic right shift it all the way over, so the result is
23373         // either all-ones, or zero.
23374         SDValue ShrAmt =
23375           DAG.getConstant(ShCt, SDLoc(Shl),
23376                           getShiftAmountTy(Shl.getValueType()));
23377         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23378 
23379         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23380       }
23381     }
23382   }
23383 
23384   // fold select C, 16, 0 -> shl C, 4
23385   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23386   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23387 
23388   if ((Fold || Swap) &&
23389       TLI.getBooleanContents(CmpOpVT) ==
23390           TargetLowering::ZeroOrOneBooleanContent &&
23391       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23392 
23393     if (Swap) {
23394       CC = ISD::getSetCCInverse(CC, CmpOpVT);
23395       std::swap(N2C, N3C);
23396     }
23397 
23398     // If the caller doesn't want us to simplify this into a zext of a compare,
23399     // don't do it.
23400     if (NotExtCompare && N2C->isOne())
23401       return SDValue();
23402 
23403     SDValue Temp, SCC;
23404     // zext (setcc n0, n1)
23405     if (LegalTypes) {
23406       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23407       if (VT.bitsLT(SCC.getValueType()))
23408         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23409       else
23410         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23411     } else {
23412       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23413       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23414     }
23415 
23416     AddToWorklist(SCC.getNode());
23417     AddToWorklist(Temp.getNode());
23418 
23419     if (N2C->isOne())
23420       return Temp;
23421 
23422     unsigned ShCt = N2C->getAPIntValue().logBase2();
23423     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23424       return SDValue();
23425 
23426     // shl setcc result by log2 n2c
23427     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23428                        DAG.getConstant(ShCt, SDLoc(Temp),
23429                                        getShiftAmountTy(Temp.getValueType())));
23430   }
23431 
23432   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23433   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23434   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23435   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23436   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23437   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23438   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23439   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23440   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23441     SDValue ValueOnZero = N2;
23442     SDValue Count = N3;
23443     // If the condition is NE instead of EQ, swap the operands.
23444     if (CC == ISD::SETNE)
23445       std::swap(ValueOnZero, Count);
23446     // Check if the value on zero is a constant equal to the bits in the type.
23447     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23448       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23449         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23450         // legal, combine to just cttz.
23451         if ((Count.getOpcode() == ISD::CTTZ ||
23452              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23453             N0 == Count.getOperand(0) &&
23454             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23455           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23456         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23457         // legal, combine to just ctlz.
23458         if ((Count.getOpcode() == ISD::CTLZ ||
23459              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23460             N0 == Count.getOperand(0) &&
23461             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23462           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23463       }
23464     }
23465   }
23466 
23467   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23468   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
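        // For illustration: (ashr X, BW-1) is 0 when X >= 0 and all-ones when
        // X < 0, so the XOR selects between the constant and its complement.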
23469   if (!NotExtCompare && N1C && N2C && N3C &&
23470       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23471       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23472        (N1C->isZero() && CC == ISD::SETLT)) &&
23473       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23474     SDValue ASR = DAG.getNode(
23475         ISD::SRA, DL, CmpOpVT, N0,
23476         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23477     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23478                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23479   }
23480 
23481   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23482     return S;
23483   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23484     return S;
23485 
23486   return SDValue();
23487 }
23488 
23489 /// This is a stub for TargetLowering::SimplifySetCC.
23490 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23491                                    ISD::CondCode Cond, const SDLoc &DL,
23492                                    bool foldBooleans) {
23493   TargetLowering::DAGCombinerInfo
23494     DagCombineInfo(DAG, Level, false, this);
23495   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23496 }
23497 
23498 /// Given an ISD::SDIV node expressing a divide by constant, return a DAG
23499 /// expression that will generate the same value by multiplying by a magic
23500 /// number.
23501 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
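      /// For illustration (a textbook example, not necessarily the constants the
      /// target chooses): a 32-bit sdiv by 3 can be computed as
      ///   q = mulhs(X, 0x55555556); q += lshr(X, 31);
      /// where mulhs takes the high 32 bits of the 64-bit signed product.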
23502 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23503   // When optimising for minimum size, we don't want to expand a div to a mul
23504   // and a shift.
23505   if (DAG.getMachineFunction().getFunction().hasMinSize())
23506     return SDValue();
23507 
23508   SmallVector<SDNode *, 8> Built;
23509   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23510     for (SDNode *N : Built)
23511       AddToWorklist(N);
23512     return S;
23513   }
23514 
23515   return SDValue();
23516 }
23517 
23518 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23519 /// DAG expression that will generate the same value by right shifting.
23520 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23521   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23522   if (!C)
23523     return SDValue();
23524 
23525   // Avoid division by zero.
23526   if (C->isZero())
23527     return SDValue();
23528 
23529   SmallVector<SDNode *, 8> Built;
23530   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23531     for (SDNode *N : Built)
23532       AddToWorklist(N);
23533     return S;
23534   }
23535 
23536   return SDValue();
23537 }
23538 
23539 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23540 /// expression that will generate the same value by multiplying by a magic
23541 /// number.
23542 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
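      /// For illustration (again a textbook example): a 32-bit udiv by 3 is
      ///   q = lshr(mulhu(X, 0xAAAAAAAB), 1);
      /// which is exact for all 32-bit X because 3 * 0xAAAAAAAB == 2^33 + 1.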
23543 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23544   // When optimising for minimum size, we don't want to expand a div to a mul
23545   // and a shift.
23546   if (DAG.getMachineFunction().getFunction().hasMinSize())
23547     return SDValue();
23548 
23549   SmallVector<SDNode *, 8> Built;
23550   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23551     for (SDNode *N : Built)
23552       AddToWorklist(N);
23553     return S;
23554   }
23555 
23556   return SDValue();
23557 }
23558 
23559 /// Determines the LogBase2 value for a non-null input value using the
23560 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
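      /// For illustration, with i32 V == 16: ctlz(16) == 27, so
      /// LogBase2 == 31 - 27 == 4, i.e. log2(16).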
23561 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23562   EVT VT = V.getValueType();
23563   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23564   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23565   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23566   return LogBase2;
23567 }
23568 
23569 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23570 /// For the reciprocal, we need to find the zero of the function:
23571 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
23572 ///     =>
23573 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23574 ///     does not require additional intermediate precision]
23575 /// For the last iteration, put numerator N into it to gain more precision:
23576 ///   Result = N X_i + X_i (N - N A X_i)
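      /// (Derivation sketch: with F(X) = 1/X - A we have F'(X) = -1/X^2, so
      ///  X - F(X)/F'(X) = X + X^2 (1/X - A) = X (2 - A X).)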
23577 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23578                                       SDNodeFlags Flags) {
23579   if (LegalDAG)
23580     return SDValue();
23581 
23582   // TODO: Handle extended types?
23583   EVT VT = Op.getValueType();
23584   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23585       VT.getScalarType() != MVT::f64)
23586     return SDValue();
23587 
23588   // If estimates are explicitly disabled for this function, we're done.
23589   MachineFunction &MF = DAG.getMachineFunction();
23590   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23591   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23592     return SDValue();
23593 
23594   // Estimates may be explicitly enabled for this type with a custom number of
23595   // refinement steps.
23596   int Iterations = TLI.getDivRefinementSteps(VT, MF);
23597   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23598     AddToWorklist(Est.getNode());
23599 
23600     SDLoc DL(Op);
23601     if (Iterations) {
23602       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23603 
23604       // Newton iterations: Est = Est + Est (N - Arg * Est)
23605       // If this is the last iteration, also multiply by the numerator.
23606       for (int i = 0; i < Iterations; ++i) {
23607         SDValue MulEst = Est;
23608 
23609         if (i == Iterations - 1) {
23610           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23611           AddToWorklist(MulEst.getNode());
23612         }
23613 
23614         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23615         AddToWorklist(NewEst.getNode());
23616 
23617         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23618                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23619         AddToWorklist(NewEst.getNode());
23620 
23621         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23622         AddToWorklist(NewEst.getNode());
23623 
23624         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23625         AddToWorklist(Est.getNode());
23626       }
23627     } else {
23628       // If no iterations are available, multiply with N.
23629       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23630       AddToWorklist(Est.getNode());
23631     }
23632 
23633     return Est;
23634   }
23635 
23636   return SDValue();
23637 }
23638 
23639 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23640 /// For the reciprocal sqrt, we need to find the zero of the function:
23641 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23642 ///     =>
23643 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23644 /// As a result, we precompute A/2 prior to the iteration loop.
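      /// (Derivation sketch: with F(X) = 1/X^2 - A we have F'(X) = -2/X^3, so
      ///  X - F(X)/F'(X) = X + (X^3/2)(1/X^2 - A) = X (1.5 - (A/2) X^2).)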
23645 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23646                                          unsigned Iterations,
23647                                          SDNodeFlags Flags, bool Reciprocal) {
23648   EVT VT = Arg.getValueType();
23649   SDLoc DL(Arg);
23650   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23651 
23652   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23653   // this entire sequence requires only one FP constant.
23654   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23655   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23656 
23657   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23658   for (unsigned i = 0; i < Iterations; ++i) {
23659     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23660     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23661     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23662     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23663   }
23664 
23665   // If non-reciprocal square root is requested, multiply the result by Arg.
23666   if (!Reciprocal)
23667     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23668 
23669   return Est;
23670 }
23671 
23672 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23673 /// For the reciprocal sqrt, we need to find the zero of the function:
23674 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23675 ///     =>
23676 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
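      /// This is algebraically the same step as in buildSqrtNROneConst, since
      /// (-0.5 X)(A X^2 - 3.0) = X (1.5 - 0.5 A X^2); it is merely expressed
      /// with the constants -0.5 and -3.0 instead of 1.5.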
23677 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23678                                          unsigned Iterations,
23679                                          SDNodeFlags Flags, bool Reciprocal) {
23680   EVT VT = Arg.getValueType();
23681   SDLoc DL(Arg);
23682   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23683   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23684 
23685   // This routine must enter the loop below to work correctly
23686   // when (Reciprocal == false).
23687   assert(Iterations > 0);
23688 
23689   // Newton iterations for reciprocal square root:
23690   // E = (E * -0.5) * ((A * E) * E + -3.0)
23691   for (unsigned i = 0; i < Iterations; ++i) {
23692     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23693     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23694     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23695 
23696     // When calculating a square root at the last iteration build:
23697     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23698     // (notice a common subexpression)
23699     SDValue LHS;
23700     if (Reciprocal || (i + 1) < Iterations) {
23701       // RSQRT: LHS = (E * -0.5)
23702       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23703     } else {
23704       // SQRT: LHS = (A * E) * -0.5
23705       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23706     }
23707 
23708     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23709   }
23710 
23711   return Est;
23712 }
23713 
23714 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23715 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23716 /// Op can be zero.
23717 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23718                                            bool Reciprocal) {
23719   if (LegalDAG)
23720     return SDValue();
23721 
23722   // TODO: Handle extended types?
23723   EVT VT = Op.getValueType();
23724   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23725       VT.getScalarType() != MVT::f64)
23726     return SDValue();
23727 
23728   // If estimates are explicitly disabled for this function, we're done.
23729   MachineFunction &MF = DAG.getMachineFunction();
23730   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23731   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23732     return SDValue();
23733 
23734   // Estimates may be explicitly enabled for this type with a custom number of
23735   // refinement steps.
23736   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23737 
23738   bool UseOneConstNR = false;
23739   if (SDValue Est =
23740       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23741                           Reciprocal)) {
23742     AddToWorklist(Est.getNode());
23743 
23744     if (Iterations)
23745       Est = UseOneConstNR
23746             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23747             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23748     if (!Reciprocal) {
23749       SDLoc DL(Op);
23750       // Try the target specific test first.
23751       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23752 
23753       // The estimate is now completely wrong if the input was exactly 0.0 or
23754       // possibly a denormal. Force the answer to 0.0 or the value provided by
23755       // the target for those cases.
23756       Est = DAG.getNode(
23757           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23758           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23759     }
23760     return Est;
23761   }
23762 
23763   return SDValue();
23764 }
23765 
23766 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23767   return buildSqrtEstimateImpl(Op, Flags, true);
23768 }
23769 
23770 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23771   return buildSqrtEstimateImpl(Op, Flags, false);
23772 }
23773 
23774 /// Return true if there is any possibility that the two addresses overlap.
23775 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23776 
23777   struct MemUseCharacteristics {
23778     bool IsVolatile;
23779     bool IsAtomic;
23780     SDValue BasePtr;
23781     int64_t Offset;
23782     Optional<int64_t> NumBytes;
23783     MachineMemOperand *MMO;
23784   };
23785 
23786   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23787     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23788       int64_t Offset = 0;
23789       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23790         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23791                      ? C->getSExtValue()
23792                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
23793                            ? -1 * C->getSExtValue()
23794                            : 0;
23795       uint64_t Size =
23796           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23797       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23798               Offset /*base offset*/,
23799               Optional<int64_t>(Size),
23800               LSN->getMemOperand()};
23801     }
23802     if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
23803       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23804               (LN->hasOffset()) ? LN->getOffset() : 0,
23805               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23806                                 : Optional<int64_t>(),
23807               (MachineMemOperand *)nullptr};
23808     // Default.
23809     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23810             (int64_t)0 /*offset*/,
23811             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23812   };
23813 
23814   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23815                         MUC1 = getCharacteristics(Op1);
23816 
23817   // If they are to the same address, then they must be aliases.
23818   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23819       MUC0.Offset == MUC1.Offset)
23820     return true;
23821 
23822   // If they are both volatile then they cannot be reordered.
23823   if (MUC0.IsVolatile && MUC1.IsVolatile)
23824     return true;
23825 
23826   // Be conservative about atomics for the moment
23827   // TODO: This is way overconservative for unordered atomics (see D66309)
23828   if (MUC0.IsAtomic && MUC1.IsAtomic)
23829     return true;
23830 
23831   if (MUC0.MMO && MUC1.MMO) {
23832     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23833         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23834       return false;
23835   }
23836 
23837   // Try to prove that there is aliasing, or that there is no aliasing. Either
23838   // way, we can return now. If nothing can be proved, proceed with more tests.
23839   bool IsAlias;
23840   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23841                                        DAG, IsAlias))
23842     return IsAlias;
23843 
23844   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23845   // either is not known.
23846   if (!MUC0.MMO || !MUC1.MMO)
23847     return true;
23848 
23849   // If one operation reads from invariant memory, and the other may store, they
23850   // cannot alias. These should really be checking the equivalent of mayWrite,
23851   // but it only matters for memory nodes other than load/store.
23852   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23853       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23854     return false;
23855 
23856   // If we know required SrcValue1 and SrcValue2 have relatively large
23857   // alignment compared to the size and offset of the access, we may be able
23858   // to prove they do not alias. This check is conservative for now to catch
23859   // cases created by splitting vector types; it only works when the offsets are
23860   // multiples of the size of the data.
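        // For illustration, with hypothetical values: two 4-byte accesses with
        // base alignment 8 at offsets 0 and 4 give OffAlign values 0 and 4;
        // since 0 + 4 <= 4, the accesses cannot overlap.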
23861   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23862   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23863   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23864   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23865   auto &Size0 = MUC0.NumBytes;
23866   auto &Size1 = MUC1.NumBytes;
23867   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23868       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23869       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23870       SrcValOffset1 % *Size1 == 0) {
23871     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23872     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23873 
23874     // There is no overlap between these relatively aligned accesses of
23875     // similar size. Return no alias.
23876     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23877       return false;
23878   }
23879 
23880   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23881                    ? CombinerGlobalAA
23882                    : DAG.getSubtarget().useAA();
23883 #ifndef NDEBUG
23884   if (CombinerAAOnlyFunc.getNumOccurrences() &&
23885       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23886     UseAA = false;
23887 #endif
23888 
23889   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23890       Size0.hasValue() && Size1.hasValue()) {
23891     // Use alias analysis information.
23892     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23893     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23894     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23895     if (AA->isNoAlias(
23896             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23897                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23898             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23899                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23900       return false;
23901   }
23902 
23903   // Otherwise we have to assume they alias.
23904   return true;
23905 }
23906 
23907 /// Walk up chain skipping non-aliasing memory nodes,
23908 /// looking for aliasing nodes and adding them to the Aliases vector.
23909 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23910                                    SmallVectorImpl<SDValue> &Aliases) {
23911   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
23912   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
23913 
23914   // Get alias information for node.
23915   // TODO: relax aliasing for unordered atomics (see D66309)
23916   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23917 
23918   // Starting off.
23919   Chains.push_back(OriginalChain);
23920   unsigned Depth = 0;
23921 
23922   // Attempt to improve chain by a single step
23923   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23924     switch (C.getOpcode()) {
23925     case ISD::EntryToken:
23926       // No need to mark EntryToken.
23927       C = SDValue();
23928       return true;
23929     case ISD::LOAD:
23930     case ISD::STORE: {
23931       // Get alias information for C.
23932       // TODO: Relax aliasing for unordered atomics (see D66309)
23933       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23934                       cast<LSBaseSDNode>(C.getNode())->isSimple();
23935       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
23936         // Look further up the chain.
23937         C = C.getOperand(0);
23938         return true;
23939       }
23940       // Alias, so stop here.
23941       return false;
23942     }
23943 
23944     case ISD::CopyFromReg:
23945       // Always forward past CopyFromReg.
23946       C = C.getOperand(0);
23947       return true;
23948 
23949     case ISD::LIFETIME_START:
23950     case ISD::LIFETIME_END: {
23951       // We can forward past any lifetime start/end that can be proven not to
23952       // alias the memory access.
23953       if (!mayAlias(N, C.getNode())) {
23954         // Look further up the chain.
23955         C = C.getOperand(0);
23956         return true;
23957       }
23958       return false;
23959     }
23960     default:
23961       return false;
23962     }
23963   };
23964 
23965   // Look at each chain and determine if it is an alias.  If so, add it to the
23966   // aliases list.  If not, then continue up the chain looking for the next
23967   // candidate.
23968   while (!Chains.empty()) {
23969     SDValue Chain = Chains.pop_back_val();
23970 
23971     // Don't bother if we've seen Chain before.
23972     if (!Visited.insert(Chain.getNode()).second)
23973       continue;
23974 
23975     // For TokenFactor nodes, look at each operand and only continue up the
23976     // chain until we reach the depth limit.
23977     //
23978     // FIXME: The depth check could be made to return the last non-aliasing
23979     // chain we found before we hit a tokenfactor rather than the original
23980     // chain.
23981     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23982       Aliases.clear();
23983       Aliases.push_back(OriginalChain);
23984       return;
23985     }
23986 
23987     if (Chain.getOpcode() == ISD::TokenFactor) {
23988       // We have to check each of the operands of the token factor for "small"
23989       // token factors, so we queue them up.  Adding the operands to the queue
23990       // (stack) in reverse order maintains the original order and increases the
23991       // likelihood that getNode will find a matching token factor (CSE).
23992       if (Chain.getNumOperands() > 16) {
23993         Aliases.push_back(Chain);
23994         continue;
23995       }
23996       for (unsigned n = Chain.getNumOperands(); n;)
23997         Chains.push_back(Chain.getOperand(--n));
23998       ++Depth;
23999       continue;
24000     }
24001     // Everything else
24002     if (ImproveChain(Chain)) {
24003       // Updated chain found; consider the new chain if one exists.
24004       if (Chain.getNode())
24005         Chains.push_back(Chain);
24006       ++Depth;
24007       continue;
24008     }
24009     // No improved chain possible; treat it as an alias.
24010     Aliases.push_back(Chain);
24011   }
24012 }
24013 
24014 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24015 /// (aliasing node).
24016 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24017   if (OptLevel == CodeGenOpt::None)
24018     return OldChain;
24019 
24020   // Ops for replacing token factor.
24021   SmallVector<SDValue, 8> Aliases;
24022 
24023   // Accumulate all the aliases to this node.
24024   GatherAllAliases(N, OldChain, Aliases);
24025 
24026   // If no operands then chain to entry token.
24027   if (Aliases.size() == 0)
24028     return DAG.getEntryNode();
24029 
24030   // If a single operand then chain to it.  We don't need to revisit it.
24031   if (Aliases.size() == 1)
24032     return Aliases[0];
24033 
24034   // Construct a custom tailored token factor.
24035   return DAG.getTokenFactor(SDLoc(N), Aliases);
24036 }
24037 
24038 namespace {
24039 // TODO: Replace with std::monostate when we move to C++17.
24040 struct UnitT { } Unit;
24041 bool operator==(const UnitT &, const UnitT &) { return true; }
24042 bool operator!=(const UnitT &, const UnitT &) { return false; }
24043 } // namespace
24044 
24045 // This function tries to collect a bunch of potentially interesting
24046 // nodes to improve the chains of, all at once. This might seem
24047 // redundant, as this function gets called when visiting every store
24048 // node, so why not let the work be done on each store as it's visited?
24049 //
24050 // I believe this is mainly important because mergeConsecutiveStores
24051 // is unable to deal with merging stores of different sizes, so unless
24052 // we improve the chains of all the potential candidates up-front
24053 // before running mergeConsecutiveStores, it might only see some of
24054 // the nodes that will eventually be candidates, and then not be able
24055 // to go from a partially-merged state to the desired final
24056 // fully-merged state.
24057 
24058 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24059   SmallVector<StoreSDNode *, 8> ChainedStores;
24060   StoreSDNode *STChain = St;
24061   // Intervals records which offsets from BaseIndex have been covered. In
24062   // the common case, each store writes to an address adjacent to the previous
24063   // one and is thus merged with the previous interval at insertion time.
24064 
24065   using IMap =
24066       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
24067   IMap::Allocator A;
24068   IMap Intervals(A);
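  // With IntervalMapHalfOpenInfo, an interval [a, b) covers bytes a..b-1, so
  // an N-byte store at offset O occupies exactly [O, O + N).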
24069 
24070   // This holds the base pointer, index, and the offset in bytes from the base
24071   // pointer.
24072   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24073 
24074   // We must have a base and an offset.
24075   if (!BasePtr.getBase().getNode())
24076     return false;
24077 
24078   // Do not handle stores to undef base pointers.
24079   if (BasePtr.getBase().isUndef())
24080     return false;
24081 
24082   // Do not handle stores to opaque types.
24083   if (St->getMemoryVT().isZeroSized())
24084     return false;
24085 
24086   // BaseIndexOffset assumes that offsets are fixed-size, which
24087   // is not valid for scalable vectors where the offsets are
24088   // scaled by `vscale`, so bail out early.
24089   if (St->getMemoryVT().isScalableVector())
24090     return false;
24091 
24092   // Add ST's interval.
24093   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
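  // (SizeInBits + 7) / 8 rounds the store width up to whole bytes; e.g. an
  // i1 store still occupies the one-byte interval [0, 1).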
24094 
24095   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24096     if (Chain->getMemoryVT().isScalableVector())
24097       return false;
24098 
24099     // If the chain has more than one use, then we can't reorder the mem ops.
24100     if (!SDValue(Chain, 0)->hasOneUse())
24101       break;
24102     // TODO: Relax for unordered atomics (see D66309)
24103     if (!Chain->isSimple() || Chain->isIndexed())
24104       break;
24105 
24106     // Find the base pointer and offset for this memory node.
24107     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24108     // Check that the base pointer is the same as the original one.
24109     int64_t Offset;
24110     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24111       break;
24112     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24113     // Make sure we don't overlap with other intervals by checking the ones to
24114     // the left or right before inserting.
24115     auto I = Intervals.find(Offset);
24116     // If there's a next interval, we should end before it.
24117     if (I != Intervals.end() && I.start() < (Offset + Length))
24118       break;
24119     // If there's a previous interval, we should start after it.
24120     if (I != Intervals.begin() && (--I).stop() <= Offset)
24121       break;
24122     Intervals.insert(Offset, Offset + Length, Unit);
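    // Illustrative walk-through: with [0, 4) already in the map, a candidate
    // store at Offset = -4 with Length = 4 gives find(-4) == [0, 4). Its
    // start (0) is not less than Offset + Length (0), so there is no overlap
    // on the right, no interval precedes [0, 4), and [-4, 0) is inserted,
    // coalescing with [0, 4) into [-4, 4).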
24123 
24124     ChainedStores.push_back(Chain);
24125     STChain = Chain;
24126   }
24127 
24128   // If we didn't find a chained store, exit.
24129   if (ChainedStores.size() == 0)
24130     return false;
24131 
24132   // Improve all chained stores (St and the members of ChainedStores) starting
24133   // from where the store chain ended, and join them with a single TokenFactor.
24134   SDValue NewChain = STChain->getChain();
24135   SmallVector<SDValue, 8> TFOps;
24136   for (unsigned I = ChainedStores.size(); I;) {
24137     StoreSDNode *S = ChainedStores[--I];
24138     SDValue BetterChain = FindBetterChain(S, NewChain);
24139     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24140         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24141     TFOps.push_back(SDValue(S, 0));
24142     ChainedStores[I] = S;
24143   }
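  // A store node's operands are (chain, value, base pointer, offset), so the
  // rewrite above swaps in BetterChain as operand 0 and keeps operands 1-3.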
24144 
24145   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24146   SDValue BetterChain = FindBetterChain(St, NewChain);
24147   SDValue NewST;
24148   if (St->isTruncatingStore())
24149     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24150                               St->getBasePtr(), St->getMemoryVT(),
24151                               St->getMemOperand());
24152   else
24153     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24154                          St->getBasePtr(), St->getMemOperand());
24155 
24156   TFOps.push_back(NewST);
24157 
24158   // If we improved every element of TFOps, then the successors of St have lost
24159   // their dependence on NewChain, and we need to add it back to TFOps. Do so
24160   // at the beginning to keep the relative order consistent with FindBetterChain.
24161   auto hasImprovedChain = [&](SDValue ST) -> bool {
24162     return ST->getOperand(0) != NewChain;
24163   };
24164   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24165   if (AddNewChain)
24166     TFOps.insert(TFOps.begin(), NewChain);
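  // E.g. if every rewritten store now chains to something better than
  // NewChain, nothing in TFOps reaches NewChain any more; making NewChain an
  // operand of the TokenFactor preserves the ordering edge that St's chain
  // result used to provide.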
24167 
24168   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24169   CombineTo(St, TF);
24170 
24171   // Add TF and its operands to the worklist.
24172   AddToWorklist(TF.getNode());
24173   for (const SDValue &Op : TF->ops())
24174     AddToWorklist(Op.getNode());
24175   AddToWorklist(STChain);
24176   return true;
24177 }
24178 
24179 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24180   if (OptLevel == CodeGenOpt::None)
24181     return false;
24182 
24183   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24184 
24185   // We must have a base and an offset.
24186   if (!BasePtr.getBase().getNode())
24187     return false;
24188 
24189   // Do not handle stores to undef base pointers.
24190   if (BasePtr.getBase().isUndef())
24191     return false;
24192 
24193   // Directly improve a chain of disjoint stores starting at St.
24194   if (parallelizeChainedStores(St))
24195     return true;
24196 
24197   // Improve St's chain.
24198   SDValue BetterChain = FindBetterChain(St, St->getChain());
24199   if (St->getChain() != BetterChain) {
24200     replaceStoreChain(St, BetterChain);
24201     return true;
24202   }
24203   return false;
24204 }
24205 
24206 /// This is the entry point for the file.
24207 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
24208                            CodeGenOpt::Level OptLevel) {
24209   // This is the main entry point to this class.
24210   DAGCombiner(*this, AA, OptLevel).Run(Level);
24211 }
24212