xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (revision 47ef2a131091508e049ab10cad7f91a3c1342cd9)
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the Hexagon target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "HexagonISelDAGToDAG.h"
14 #include "Hexagon.h"
15 #include "HexagonISelLowering.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonTargetMachine.h"
18 #include "llvm/CodeGen/FunctionLoweringInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/SelectionDAGISel.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsHexagon.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 #define PASS_NAME "Hexagon DAG->DAG Pattern Instruction Selection"
29 
30 static
31 cl::opt<bool>
32 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
33   cl::desc("Rebalance address calculation trees to improve "
34           "instruction selection"));
35 
36 // Rebalance only if this allows e.g. combining a GA with an offset or
37 // factoring out a shift.
38 static
39 cl::opt<bool>
40 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
41   cl::desc("Rebalance address tree only if this allows optimizations"));
42 
43 static
44 cl::opt<bool>
45 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
46   cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
47 
48 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
49   cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
50 
51 //===----------------------------------------------------------------------===//
52 // Instruction Selector Implementation
53 //===----------------------------------------------------------------------===//
54 
55 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
56 #include "HexagonGenDAGISel.inc"
57 
58 namespace llvm {
59 /// createHexagonISelDag - This pass converts a legalized DAG into a
60 /// Hexagon-specific DAG, ready for instruction scheduling.
61 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
62                                    CodeGenOptLevel OptLevel) {
63   return new HexagonDAGToDAGISelLegacy(TM, OptLevel);
64 }
65 }
66 
67 HexagonDAGToDAGISelLegacy::HexagonDAGToDAGISelLegacy(HexagonTargetMachine &tm,
68                                                      CodeGenOptLevel OptLevel)
69     : SelectionDAGISelLegacy(
70           ID, std::make_unique<HexagonDAGToDAGISel>(tm, OptLevel)) {}
71 
72 char HexagonDAGToDAGISelLegacy::ID = 0;
73 
74 INITIALIZE_PASS(HexagonDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
75 
76 void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
77   SDValue Chain = LD->getChain();
78   SDValue Base = LD->getBasePtr();
79   SDValue Offset = LD->getOffset();
80   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
81   EVT LoadedVT = LD->getMemoryVT();
82   unsigned Opcode = 0;
83 
84   // Check for zero extended loads. Treat any-extend loads as zero extended
85   // loads.
86   ISD::LoadExtType ExtType = LD->getExtensionType();
87   bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
88   bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
89 
90   assert(LoadedVT.isSimple());
91   switch (LoadedVT.getSimpleVT().SimpleTy) {
92   case MVT::i8:
93     if (IsZeroExt)
94       Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
95     else
96       Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
97     break;
98   case MVT::i16:
99     if (IsZeroExt)
100       Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
101     else
102       Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
103     break;
104   case MVT::i32:
105   case MVT::f32:
106   case MVT::v2i16:
107   case MVT::v4i8:
108     Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
109     break;
110   case MVT::i64:
111   case MVT::f64:
112   case MVT::v2i32:
113   case MVT::v4i16:
114   case MVT::v8i8:
115     Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
116     break;
117   case MVT::v64i8:
118   case MVT::v32i16:
119   case MVT::v16i32:
120   case MVT::v8i64:
121   case MVT::v128i8:
122   case MVT::v64i16:
123   case MVT::v32i32:
124   case MVT::v16i64:
125     if (isAlignedMemNode(LD)) {
126       if (LD->isNonTemporal())
127         Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
128       else
129         Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
130     } else {
131       Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
132     }
133     break;
134   default:
135     llvm_unreachable("Unexpected memory type in indexed load");
136   }
137 
138   SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
139   MachineMemOperand *MemOp = LD->getMemOperand();
140 
141   auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
142         -> MachineSDNode* {
143     if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
144       SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
145       return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
146                                     Zero, SDValue(N, 0));
147     }
148     if (ExtType == ISD::SEXTLOAD)
149       return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
150                                     SDValue(N, 0));
151     return N;
152   };
153 
154   //                  Loaded value   Next address   Chain
155   SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
156   SDValue To[3];
157 
158   EVT ValueVT = LD->getValueType(0);
159   if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
160     // A load extending to i64 will actually produce i32, which will then
161     // need to be extended to i64.
162     assert(LoadedVT.getSizeInBits() <= 32);
163     ValueVT = MVT::i32;
164   }
165 
166   if (IsValidInc) {
167     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
168                                               MVT::i32, MVT::Other, Base,
169                                               IncV, Chain);
170     CurDAG->setNodeMemRefs(L, {MemOp});
171     To[1] = SDValue(L, 1); // Next address.
172     To[2] = SDValue(L, 2); // Chain.
173     // Handle special case for extension to i64.
174     if (LD->getValueType(0) == MVT::i64)
175       L = getExt64(L, dl);
176     To[0] = SDValue(L, 0); // Loaded (extended) value.
177   } else {
178     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
179     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
180                                               Base, Zero, Chain);
181     CurDAG->setNodeMemRefs(L, {MemOp});
182     To[2] = SDValue(L, 1); // Chain.
183     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
184                                               Base, IncV);
185     To[1] = SDValue(A, 0); // Next address.
186     // Handle special case for extension to i64.
187     if (LD->getValueType(0) == MVT::i64)
188       L = getExt64(L, dl);
189     To[0] = SDValue(L, 0); // Loaded (extended) value.
190   }
191   ReplaceUses(From, To, 3);
192   CurDAG->RemoveDeadNode(LD);
193 }
194 
195 MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
196   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
197     return nullptr;
198 
199   SDLoc dl(IntN);
200   unsigned IntNo = IntN->getConstantOperandVal(1);
201 
202   static std::map<unsigned,unsigned> LoadPciMap = {
203     { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
204     { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
205     { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
206     { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
207     { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
208     { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
209   };
210   auto FLC = LoadPciMap.find(IntNo);
211   if (FLC != LoadPciMap.end()) {
212     EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
213     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
214     // Operands: { Base, Increment, Modifier, Chain }
215     auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
216     SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
217     MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
218           { IntN->getOperand(2), I, IntN->getOperand(4),
219             IntN->getOperand(0) });
220     return Res;
221   }
222 
223   return nullptr;
224 }
225 
226 SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
227       SDNode *IntN) {
228   // The "LoadN" is just a machine load instruction. The intrinsic also
229   // involves storing it. Generate an appropriate store to the location
230   // given in the intrinsic's operand(3).
231   uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
232   unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
233                       HexagonII::MemAccesSizeMask;
234   unsigned Size = 1U << (SizeBits-1);
235 
236   SDLoc dl(IntN);
237   MachinePointerInfo PI;
238   SDValue TS;
239   SDValue Loc = IntN->getOperand(3);
240 
241   if (Size >= 4)
242     TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
243                           Align(Size));
244   else
245     TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
246                                PI, MVT::getIntegerVT(Size * 8), Align(Size));
247 
248   SDNode *StoreN;
249   {
250     HandleSDNode Handle(TS);
251     SelectStore(TS.getNode());
252     StoreN = Handle.getValue().getNode();
253   }
254 
255   // Load's results are { Loaded value, Updated pointer, Chain }
256   ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
257   ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
258   return StoreN;
259 }
260 
261 bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
262   // The intrinsics for load circ/brev perform two operations:
263   // 1. Load a value V from the specified location, using the addressing
264   //    mode corresponding to the intrinsic.
265   // 2. Store V into a specified location. This location is typically a
266   //    local, temporary object.
267   // In many cases, the program using these intrinsics will immediately
268   // load V again from the local object. In those cases, when certain
269   // conditions are met, the last load can be removed.
270   // This function identifies and optimizes this pattern. If the pattern
271   // cannot be optimized, it returns nullptr, which will cause the load
272   // to be selected separately from the intrinsic (which will be handled
273   // in SelectIntrinsicWChain).
274 
275   SDValue Ch = N->getOperand(0);
276   SDValue Loc = N->getOperand(1);
277 
278   // Assume that the load and the intrinsic are connected directly with a
279   // chain:
280   //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
281   //   t2: i32,ch = load t1:1, Loc, ...
282   SDNode *C = Ch.getNode();
283 
284   if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
285     return false;
286 
287   // The second load can only be eliminated if its extension type matches
288   // that of the load instruction corresponding to the intrinsic. The user
289   // can provide an address of an unsigned variable to store the result of
290   // a sign-extending intrinsic into (or the other way around).
291   ISD::LoadExtType IntExt;
292   switch (C->getConstantOperandVal(1)) {
293   case Intrinsic::hexagon_circ_ldub:
294   case Intrinsic::hexagon_circ_lduh:
295     IntExt = ISD::ZEXTLOAD;
296     break;
297   case Intrinsic::hexagon_circ_ldw:
298   case Intrinsic::hexagon_circ_ldd:
299     IntExt = ISD::NON_EXTLOAD;
300     break;
301   default:
302     IntExt = ISD::SEXTLOAD;
303     break;
304   }
305   if (N->getExtensionType() != IntExt)
306     return false;
307 
308   // Make sure the target location for the loaded value in the load intrinsic
309   // is the location from which LD (or N) is loading.
310   if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
311     return false;
312 
313   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) {
314     SDNode *S = StoreInstrForLoadIntrinsic(L, C);
315     SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
316     SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
317     ReplaceUses(F, T, std::size(T));
318     // This transformation will leave the intrinsic dead. If it remains in
319     // the DAG, the selection code will see it again, but without the load,
320     // and it will generate a store that is normally required for it.
321     CurDAG->RemoveDeadNode(C);
322     return true;
323   }
324   return false;
325 }
326 
327 // Convert the bit-reverse load intrinsic to appropriate target instruction.
328 bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
329   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
330     return false;
331 
332   const SDLoc &dl(IntN);
333   unsigned IntNo = IntN->getConstantOperandVal(1);
334 
335   static const std::map<unsigned, unsigned> LoadBrevMap = {
336     { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
337     { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
338     { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
339     { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
340     { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
341     { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
342   };
343   auto FLI = LoadBrevMap.find(IntNo);
344   if (FLI != LoadBrevMap.end()) {
345     EVT ValTy =
346         (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
347     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
348     // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
349     // modifier}.
350     // Operands of target instruction: { Base, Modifier, Chain }.
351     MachineSDNode *Res = CurDAG->getMachineNode(
352         FLI->second, dl, RTys,
353         {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
354 
355     MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
356     CurDAG->setNodeMemRefs(Res, {MemOp});
357 
358     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
359     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
360     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
361     CurDAG->RemoveDeadNode(IntN);
362     return true;
363   }
364   return false;
365 }
366 
367 /// Generate a machine instruction node for the new circular buffer intrinsics.
368 /// The new versions use a CSx register instead of the K field.
369 bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
370   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
371     return false;
372 
373   SDLoc DL(IntN);
374   unsigned IntNo = IntN->getConstantOperandVal(1);
375   SmallVector<SDValue, 7> Ops;
376 
377   static std::map<unsigned,unsigned> LoadNPcMap = {
378     { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
379     { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
380     { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
381     { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
382     { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
383     { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
384     { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
385     { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
386     { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
387     { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
388     { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
389     { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
390   };
391   auto FLI = LoadNPcMap.find (IntNo);
392   if (FLI != LoadNPcMap.end()) {
393     EVT ValTy = MVT::i32;
394     if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
395         IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
396       ValTy = MVT::i64;
397     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
398     // Handle load.*_pci case which has 6 operands.
399     if (IntN->getNumOperands() == 6) {
400       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
401       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
402       // Operands: { Base, Increment, Modifier, Start, Chain }.
403       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
404               IntN->getOperand(0) };
405     } else
406       // Handle load.*_pcr case which has 5 operands.
407       // Operands: { Base, Modifier, Start, Chain }.
408       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
409               IntN->getOperand(0) };
410     MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
411     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
412     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
413     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
414     CurDAG->RemoveDeadNode(IntN);
415     return true;
416   }
417 
418   static std::map<unsigned,unsigned> StoreNPcMap = {
419     { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
420     { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
421     { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
422     { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
423     { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
424     { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
425     { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
426     { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
427     { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
428     { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
429   };
430   auto FSI = StoreNPcMap.find (IntNo);
431   if (FSI != StoreNPcMap.end()) {
432     EVT RTys[] = { MVT::i32, MVT::Other };
433     // Handle store.*_pci case which has 7 operands.
434     if (IntN->getNumOperands() == 7) {
435       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
436       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
437       // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
438       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
439               IntN->getOperand(6), IntN->getOperand(0) };
440     } else
441       // Handle store.*_pcr case which has 6 operands.
442       // Operands: { Base, Modifier, Value, Start, Chain }.
443       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
444               IntN->getOperand(5), IntN->getOperand(0) };
445     MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
446     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
447     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
448     CurDAG->RemoveDeadNode(IntN);
449     return true;
450   }
451 
452   return false;
453 }
454 
455 void HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
456   SDLoc dl(N);
457   LoadSDNode *LD = cast<LoadSDNode>(N);
458 
459   // Handle indexed loads.
460   ISD::MemIndexedMode AM = LD->getAddressingMode();
461   if (AM != ISD::UNINDEXED) {
462     SelectIndexedLoad(LD, dl);
463     return;
464   }
465 
466   // Handle patterns using circ/brev load intrinsics.
467   if (tryLoadOfLoadIntrinsic(LD))
468     return;
469 
470   SelectCode(LD);
471 }
472 
473 void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
474   SDValue Chain = ST->getChain();
475   SDValue Base = ST->getBasePtr();
476   SDValue Offset = ST->getOffset();
477   SDValue Value = ST->getValue();
478   // Get the constant value.
479   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
480   EVT StoredVT = ST->getMemoryVT();
481   EVT ValueVT = Value.getValueType();
482 
483   bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
484   unsigned Opcode = 0;
485 
486   assert(StoredVT.isSimple());
487   switch (StoredVT.getSimpleVT().SimpleTy) {
488   case MVT::i8:
489     Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
490     break;
491   case MVT::i16:
492     Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
493     break;
494   case MVT::i32:
495   case MVT::f32:
496   case MVT::v2i16:
497   case MVT::v4i8:
498     Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
499     break;
500   case MVT::i64:
501   case MVT::f64:
502   case MVT::v2i32:
503   case MVT::v4i16:
504   case MVT::v8i8:
505     Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
506     break;
507   case MVT::v64i8:
508   case MVT::v32i16:
509   case MVT::v16i32:
510   case MVT::v8i64:
511   case MVT::v128i8:
512   case MVT::v64i16:
513   case MVT::v32i32:
514   case MVT::v16i64:
515     if (isAlignedMemNode(ST)) {
516       if (ST->isNonTemporal())
517         Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
518       else
519         Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
520     } else {
521       Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
522     }
523     break;
524   default:
525     llvm_unreachable("Unexpected memory type in indexed store");
526   }
527 
528   if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
529     assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
530     Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
531                                            dl, MVT::i32, Value);
532   }
533 
534   SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
535   MachineMemOperand *MemOp = ST->getMemOperand();
536 
537   //                  Next address   Chain
538   SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
539   SDValue To[2];
540 
541   if (IsValidInc) {
542     // Build post increment store.
543     SDValue Ops[] = { Base, IncV, Value, Chain };
544     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
545                                               Ops);
546     CurDAG->setNodeMemRefs(S, {MemOp});
547     To[0] = SDValue(S, 0);
548     To[1] = SDValue(S, 1);
549   } else {
550     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
551     SDValue Ops[] = { Base, Zero, Value, Chain };
552     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
553     CurDAG->setNodeMemRefs(S, {MemOp});
554     To[1] = SDValue(S, 0);
555     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
556                                               Base, IncV);
557     To[0] = SDValue(A, 0);
558   }
559 
560   ReplaceUses(From, To, 2);
561   CurDAG->RemoveDeadNode(ST);
562 }
563 
564 void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
565   SDLoc dl(N);
566   StoreSDNode *ST = cast<StoreSDNode>(N);
567 
568   // Handle indexed stores.
569   ISD::MemIndexedMode AM = ST->getAddressingMode();
570   if (AM != ISD::UNINDEXED) {
571     SelectIndexedStore(ST, dl);
572     return;
573   }
574 
575   SelectCode(ST);
576 }
577 
578 void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
579   SDLoc dl(N);
580   SDValue Shl_0 = N->getOperand(0);
581   SDValue Shl_1 = N->getOperand(1);
582 
583   auto Default = [this,N] () -> void { SelectCode(N); };
584 
585   if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
586     return Default();
587 
588   // RHS is const.
589   int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
590 
591   if (Shl_0.getOpcode() == ISD::MUL) {
592     SDValue Mul_0 = Shl_0.getOperand(0); // Val
593     SDValue Mul_1 = Shl_0.getOperand(1); // Const
594     // RHS of mul is const.
595     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
596       int32_t ValConst = C->getSExtValue() << ShlConst;
597       if (isInt<9>(ValConst)) {
598         SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
599         SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
600                                                 MVT::i32, Mul_0, Val);
601         ReplaceNode(N, Result);
602         return;
603       }
604     }
605     return Default();
606   }
607 
608   if (Shl_0.getOpcode() == ISD::SUB) {
609     SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
610     SDValue Sub_1 = Shl_0.getOperand(1); // Val
611     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
612       if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
613         return Default();
614       SDValue Shl2_0 = Sub_1.getOperand(0); // Val
615       SDValue Shl2_1 = Sub_1.getOperand(1); // Const
616       if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
617         int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
618         if (isInt<9>(-ValConst)) {
619           SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
620           SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
621                                                   MVT::i32, Shl2_0, Val);
622           ReplaceNode(N, Result);
623           return;
624         }
625       }
626     }
627   }
628 
629   return Default();
630 }
631 
632 //
633 // Handling intrinsics for circular load and bitreverse load.
634 //
635 void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
636   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N)) {
637     StoreInstrForLoadIntrinsic(L, N);
638     CurDAG->RemoveDeadNode(N);
639     return;
640   }
641 
642   // Handle bit-reverse load intrinsics.
643   if (SelectBrevLdIntrinsic(N))
644     return;
645 
646   if (SelectNewCircIntrinsic(N))
647     return;
648 
649   unsigned IntNo = N->getConstantOperandVal(1);
650   if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
651       IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
652       IntNo == Intrinsic::hexagon_V6_vgathermh ||
653       IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
654       IntNo == Intrinsic::hexagon_V6_vgathermhw ||
655       IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
656     SelectV65Gather(N);
657     return;
658   }
659   if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
660       IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
661       IntNo == Intrinsic::hexagon_V6_vgathermhq ||
662       IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
663       IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
664       IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
665     SelectV65GatherPred(N);
666     return;
667   }
668 
669   SelectCode(N);
670 }
671 
672 void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
673   unsigned IID = N->getConstantOperandVal(0);
674   unsigned Bits;
675   switch (IID) {
676   case Intrinsic::hexagon_S2_vsplatrb:
677     Bits = 8;
678     break;
679   case Intrinsic::hexagon_S2_vsplatrh:
680     Bits = 16;
681     break;
682   case Intrinsic::hexagon_V6_vaddcarry:
683   case Intrinsic::hexagon_V6_vaddcarry_128B:
684   case Intrinsic::hexagon_V6_vsubcarry:
685   case Intrinsic::hexagon_V6_vsubcarry_128B:
686     SelectHVXDualOutput(N);
687     return;
688   default:
689     SelectCode(N);
690     return;
691   }
692 
693   SDValue V = N->getOperand(1);
694   SDValue U;
695   // Splat intrinsics.
696   if (keepsLowBits(V, Bits, U)) {
697     SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
698                                 N->getOperand(0), U);
699     ReplaceNode(N, R.getNode());
700     SelectCode(R.getNode());
701     return;
702   }
703   SelectCode(N);
704 }
705 
706 void HexagonDAGToDAGISel::SelectExtractSubvector(SDNode *N) {
707   SDValue Inp = N->getOperand(0);
708   MVT ResTy = N->getValueType(0).getSimpleVT();
709   unsigned Idx = N->getConstantOperandVal(1);
710 
711   [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
712   [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
713   assert(InpTy.getVectorElementType() == ResTy.getVectorElementType());
714   assert(2 * ResLen == InpTy.getVectorNumElements());
715   assert(ResTy.getSizeInBits() == 32);
716   assert(Idx == 0 || Idx == ResLen);
717 
718   unsigned SubReg = Idx == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
719   SDValue Ext = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
720 
721   ReplaceNode(N, Ext.getNode());
722 }
723 
724 //
725 // Map floating point constant values.
726 //
727 void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
728   SDLoc dl(N);
729   auto *CN = cast<ConstantFPSDNode>(N);
730   APInt A = CN->getValueAPF().bitcastToAPInt();
731   if (N->getValueType(0) == MVT::f32) {
732     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
733     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
734     return;
735   }
736   if (N->getValueType(0) == MVT::f64) {
737     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
738     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
739     return;
740   }
741 
742   SelectCode(N);
743 }
744 
745 //
746 // Map boolean values.
747 //
748 void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
749   if (N->getValueType(0) == MVT::i1) {
750     assert(!(N->getAsZExtVal() >> 1));
751     unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
752                       ? Hexagon::PS_true
753                       : Hexagon::PS_false;
754     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
755     return;
756   }
757 
758   SelectCode(N);
759 }
760 
761 void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
762   MachineFrameInfo &MFI = MF->getFrameInfo();
763   const HexagonFrameLowering *HFI = HST->getFrameLowering();
764   int FX = cast<FrameIndexSDNode>(N)->getIndex();
765   Align StkA = HFI->getStackAlign();
766   Align MaxA = MFI.getMaxAlign();
767   SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
768   SDLoc DL(N);
769   SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
770   SDNode *R = nullptr;
771 
772   // Use PS_fi when:
773   // - the object is fixed, or
774   // - there are no objects with higher-than-default alignment, or
775   // - there are no dynamically allocated objects.
776   // Otherwise, use PS_fia.
777   if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
778     R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
779   } else {
780     auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
781     Register AR = HMFI.getStackAlignBaseReg();
782     SDValue CH = CurDAG->getEntryNode();
783     SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
784     R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
785   }
786 
787   ReplaceNode(N, R);
788 }
789 
790 void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) {
791   unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
792                                                          : Hexagon::A4_subp_c;
793   SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
794                                      { N->getOperand(0), N->getOperand(1),
795                                        N->getOperand(2) });
796   ReplaceNode(N, C);
797 }
798 
799 void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
800   MVT ResTy = N->getValueType(0).getSimpleVT();
801   if (HST->isHVXVectorType(ResTy, true))
802     return SelectHvxVAlign(N);
803 
804   const SDLoc &dl(N);
805   unsigned VecLen = ResTy.getSizeInBits();
806   if (VecLen == 32) {
807     SDValue Ops[] = {
808       CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
809       N->getOperand(0),
810       CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
811       N->getOperand(1),
812       CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
813     };
814     SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
815                                        MVT::i64, Ops);
816 
817     // Shift right by "(Addr & 0x3) * 8" bytes.
818     SDNode *C;
819     SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
820     SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
821     if (HST->useCompound()) {
822       C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
823                                  M0, N->getOperand(2), M1);
824     } else {
825       SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
826                                          N->getOperand(2), M1);
827       C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
828                                  SDValue(T, 0), M0);
829     }
830     SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
831                                        SDValue(R, 0), SDValue(C, 0));
832     SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
833                                                SDValue(S, 0));
834     ReplaceNode(N, E.getNode());
835   } else {
836     assert(VecLen == 64);
837     SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
838                                         N->getOperand(2));
839     SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
840                                         N->getOperand(0), N->getOperand(1),
841                                         SDValue(Pu,0));
842     ReplaceNode(N, VA);
843   }
844 }
845 
846 void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) {
847   const SDLoc &dl(N);
848   SDValue A = N->getOperand(1);
849   int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
850   assert(isPowerOf2_32(-Mask));
851 
852   SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
853   SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
854                                       N->getOperand(0), M);
855   ReplaceNode(N, AA);
856 }
857 
858 // Handle these nodes here to avoid having to write patterns for all
859 // combinations of input/output types. In all cases, the resulting
860 // instruction is the same.
861 void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) {
862   SDValue Op = N->getOperand(0);
863   MVT OpTy = Op.getValueType().getSimpleVT();
864   SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
865                                   CurDAG->getVTList(OpTy), {Op});
866   ReplaceNode(T, Op.getNode());
867 }
868 
869 void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
870   MVT ResTy = N->getValueType(0).getSimpleVT();
871   SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
872                                      N->getOperand(0));
873   ReplaceNode(N, T);
874 }
875 
876 void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
877   const SDLoc &dl(N);
878   MVT ResTy = N->getValueType(0).getSimpleVT();
879   SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
880   SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
881                                      N->getOperand(0), Zero);
882   ReplaceNode(N, T);
883 }
884 
885 void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
886   const SDLoc &dl(N);
887   MVT ResTy = N->getValueType(0).getSimpleVT();
888   // The argument to V2Q should be a single vector.
889   MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
890   assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
891 
892   SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
893   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
894   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
895                                      N->getOperand(0), SDValue(R,0));
896   ReplaceNode(N, T);
897 }
898 
899 void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
900   const SDLoc &dl(N);
901   MVT ResTy = N->getValueType(0).getSimpleVT();
902   // The result of V2Q should be a single vector.
903   assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
904 
905   SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
906   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
907   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
908                                      N->getOperand(0), SDValue(R,0));
909   ReplaceNode(N, T);
910 }
911 
912 void HexagonDAGToDAGISel::FDiv(SDNode *N) {
913   const SDLoc &dl(N);
914   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
915   SmallVector<SDValue, 2> Ops;
916   Ops = {N->getOperand(0), N->getOperand(1)};
917   SDVTList VTs;
918   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
919   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
920   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
921 
922   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
923   SDNode *constNode =
924       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
925 
926   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
927   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
928                                        SDValue(constNode, 0), SDValue(D, 0),
929                                        SDValue(ResScale, 0));
930   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
931                                           SDValue(ResScale, 0), SDValue(Err, 0),
932                                           SDValue(ResScale, 0));
933   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
934                                           SDValue(constNode, 0), SDValue(D, 0),
935                                           SDValue(NewRec, 0));
936   SDNode *q = CurDAG->getMachineNode(
937       Hexagon::A2_andir, dl, MVT::f32, SDValue(n, 0),
938       CurDAG->getTargetConstant(0x80000000, dl, MVT::i32));
939   SDNode *NewQ =
940       CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(q, 0),
941                              SDValue(n, 0), SDValue(NewRec, 0));
942   SDNode *NNewRec = CurDAG->getMachineNode(
943       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
944       SDValue(newErr, 0), SDValue(NewRec, 0));
945   SDNode *qErr =
946       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
947                              SDValue(D, 0), SDValue(NewQ, 0));
948   SDNode *NNewQ = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
949                                          SDValue(NewQ, 0), SDValue(qErr, 0),
950                                          SDValue(NNewRec, 0));
951 
952   SDNode *NqErr =
953       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
954                              SDValue(NNewQ, 0), SDValue(D, 0));
955   std::array<SDValue, 4> temp1 = {SDValue(NNewQ, 0), SDValue(NqErr, 0),
956                                   SDValue(NNewRec, 0), SDValue(ResScale, 1)};
957   ArrayRef<SDValue> OpValue1(temp1);
958   SDNode *FinalNewQ =
959       CurDAG->getMachineNode(Hexagon::F2_sffma_sc, dl, MVT::f32, OpValue1);
960   ReplaceNode(N, FinalNewQ);
961 }
962 
963 void HexagonDAGToDAGISel::FastFDiv(SDNode *N) {
964   const SDLoc &dl(N);
965   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
966   SmallVector<SDValue, 2> Ops;
967   Ops = {N->getOperand(0), N->getOperand(1)};
968   SDVTList VTs;
969   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
970   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
971   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
972 
973   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
974   SDNode *constNode =
975       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
976 
977   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
978   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
979                                        SDValue(constNode, 0), SDValue(D, 0),
980                                        SDValue(ResScale, 0));
981   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
982                                           SDValue(ResScale, 0), SDValue(Err, 0),
983                                           SDValue(ResScale, 0));
984   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
985                                           SDValue(constNode, 0), SDValue(D, 0),
986                                           SDValue(NewRec, 0));
987 
988   SDNode *NNewRec = CurDAG->getMachineNode(
989       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
990       SDValue(newErr, 0), SDValue(NewRec, 0));
991   SDNode *FinalNewQ = CurDAG->getMachineNode(
992       Hexagon::F2_sfmpy, dl, MVT::f32, SDValue(NNewRec, 0), SDValue(n, 0));
993   ReplaceNode(N, FinalNewQ);
994 }
995 
996 void HexagonDAGToDAGISel::SelectFDiv(SDNode *N) {
997   if (N->getFlags().hasAllowReassociation())
998     FastFDiv(N);
999   else
1000     FDiv(N);
1001   return;
1002 }
1003 
1004 void HexagonDAGToDAGISel::Select(SDNode *N) {
1005   if (N->isMachineOpcode())
1006     return N->setNodeId(-1);  // Already selected.
1007 
1008   auto isHvxOp = [this](SDNode *N) {
1009     for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1010       if (HST->isHVXVectorType(N->getValueType(i), true))
1011         return true;
1012     }
1013     for (SDValue I : N->ops()) {
1014       if (HST->isHVXVectorType(I.getValueType(), true))
1015         return true;
1016     }
1017     return false;
1018   };
1019 
1020   if (HST->useHVXOps() && isHvxOp(N)) {
1021     switch (N->getOpcode()) {
1022     case ISD::EXTRACT_SUBVECTOR:  return SelectHvxExtractSubvector(N);
1023     case ISD::VECTOR_SHUFFLE:     return SelectHvxShuffle(N);
1024 
1025     case HexagonISD::VROR:        return SelectHvxRor(N);
1026     }
1027   }
1028 
1029   switch (N->getOpcode()) {
1030   case ISD::Constant:             return SelectConstant(N);
1031   case ISD::ConstantFP:           return SelectConstantFP(N);
1032   case ISD::FrameIndex:           return SelectFrameIndex(N);
1033   case ISD::SHL:                  return SelectSHL(N);
1034   case ISD::LOAD:                 return SelectLoad(N);
1035   case ISD::STORE:                return SelectStore(N);
1036   case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);
1037   case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N);
1038   case ISD::EXTRACT_SUBVECTOR:    return SelectExtractSubvector(N);
1039 
1040   case HexagonISD::ADDC:
1041   case HexagonISD::SUBC:          return SelectAddSubCarry(N);
1042   case HexagonISD::VALIGN:        return SelectVAlign(N);
1043   case HexagonISD::VALIGNADDR:    return SelectVAlignAddr(N);
1044   case HexagonISD::TYPECAST:      return SelectTypecast(N);
1045   case HexagonISD::P2D:           return SelectP2D(N);
1046   case HexagonISD::D2P:           return SelectD2P(N);
1047   case HexagonISD::Q2V:           return SelectQ2V(N);
1048   case HexagonISD::V2Q:           return SelectV2Q(N);
1049   case ISD::FDIV:
1050     return SelectFDiv(N);
1051   }
1052 
1053   SelectCode(N);
1054 }
1055 
1056 bool HexagonDAGToDAGISel::SelectInlineAsmMemoryOperand(
1057     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
1058     std::vector<SDValue> &OutOps) {
1059   SDValue Inp = Op, Res;
1060 
1061   switch (ConstraintID) {
1062   default:
1063     return true;
1064   case InlineAsm::ConstraintCode::o: // Offsetable.
1065   case InlineAsm::ConstraintCode::v: // Not offsetable.
1066   case InlineAsm::ConstraintCode::m: // Memory.
1067     if (SelectAddrFI(Inp, Res))
1068       OutOps.push_back(Res);
1069     else
1070       OutOps.push_back(Inp);
1071     break;
1072   }
1073 
1074   OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
1075   return false;
1076 }
1077 
1078 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
1079   // I is an operand of U. Check if U is an arithmetic (binary) operation
1080   // usable in a memop, where the other operand is a loaded value, and the
1081   // result of U is stored in the same location.
1082 
1083   if (!U->hasOneUse())
1084     return false;
1085   unsigned Opc = U->getOpcode();
1086   switch (Opc) {
1087     case ISD::ADD:
1088     case ISD::SUB:
1089     case ISD::AND:
1090     case ISD::OR:
1091       break;
1092     default:
1093       return false;
1094   }
1095 
1096   SDValue S0 = U->getOperand(0);
1097   SDValue S1 = U->getOperand(1);
1098   SDValue SY = (S0.getNode() == I) ? S1 : S0;
1099 
1100   SDNode *UUse = *U->use_begin();
1101   if (UUse->getNumValues() != 1)
1102     return false;
1103 
1104   // Check if one of the inputs to U is a load instruction and the output
1105   // is used by a store instruction. If so and they also have the same
1106   // base pointer, then don't preoprocess this node sequence as it
1107   // can be matched to a memop.
1108   SDNode *SYNode = SY.getNode();
1109   if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
1110     SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
1111     SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
1112     if (LDBasePtr == STBasePtr)
1113       return true;
1114   }
1115   return false;
1116 }
1117 
1118 
1119 // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1120 //            (or (select c 0 y) z)  ->  (select c z (or y z))
1121 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
1122   SelectionDAG &DAG = *CurDAG;
1123 
1124   for (auto *I : Nodes) {
1125     if (I->getOpcode() != ISD::OR)
1126       continue;
1127 
1128     auto IsSelect0 = [](const SDValue &Op) -> bool {
1129       if (Op.getOpcode() != ISD::SELECT)
1130         return false;
1131       return isNullConstant(Op.getOperand(1)) ||
1132              isNullConstant(Op.getOperand(2));
1133     };
1134 
1135     SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
1136     EVT VT = I->getValueType(0);
1137     bool SelN0 = IsSelect0(N0);
1138     SDValue SOp = SelN0 ? N0 : N1;
1139     SDValue VOp = SelN0 ? N1 : N0;
1140 
1141     if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1142       SDValue SC = SOp.getOperand(0);
1143       SDValue SX = SOp.getOperand(1);
1144       SDValue SY = SOp.getOperand(2);
1145       SDLoc DLS = SOp;
1146       if (isNullConstant(SY)) {
1147         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1148         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1149         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1150       } else if (isNullConstant(SX)) {
1151         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1152         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1153         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1154       }
1155     }
1156   }
1157 }
1158 
1159 // Transform: (store ch val (add x (add (shl y c) e)))
1160 //        to: (store ch val (add x (shl (add y d) c))),
1161 // where e = (shl d c) for some integer d.
1162 // The purpose of this is to enable generation of loads/stores with
1163 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1164 // value c must be 0, 1 or 2.
1165 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1166   SelectionDAG &DAG = *CurDAG;
1167 
1168   for (auto *I : Nodes) {
1169     if (I->getOpcode() != ISD::STORE)
1170       continue;
1171 
1172     // I matched: (store ch val Off)
1173     SDValue Off = I->getOperand(2);
1174     // Off needs to match: (add x (add (shl y c) (shl d c))))
1175     if (Off.getOpcode() != ISD::ADD)
1176       continue;
1177     // Off matched: (add x T0)
1178     SDValue T0 = Off.getOperand(1);
1179     // T0 needs to match: (add T1 T2):
1180     if (T0.getOpcode() != ISD::ADD)
1181       continue;
1182     // T0 matched: (add T1 T2)
1183     SDValue T1 = T0.getOperand(0);
1184     SDValue T2 = T0.getOperand(1);
1185     // T1 needs to match: (shl y c)
1186     if (T1.getOpcode() != ISD::SHL)
1187       continue;
1188     SDValue C = T1.getOperand(1);
1189     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1190     if (CN == nullptr)
1191       continue;
1192     unsigned CV = CN->getZExtValue();
1193     if (CV > 2)
1194       continue;
1195     // T2 needs to match e, where e = (shl d c) for some d.
1196     ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1197     if (EN == nullptr)
1198       continue;
1199     unsigned EV = EN->getZExtValue();
1200     if (EV % (1 << CV) != 0)
1201       continue;
1202     unsigned DV = EV / (1 << CV);
1203 
1204     // Replace T0 with: (shl (add y d) c)
1205     SDLoc DL = SDLoc(I);
1206     EVT VT = T0.getValueType();
1207     SDValue D = DAG.getConstant(DV, DL, VT);
1208     // NewAdd = (add y d)
1209     SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1210     // NewShl = (shl NewAdd c)
1211     SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1212     ReplaceNode(T0.getNode(), NewShl.getNode());
1213   }
1214 }
1215 
1216 // Transform: (load ch (add x (and (srl y c) Mask)))
1217 //        to: (load ch (add x (shl (srl y d) d-c)))
1218 // where
1219 // Mask = 00..0 111..1 0.0
1220 //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1221 //          |     +-------- 1s
1222 //          +-------------- at most c 0s
1223 // Motivating example:
1224 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1225 //                     to (add x (and (srl y 3) 1FFFFFFC))
1226 // which results in a constant-extended and(##...,lsr). This transformation
1227 // undoes this simplification for cases where the shl can be folded into
1228 // an addressing mode.
1229 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1230   SelectionDAG &DAG = *CurDAG;
1231 
1232   for (SDNode *N : Nodes) {
1233     unsigned Opc = N->getOpcode();
1234     if (Opc != ISD::LOAD && Opc != ISD::STORE)
1235       continue;
1236     SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1237     // Addr must match: (add x T0)
1238     if (Addr.getOpcode() != ISD::ADD)
1239       continue;
1240     SDValue T0 = Addr.getOperand(1);
1241     // T0 must match: (and T1 Mask)
1242     if (T0.getOpcode() != ISD::AND)
1243       continue;
1244 
1245     // We have an AND.
1246     //
1247     // Check the first operand. It must be: (srl y c).
1248     SDValue S = T0.getOperand(0);
1249     if (S.getOpcode() != ISD::SRL)
1250       continue;
1251     ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1252     if (SN == nullptr)
1253       continue;
1254     if (SN->getAPIntValue().getBitWidth() != 32)
1255       continue;
1256     uint32_t CV = SN->getZExtValue();
1257 
1258     // Check the second operand: the supposed mask.
1259     ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1260     if (MN == nullptr)
1261       continue;
1262     if (MN->getAPIntValue().getBitWidth() != 32)
1263       continue;
1264     uint32_t Mask = MN->getZExtValue();
1265     // Examine the mask.
1266     uint32_t TZ = llvm::countr_zero(Mask);
1267     uint32_t M1 = llvm::countr_one(Mask >> TZ);
1268     uint32_t LZ = llvm::countl_zero(Mask);
1269     // Trailing zeros + middle ones + leading zeros must equal the width.
1270     if (TZ + M1 + LZ != 32)
1271       continue;
1272     // The number of trailing zeros will be encoded in the addressing mode.
1273     if (TZ > 2)
1274       continue;
1275     // The number of leading zeros must be at most c.
1276     if (LZ > CV)
1277       continue;
1278 
1279     // All looks good.
1280     SDValue Y = S.getOperand(0);
1281     EVT VT = Addr.getValueType();
1282     SDLoc dl(S);
1283     // TZ = D-C, so D = TZ+C.
1284     SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1285     SDValue DC = DAG.getConstant(TZ, dl, VT);
1286     SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1287     SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1288     ReplaceNode(T0.getNode(), NewShl.getNode());
1289   }
1290 }
1291 
1292 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1293 //                                                  (op ... 1 ...))
1294 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1295   SelectionDAG &DAG = *CurDAG;
1296 
1297   for (SDNode *N : Nodes) {
1298     unsigned Opc = N->getOpcode();
1299     if (Opc != ISD::ZERO_EXTEND)
1300       continue;
1301     SDValue OpI1 = N->getOperand(0);
1302     EVT OpVT = OpI1.getValueType();
1303     if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1304       continue;
1305     for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1306       SDNode *U = *I;
1307       if (U->getNumValues() != 1)
1308         continue;
1309       EVT UVT = U->getValueType(0);
1310       if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1311         continue;
1312       // Do not generate select for all i1 vector type.
1313       if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
1314         continue;
1315       if (isMemOPCandidate(N, U))
1316         continue;
1317 
1318       // Potentially simplifiable operation.
1319       unsigned I1N = I.getOperandNo();
1320       SmallVector<SDValue,2> Ops(U->getNumOperands());
1321       for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1322         Ops[i] = U->getOperand(i);
1323       EVT BVT = Ops[I1N].getValueType();
1324 
1325       const SDLoc &dl(U);
1326       SDValue C0 = DAG.getConstant(0, dl, BVT);
1327       SDValue C1 = DAG.getConstant(1, dl, BVT);
1328       SDValue If0, If1;
1329 
1330       if (isa<MachineSDNode>(U)) {
1331         unsigned UseOpc = U->getMachineOpcode();
1332         Ops[I1N] = C0;
1333         If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1334         Ops[I1N] = C1;
1335         If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1336       } else {
1337         unsigned UseOpc = U->getOpcode();
1338         Ops[I1N] = C0;
1339         If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1340         Ops[I1N] = C1;
1341         If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1342       }
1343       // We're generating a SELECT way after legalization, so keep the types
1344       // simple.
1345       unsigned UW = UVT.getSizeInBits();
1346       EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
1347       SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
1348                                 DAG.getBitcast(SVT, If1),
1349                                 DAG.getBitcast(SVT, If0));
1350       SDValue Ret = DAG.getBitcast(UVT, Sel);
1351       DAG.ReplaceAllUsesWith(U, Ret.getNode());
1352     }
1353   }
1354 }
1355 
1356 void HexagonDAGToDAGISel::PreprocessISelDAG() {
1357   // Repack all nodes before calling each preprocessing function,
1358   // because each of them can modify the set of nodes.
1359   auto getNodes = [this]() -> std::vector<SDNode *> {
1360     std::vector<SDNode *> T;
1361     T.reserve(CurDAG->allnodes_size());
1362     for (SDNode &N : CurDAG->allnodes())
1363       T.push_back(&N);
1364     return T;
1365   };
1366 
1367   if (HST->useHVXOps())
1368     PreprocessHvxISelDAG();
1369 
1370   // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1371   //            (or (select c 0 y) z)  ->  (select c z (or y z))
1372   ppSimplifyOrSelect0(getNodes());
1373 
1374   // Transform: (store ch val (add x (add (shl y c) e)))
1375   //        to: (store ch val (add x (shl (add y d) c))),
1376   // where e = (shl d c) for some integer d.
1377   // The purpose of this is to enable generation of loads/stores with
1378   // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1379   // value c must be 0, 1 or 2.
1380   ppAddrReorderAddShl(getNodes());
1381 
1382   // Transform: (load ch (add x (and (srl y c) Mask)))
1383   //        to: (load ch (add x (shl (srl y d) d-c)))
1384   // where
1385   // Mask = 00..0 111..1 0.0
1386   //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1387   //          |     +-------- 1s
1388   //          +-------------- at most c 0s
1389   // Motivating example:
1390   // DAG combiner optimizes (add x (shl (srl y 5) 2))
1391   //                     to (add x (and (srl y 3) 1FFFFFFC))
1392   // which results in a constant-extended and(##...,lsr). This transformation
1393   // undoes this simplification for cases where the shl can be folded into
1394   // an addressing mode.
1395   ppAddrRewriteAndSrl(getNodes());
1396 
1397   // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1398   //                                                  (op ... 1 ...))
1399   ppHoistZextI1(getNodes());
1400 
1401   DEBUG_WITH_TYPE("isel", {
1402     dbgs() << "Preprocessed (Hexagon) selection DAG:";
1403     CurDAG->dump();
1404   });
1405 
1406   if (EnableAddressRebalancing) {
1407     rebalanceAddressTrees();
1408 
1409     DEBUG_WITH_TYPE("isel", {
1410       dbgs() << "Address tree balanced selection DAG:";
1411       CurDAG->dump();
1412     });
1413   }
1414 }
1415 
1416 void HexagonDAGToDAGISel::emitFunctionEntryCode() {
1417   auto &HST = MF->getSubtarget<HexagonSubtarget>();
1418   auto &HFI = *HST.getFrameLowering();
1419   if (!HFI.needsAligna(*MF))
1420     return;
1421 
1422   MachineFrameInfo &MFI = MF->getFrameInfo();
1423   MachineBasicBlock *EntryBB = &MF->front();
1424   Align EntryMaxA = MFI.getMaxAlign();
1425 
1426   // Reserve the first non-volatile register.
1427   Register AP = 0;
1428   auto &HRI = *HST.getRegisterInfo();
1429   BitVector Reserved = HRI.getReservedRegs(*MF);
1430   for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1431     if (Reserved[*R])
1432       continue;
1433     AP = *R;
1434     break;
1435   }
1436   assert(AP.isValid() && "Couldn't reserve stack align register");
1437   BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
1438       .addImm(EntryMaxA.value());
1439   MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
1440 }
1441 
1442 void HexagonDAGToDAGISel::updateAligna() {
1443   auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
1444   if (!HFI.needsAligna(*MF))
1445     return;
1446   auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
1447   assert(AlignaI != nullptr);
1448   unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
1449   if (AlignaI->getOperand(1).getImm() < MaxA)
1450     AlignaI->getOperand(1).setImm(MaxA);
1451 }
1452 
1453 // Match a frame index that can be used in an addressing mode.
1454 bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
1455   if (N.getOpcode() != ISD::FrameIndex)
1456     return false;
1457   auto &HFI = *HST->getFrameLowering();
1458   MachineFrameInfo &MFI = MF->getFrameInfo();
1459   int FX = cast<FrameIndexSDNode>(N)->getIndex();
1460   if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1461     return false;
1462   R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
1463   return true;
1464 }
1465 
1466 inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
1467   return SelectGlobalAddress(N, R, false, Align(1));
1468 }
1469 
1470 inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
1471   return SelectGlobalAddress(N, R, true, Align(1));
1472 }
1473 
1474 inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) {
1475   return SelectAnyImmediate(N, R, Align(1));
1476 }
1477 
1478 inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) {
1479   return SelectAnyImmediate(N, R, Align(1));
1480 }
1481 inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) {
1482   return SelectAnyImmediate(N, R, Align(2));
1483 }
1484 inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) {
1485   return SelectAnyImmediate(N, R, Align(4));
1486 }
1487 inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) {
1488   return SelectAnyImmediate(N, R, Align(8));
1489 }
1490 
1491 inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
1492   EVT T = N.getValueType();
1493   if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1494     return false;
1495   int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1496   R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1497   return true;
1498 }
1499 
1500 bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
1501                                              Align Alignment) {
1502   switch (N.getOpcode()) {
1503   case ISD::Constant: {
1504     if (N.getValueType() != MVT::i32)
1505       return false;
1506     int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1507     if (!isAligned(Alignment, V))
1508       return false;
1509     R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1510     return true;
1511   }
1512   case HexagonISD::JT:
1513   case HexagonISD::CP:
1514     // These are assumed to always be aligned at least 8-byte boundary.
1515     if (Alignment > Align(8))
1516       return false;
1517     R = N.getOperand(0);
1518     return true;
1519   case ISD::ExternalSymbol:
1520     // Symbols may be aligned at any boundary.
1521     if (Alignment > Align(1))
1522       return false;
1523     R = N;
1524     return true;
1525   case ISD::BlockAddress:
1526     // Block address is always aligned at least 4-byte boundary.
1527     if (Alignment > Align(4) ||
1528         !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
1529       return false;
1530     R = N;
1531     return true;
1532   }
1533 
1534   if (SelectGlobalAddress(N, R, false, Alignment) ||
1535       SelectGlobalAddress(N, R, true, Alignment))
1536     return true;
1537 
1538   return false;
1539 }
1540 
1541 bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
1542                                               bool UseGP, Align Alignment) {
1543   switch (N.getOpcode()) {
1544   case ISD::ADD: {
1545     SDValue N0 = N.getOperand(0);
1546     SDValue N1 = N.getOperand(1);
1547     unsigned GAOpc = N0.getOpcode();
1548     if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1549       return false;
1550     if (!UseGP && GAOpc != HexagonISD::CONST32)
1551       return false;
1552     if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1553       if (!isAligned(Alignment, Const->getZExtValue()))
1554         return false;
1555       SDValue Addr = N0.getOperand(0);
1556       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1557         if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1558           uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1559           R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1560                                              N.getValueType(), NewOff);
1561           return true;
1562         }
1563       }
1564     }
1565     break;
1566   }
1567   case HexagonISD::CP:
1568   case HexagonISD::JT:
1569   case HexagonISD::CONST32:
1570     // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1571     // want in the instruction.
1572     if (!UseGP)
1573       R = N.getOperand(0);
1574     return !UseGP;
1575   case HexagonISD::CONST32_GP:
1576     if (UseGP)
1577       R = N.getOperand(0);
1578     return UseGP;
1579   default:
1580     return false;
1581   }
1582 
1583   return false;
1584 }
1585 
1586 bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
1587   // This (complex pattern) function is meant to detect a sign-extension
1588   // i32->i64 on a per-operand basis. This would allow writing single
1589   // patterns that would cover a number of combinations of different ways
1590   // a sign-extensions could be written. For example:
1591   //   (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1592   // could match either one of these:
1593   //   (mul (sext x) (sext_inreg y))
1594   //   (mul (sext-load *p) (sext_inreg y))
1595   //   (mul (sext_inreg x) (sext y))
1596   // etc.
1597   //
1598   // The returned value will have type i64 and its low word will
1599   // contain the value being extended. The high bits are not specified.
1600   // The returned type is i64 because the original type of N was i64,
1601   // but the users of this function should only use the low-word of the
1602   // result, e.g.
1603   //  (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1604 
1605   if (N.getValueType() != MVT::i64)
1606     return false;
1607   unsigned Opc = N.getOpcode();
1608   switch (Opc) {
1609     case ISD::SIGN_EXTEND:
1610     case ISD::SIGN_EXTEND_INREG: {
1611       // sext_inreg has the source type as a separate operand.
1612       EVT T = Opc == ISD::SIGN_EXTEND
1613                 ? N.getOperand(0).getValueType()
1614                 : cast<VTSDNode>(N.getOperand(1))->getVT();
1615       unsigned SW = T.getSizeInBits();
1616       if (SW == 32)
1617         R = N.getOperand(0);
1618       else if (SW < 32)
1619         R = N;
1620       else
1621         return false;
1622       break;
1623     }
1624     case ISD::LOAD: {
1625       LoadSDNode *L = cast<LoadSDNode>(N);
1626       if (L->getExtensionType() != ISD::SEXTLOAD)
1627         return false;
1628       // All extending loads extend to i32, so even if the value in
1629       // memory is shorter than 32 bits, it will be i32 after the load.
1630       if (L->getMemoryVT().getSizeInBits() > 32)
1631         return false;
1632       R = N;
1633       break;
1634     }
1635     case ISD::SRA: {
1636       auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1637       if (!S || S->getZExtValue() != 32)
1638         return false;
1639       R = N;
1640       break;
1641     }
1642     default:
1643       return false;
1644   }
1645   EVT RT = R.getValueType();
1646   if (RT == MVT::i64)
1647     return true;
1648   assert(RT == MVT::i32);
1649   // This is only to produce a value of type i64. Do not rely on the
1650   // high bits produced by this.
1651   const SDLoc &dl(N);
1652   SDValue Ops[] = {
1653     CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1654     R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1655     R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1656   };
1657   SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1658                                      MVT::i64, Ops);
1659   R = SDValue(T, 0);
1660   return true;
1661 }
1662 
1663 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1664       SDValue &Src) {
1665   unsigned Opc = Val.getOpcode();
1666   switch (Opc) {
1667   case ISD::SIGN_EXTEND:
1668   case ISD::ZERO_EXTEND:
1669   case ISD::ANY_EXTEND: {
1670     const SDValue &Op0 = Val.getOperand(0);
1671     EVT T = Op0.getValueType();
1672     if (T.isInteger() && T.getSizeInBits() == NumBits) {
1673       Src = Op0;
1674       return true;
1675     }
1676     break;
1677   }
1678   case ISD::SIGN_EXTEND_INREG:
1679   case ISD::AssertSext:
1680   case ISD::AssertZext:
1681     if (Val.getOperand(0).getValueType().isInteger()) {
1682       VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1683       if (T->getVT().getSizeInBits() == NumBits) {
1684         Src = Val.getOperand(0);
1685         return true;
1686       }
1687     }
1688     break;
1689   case ISD::AND: {
1690     // Check if this is an AND with NumBits of lower bits set to 1.
1691     uint64_t Mask = (1ULL << NumBits) - 1;
1692     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1693       if (C->getZExtValue() == Mask) {
1694         Src = Val.getOperand(1);
1695         return true;
1696       }
1697     }
1698     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1699       if (C->getZExtValue() == Mask) {
1700         Src = Val.getOperand(0);
1701         return true;
1702       }
1703     }
1704     break;
1705   }
1706   case ISD::OR:
1707   case ISD::XOR: {
1708     // OR/XOR with the lower NumBits bits set to 0.
1709     uint64_t Mask = (1ULL << NumBits) - 1;
1710     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1711       if ((C->getZExtValue() & Mask) == 0) {
1712         Src = Val.getOperand(1);
1713         return true;
1714       }
1715     }
1716     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1717       if ((C->getZExtValue() & Mask) == 0) {
1718         Src = Val.getOperand(0);
1719         return true;
1720       }
1721     }
1722     break;
1723   }
1724   default:
1725     break;
1726   }
1727   return false;
1728 }
1729 
1730 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1731   return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
1732 }
1733 
1734 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1735   unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1736   switch (N->getMemoryVT().getStoreSize()) {
1737     case 1:
1738       return StackSize <= 56;   // 1*2^6 - 8
1739     case 2:
1740       return StackSize <= 120;  // 2*2^6 - 8
1741     case 4:
1742       return StackSize <= 248;  // 4*2^6 - 8
1743     default:
1744       return false;
1745   }
1746 }
1747 
1748 // Return true when the given node fits in a positive half word.
1749 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1750   if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1751     int64_t V = CN->getSExtValue();
1752     return V > 0 && isInt<16>(V);
1753   }
1754   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1755     const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1756     return VN->getVT().getSizeInBits() <= 16;
1757   }
1758   return false;
1759 }
1760 
1761 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1762   return !CheckSingleUse || N->hasOneUse();
1763 }
1764 
1765 ////////////////////////////////////////////////////////////////////////////////
1766 // Rebalancing of address calculation trees
1767 
1768 static bool isOpcodeHandled(const SDNode *N) {
1769   switch (N->getOpcode()) {
1770     case ISD::ADD:
1771     case ISD::MUL:
1772       return true;
1773     case ISD::SHL:
1774       // We only handle constant shifts because these can be easily flattened
1775       // into multiplications by 2^Op1.
1776       return isa<ConstantSDNode>(N->getOperand(1).getNode());
1777     default:
1778       return false;
1779   }
1780 }
1781 
1782 /// Return the weight of an SDNode
1783 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1784   if (!isOpcodeHandled(N))
1785     return 1;
1786   assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1787   assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1788   assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1789   return RootWeights[N];
1790 }
1791 
1792 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1793   if (!isOpcodeHandled(N))
1794     return 0;
1795   assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1796       "Cannot query height of unvisited/RAUW'd node!");
1797   return RootHeights[N];
1798 }
1799 
1800 namespace {
1801 struct WeightedLeaf {
1802   SDValue Value;
1803   int Weight;
1804   int InsertionOrder;
1805 
1806   WeightedLeaf() {}
1807 
1808   WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1809     Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1810     assert(Weight >= 0 && "Weight must be >= 0");
1811   }
1812 
1813   static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1814     assert(A.Value.getNode() && B.Value.getNode());
1815     return A.Weight == B.Weight ?
1816             (A.InsertionOrder > B.InsertionOrder) :
1817             (A.Weight > B.Weight);
1818   }
1819 };
1820 
1821 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1822 /// constants and allows removal of non-top elements while maintaining the
1823 /// priority order.
1824 class LeafPrioQueue {
1825   SmallVector<WeightedLeaf, 8> Q;
1826   bool HaveConst;
1827   WeightedLeaf ConstElt;
1828   unsigned Opcode;
1829 
1830 public:
1831   bool empty() {
1832     return (!HaveConst && Q.empty());
1833   }
1834 
1835   size_t size() {
1836     return Q.size() + HaveConst;
1837   }
1838 
1839   bool hasConst() {
1840     return HaveConst;
1841   }
1842 
1843   const WeightedLeaf &top() {
1844     if (HaveConst)
1845       return ConstElt;
1846     return Q.front();
1847   }
1848 
1849   WeightedLeaf pop() {
1850     if (HaveConst) {
1851       HaveConst = false;
1852       return ConstElt;
1853     }
1854     std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1855     return Q.pop_back_val();
1856   }
1857 
1858   void push(WeightedLeaf L, bool SeparateConst=true) {
1859     if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1860       if (Opcode == ISD::MUL &&
1861           cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1862         return;
1863       if (Opcode == ISD::ADD &&
1864           cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1865         return;
1866 
1867       HaveConst = true;
1868       ConstElt = L;
1869     } else {
1870       Q.push_back(L);
1871       std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1872     }
1873   }
1874 
1875   /// Push L to the bottom of the queue regardless of its weight. If L is
1876   /// constant, it will not be folded with other constants in the queue.
1877   void pushToBottom(WeightedLeaf L) {
1878     L.Weight = 1000;
1879     push(L, false);
1880   }
1881 
1882   /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1883   /// lowest weight and remove it from the queue.
1884   WeightedLeaf findSHL(uint64_t MaxAmount);
1885 
1886   WeightedLeaf findMULbyConst();
1887 
1888   LeafPrioQueue(unsigned Opcode) :
1889     HaveConst(false), Opcode(Opcode) { }
1890 };
1891 } // end anonymous namespace
1892 
1893 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1894   int ResultPos;
1895   WeightedLeaf Result;
1896 
1897   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1898     const WeightedLeaf &L = Q[Pos];
1899     const SDValue &Val = L.Value;
1900     if (Val.getOpcode() != ISD::SHL ||
1901         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1902         Val.getConstantOperandVal(1) > MaxAmount)
1903       continue;
1904     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1905         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1906     {
1907       Result = L;
1908       ResultPos = Pos;
1909     }
1910   }
1911 
1912   if (Result.Value.getNode()) {
1913     Q.erase(&Q[ResultPos]);
1914     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1915   }
1916 
1917   return Result;
1918 }
1919 
1920 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1921   int ResultPos;
1922   WeightedLeaf Result;
1923 
1924   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1925     const WeightedLeaf &L = Q[Pos];
1926     const SDValue &Val = L.Value;
1927     if (Val.getOpcode() != ISD::MUL ||
1928         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1929         Val.getConstantOperandVal(1) > 127)
1930       continue;
1931     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1932         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1933     {
1934       Result = L;
1935       ResultPos = Pos;
1936     }
1937   }
1938 
1939   if (Result.Value.getNode()) {
1940     Q.erase(&Q[ResultPos]);
1941     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1942   }
1943 
1944   return Result;
1945 }
1946 
1947 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1948   uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1949   return CurDAG->getConstant(MulFactor, SDLoc(N),
1950                              N->getOperand(1).getValueType());
1951 }
1952 
1953 /// @returns the value x for which 2^x is a factor of Val
1954 static unsigned getPowerOf2Factor(SDValue Val) {
1955   if (Val.getOpcode() == ISD::MUL) {
1956     unsigned MaxFactor = 0;
1957     for (int i = 0; i < 2; ++i) {
1958       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1959       if (!C)
1960         continue;
1961       const APInt &CInt = C->getAPIntValue();
1962       if (CInt.getBoolValue())
1963         MaxFactor = CInt.countr_zero();
1964     }
1965     return MaxFactor;
1966   }
1967   if (Val.getOpcode() == ISD::SHL) {
1968     if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1969       return 0;
1970     return (unsigned) Val.getConstantOperandVal(1);
1971   }
1972 
1973   return 0;
1974 }
1975 
1976 /// @returns true if V>>Amount will eliminate V's operation on its child
1977 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1978   if (V.getOpcode() == ISD::MUL) {
1979     SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1980     for (int i = 0; i < 2; ++i)
1981       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1982           V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1983         uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1984         return (NewConst == 1);
1985       }
1986   } else if (V.getOpcode() == ISD::SHL) {
1987     return (Amount == V.getConstantOperandVal(1));
1988   }
1989 
1990   return false;
1991 }
1992 
1993 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1994   SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1995   if (V.getOpcode() == ISD::MUL) {
1996     for (int i=0; i < 2; ++i) {
1997       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1998           V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1999         uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
2000         if (NewConst == 1)
2001           return Ops[!i];
2002         Ops[i] = CurDAG->getConstant(NewConst,
2003                                      SDLoc(V), V.getValueType());
2004         break;
2005       }
2006     }
2007   } else if (V.getOpcode() == ISD::SHL) {
2008     uint64_t ShiftAmount = V.getConstantOperandVal(1);
2009     if (ShiftAmount == Power)
2010       return Ops[0];
2011     Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
2012                                  SDLoc(V), V.getValueType());
2013   }
2014 
2015   return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
2016 }
2017 
2018 static bool isTargetConstant(const SDValue &V) {
2019   return V.getOpcode() == HexagonISD::CONST32 ||
2020          V.getOpcode() == HexagonISD::CONST32_GP;
2021 }
2022 
2023 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
2024   if (GAUsesInFunction.count(V))
2025     return GAUsesInFunction[V];
2026 
2027   unsigned Result = 0;
2028   const Function &CurF = CurDAG->getMachineFunction().getFunction();
2029   for (const User *U : V->users()) {
2030     if (isa<Instruction>(U) &&
2031         cast<Instruction>(U)->getParent()->getParent() == &CurF)
2032       ++Result;
2033   }
2034 
2035   GAUsesInFunction[V] = Result;
2036 
2037   return Result;
2038 }
2039 
2040 /// Note - After calling this, N may be dead. It may have been replaced by a
2041 /// new node, so always use the returned value in place of N.
2042 ///
2043 /// @returns The SDValue taking the place of N (which could be N if it is
2044 /// unchanged)
2045 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
2046   assert(RootWeights.count(N) && "Cannot balance non-root node.");
2047   assert(RootWeights[N] != -2 && "This node was RAUW'd!");
2048   assert(!TopLevel || N->getOpcode() == ISD::ADD);
2049 
2050   // Return early if this node was already visited
2051   if (RootWeights[N] != -1)
2052     return SDValue(N, 0);
2053 
2054   assert(isOpcodeHandled(N));
2055 
2056   SDValue Op0 = N->getOperand(0);
2057   SDValue Op1 = N->getOperand(1);
2058 
2059   // Return early if the operands will remain unchanged or are all roots
2060   if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
2061       (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
2062     SDNode *Op0N = Op0.getNode();
2063     int Weight;
2064     if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
2065       Weight = getWeight(balanceSubTree(Op0N).getNode());
2066       // Weight = calculateWeight(Op0N);
2067     } else
2068       Weight = getWeight(Op0N);
2069 
2070     SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
2071     if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
2072       Weight += getWeight(balanceSubTree(Op1N).getNode());
2073       // Weight += calculateWeight(Op1N);
2074     } else
2075       Weight += getWeight(Op1N);
2076 
2077     RootWeights[N] = Weight;
2078     RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
2079                               getHeight(N->getOperand(1).getNode())) + 1;
2080 
2081     LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
2082                       << " Height=" << RootHeights[N] << "): ");
2083     LLVM_DEBUG(N->dump(CurDAG));
2084 
2085     return SDValue(N, 0);
2086   }
2087 
2088   LLVM_DEBUG(dbgs() << "** Balancing root node: ");
2089   LLVM_DEBUG(N->dump(CurDAG));
2090 
2091   unsigned NOpcode = N->getOpcode();
2092 
2093   LeafPrioQueue Leaves(NOpcode);
2094   SmallVector<SDValue, 4> Worklist;
2095   Worklist.push_back(SDValue(N, 0));
2096 
2097   // SHL nodes will be converted to MUL nodes
2098   if (NOpcode == ISD::SHL)
2099     NOpcode = ISD::MUL;
2100 
2101   bool CanFactorize = false;
2102   WeightedLeaf Mul1, Mul2;
2103   unsigned MaxPowerOf2 = 0;
2104   WeightedLeaf GA;
2105 
2106   // Do not try to factor out a shift if there is already a shift at the tip of
2107   // the tree.
2108   bool HaveTopLevelShift = false;
2109   if (TopLevel &&
2110       ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
2111                         Op0.getConstantOperandVal(1) < 4) ||
2112        (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
2113                         Op1.getConstantOperandVal(1) < 4)))
2114     HaveTopLevelShift = true;
2115 
2116   // Flatten the subtree into an ordered list of leaves; at the same time
2117   // determine whether the tree is already balanced.
2118   int InsertionOrder = 0;
2119   SmallDenseMap<SDValue, int> NodeHeights;
2120   bool Imbalanced = false;
2121   int CurrentWeight = 0;
2122   while (!Worklist.empty()) {
2123     SDValue Child = Worklist.pop_back_val();
2124 
2125     if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
2126       // CASE 1: Child is a root note
2127 
2128       int Weight = RootWeights[Child.getNode()];
2129       if (Weight == -1) {
2130         Child = balanceSubTree(Child.getNode());
2131         // calculateWeight(Child.getNode());
2132         Weight = getWeight(Child.getNode());
2133       } else if (Weight == -2) {
2134         // Whoops, this node was RAUWd by one of the balanceSubTree calls we
2135         // made. Our worklist isn't up to date anymore.
2136         // Restart the whole process.
2137         LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2138         return balanceSubTree(N, TopLevel);
2139       }
2140 
2141       NodeHeights[Child] = 1;
2142       CurrentWeight += Weight;
2143 
2144       unsigned PowerOf2;
2145       if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
2146           (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
2147           Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
2148         // Try to identify two factorizable MUL/SHL children greedily. Leave
2149         // them out of the priority queue for now so we can deal with them
2150         // after.
2151         if (!Mul1.Value.getNode()) {
2152           Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
2153           MaxPowerOf2 = PowerOf2;
2154         } else {
2155           Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
2156           MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
2157 
2158           // Our addressing modes can only shift by a maximum of 3
2159           if (MaxPowerOf2 > 3)
2160             MaxPowerOf2 = 3;
2161 
2162           CanFactorize = true;
2163         }
2164       } else
2165         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2166     } else if (!isOpcodeHandled(Child.getNode())) {
2167       // CASE 2: Child is an unhandled kind of node (e.g. constant)
2168       int Weight = getWeight(Child.getNode());
2169 
2170       NodeHeights[Child] = getHeight(Child.getNode());
2171       CurrentWeight += Weight;
2172 
2173       if (isTargetConstant(Child) && !GA.Value.getNode())
2174         GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2175       else
2176         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2177     } else {
2178       // CASE 3: Child is a subtree of same opcode
2179       // Visit children first, then flatten.
2180       unsigned ChildOpcode = Child.getOpcode();
2181       assert(ChildOpcode == NOpcode ||
2182              (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2183 
2184       // Convert SHL to MUL
2185       SDValue Op1;
2186       if (ChildOpcode == ISD::SHL)
2187         Op1 = getMultiplierForSHL(Child.getNode());
2188       else
2189         Op1 = Child->getOperand(1);
2190 
2191       if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2192         assert(!NodeHeights.count(Child) && "Parent visited before children?");
2193         // Visit children first, then re-visit this node
2194         Worklist.push_back(Child);
2195         Worklist.push_back(Op1);
2196         Worklist.push_back(Child->getOperand(0));
2197       } else {
2198         // Back at this node after visiting the children
2199         if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2200           Imbalanced = true;
2201 
2202         NodeHeights[Child] = std::max(NodeHeights[Op1],
2203                                       NodeHeights[Child->getOperand(0)]) + 1;
2204       }
2205     }
2206   }
2207 
2208   LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2209                     << " weight=" << CurrentWeight
2210                     << " imbalanced=" << Imbalanced << "\n");
2211 
2212   // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2213   //  This factors out a shift in order to match memw(a<<Y+b).
2214   if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2215                        willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2216     LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2217     int Weight = Mul1.Weight + Mul2.Weight;
2218     int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2219     SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2220     SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2221     SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2222                                   Mul1Factored, Mul2Factored);
2223     SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2224                                         Mul1.Value.getValueType());
2225     SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2226                                   Sum, Const);
2227     NodeHeights[New] = Height;
2228     Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2229   } else if (Mul1.Value.getNode()) {
2230     // We failed to factorize two MULs, so now the Muls are left outside the
2231     // queue... add them back.
2232     Leaves.push(Mul1);
2233     if (Mul2.Value.getNode())
2234       Leaves.push(Mul2);
2235     CanFactorize = false;
2236   }
2237 
2238   // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2239   // and the root node itself is not used more than twice. This reduces the
2240   // amount of additional constant extenders introduced by this optimization.
2241   bool CombinedGA = false;
2242   if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2243       GA.Value.hasOneUse() && N->use_size() < 3) {
2244     GlobalAddressSDNode *GANode =
2245       cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2246     ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2247 
2248     if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2249         getTargetLowering()->isOffsetFoldingLegal(GANode)) {
2250       LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2251                         << Offset->getSExtValue() << "): ");
2252       LLVM_DEBUG(GANode->dump(CurDAG));
2253 
2254       SDValue NewTGA =
2255         CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2256             GANode->getValueType(0),
2257             GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2258       GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2259           GA.Value.getValueType(), NewTGA);
2260       GA.Weight += Leaves.top().Weight;
2261 
2262       NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2263       CombinedGA = true;
2264 
2265       Leaves.pop(); // Remove the offset constant from the queue
2266     }
2267   }
2268 
2269   if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2270       (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2271     RootWeights[N] = CurrentWeight;
2272     RootHeights[N] = NodeHeights[SDValue(N, 0)];
2273 
2274     return SDValue(N, 0);
2275   }
2276 
2277   // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2278   if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2279     WeightedLeaf SHL = Leaves.findSHL(31);
2280     if (SHL.Value.getNode()) {
2281       int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2282       GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2283                                  GA.Value.getValueType(),
2284                                  GA.Value, SHL.Value);
2285       GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2286       NodeHeights[GA.Value] = Height;
2287     }
2288   }
2289 
2290   if (GA.Value.getNode())
2291     Leaves.push(GA);
2292 
2293   // If this is the top level and we haven't factored out a shift, we should try
2294   // to move a constant to the bottom to match addressing modes like memw(rX+C)
2295   if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2296     LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2297     Leaves.pushToBottom(Leaves.pop());
2298   }
2299 
2300   const DataLayout &DL = CurDAG->getDataLayout();
2301   const TargetLowering &TLI = *getTargetLowering();
2302 
2303   // Rebuild the tree using Huffman's algorithm
2304   while (Leaves.size() > 1) {
2305     WeightedLeaf L0 = Leaves.pop();
2306 
2307     // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2308     // otherwise just get the next leaf
2309     WeightedLeaf L1 = Leaves.findMULbyConst();
2310     if (!L1.Value.getNode())
2311       L1 = Leaves.pop();
2312 
2313     assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2314 
2315     SDValue V0 = L0.Value;
2316     int V0Weight = L0.Weight;
2317     SDValue V1 = L1.Value;
2318     int V1Weight = L1.Weight;
2319 
2320     // Make sure that none of these nodes have been RAUW'd
2321     if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2322         (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2323       LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2324       return balanceSubTree(N, TopLevel);
2325     }
2326 
2327     ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2328     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2329     EVT VT = N->getValueType(0);
2330     SDValue NewNode;
2331 
2332     if (V0C && !V1C) {
2333       std::swap(V0, V1);
2334       std::swap(V0C, V1C);
2335     }
2336 
2337     // Calculate height of this node
2338     assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2339            "Children must have been visited before re-combining them!");
2340     int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2341 
2342     // Rebuild this node (and restore SHL from MUL if needed)
2343     if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2344       NewNode = CurDAG->getNode(
2345           ISD::SHL, SDLoc(V0), VT, V0,
2346           CurDAG->getConstant(
2347               V1C->getAPIntValue().logBase2(), SDLoc(N),
2348               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2349     else
2350       NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2351 
2352     NodeHeights[NewNode] = Height;
2353 
2354     int Weight = V0Weight + V1Weight;
2355     Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2356 
2357     LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2358                       << ",Height=" << Height << "):\n");
2359     LLVM_DEBUG(NewNode.dump());
2360   }
2361 
2362   assert(Leaves.size() == 1);
2363   SDValue NewRoot = Leaves.top().Value;
2364 
2365   assert(NodeHeights.count(NewRoot));
2366   int Height = NodeHeights[NewRoot];
2367 
2368   // Restore SHL if we earlier converted it to a MUL
2369   if (NewRoot.getOpcode() == ISD::MUL) {
2370     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2371     if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2372       EVT VT = NewRoot.getValueType();
2373       SDValue V0 = NewRoot.getOperand(0);
2374       NewRoot = CurDAG->getNode(
2375           ISD::SHL, SDLoc(NewRoot), VT, V0,
2376           CurDAG->getConstant(
2377               V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2378               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2379     }
2380   }
2381 
2382   if (N != NewRoot.getNode()) {
2383     LLVM_DEBUG(dbgs() << "--> Root is now: ");
2384     LLVM_DEBUG(NewRoot.dump());
2385 
2386     // Replace all uses of old root by new root
2387     CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2388     // Mark that we have RAUW'd N
2389     RootWeights[N] = -2;
2390   } else {
2391     LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2392   }
2393 
2394   RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2395   RootHeights[NewRoot.getNode()] = Height;
2396 
2397   return NewRoot;
2398 }
2399 
2400 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2401   for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) {
2402     SDNode *N = &Node;
2403     if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2404       continue;
2405 
2406     SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2407     if (BasePtr.getOpcode() != ISD::ADD)
2408       continue;
2409 
2410     // We've already processed this node
2411     if (RootWeights.count(BasePtr.getNode()))
2412       continue;
2413 
2414     LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2415     LLVM_DEBUG(N->dump(CurDAG));
2416 
2417     // FindRoots
2418     SmallVector<SDNode *, 4> Worklist;
2419 
2420     Worklist.push_back(BasePtr.getOperand(0).getNode());
2421     Worklist.push_back(BasePtr.getOperand(1).getNode());
2422 
2423     while (!Worklist.empty()) {
2424       SDNode *N = Worklist.pop_back_val();
2425       unsigned Opcode = N->getOpcode();
2426 
2427       if (!isOpcodeHandled(N))
2428         continue;
2429 
2430       Worklist.push_back(N->getOperand(0).getNode());
2431       Worklist.push_back(N->getOperand(1).getNode());
2432 
2433       // Not a root if it has only one use and same opcode as its parent
2434       if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2435         continue;
2436 
2437       // This root node has already been processed
2438       if (RootWeights.count(N))
2439         continue;
2440 
2441       RootWeights[N] = -1;
2442     }
2443 
2444     // Balance node itself
2445     RootWeights[BasePtr.getNode()] = -1;
2446     SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2447 
2448     if (N->getOpcode() == ISD::LOAD)
2449       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2450             NewBasePtr, N->getOperand(2));
2451     else
2452       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2453             NewBasePtr, N->getOperand(3));
2454 
2455     LLVM_DEBUG(dbgs() << "--> Final node: ");
2456     LLVM_DEBUG(N->dump(CurDAG));
2457   }
2458 
2459   CurDAG->RemoveDeadNodes();
2460   GAUsesInFunction.clear();
2461   RootHeights.clear();
2462   RootWeights.clear();
2463 }
2464