xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the Hexagon target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "HexagonISelDAGToDAG.h"
14 #include "Hexagon.h"
15 #include "HexagonISelLowering.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonTargetMachine.h"
18 #include "llvm/CodeGen/FunctionLoweringInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/SelectionDAGISel.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsHexagon.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 #define PASS_NAME "Hexagon DAG->DAG Pattern Instruction Selection"
29 
30 static
31 cl::opt<bool>
32 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
33   cl::desc("Rebalance address calculation trees to improve "
34           "instruction selection"));
35 
36 // Rebalance only if this allows e.g. combining a GA with an offset or
37 // factoring out a shift.
38 static
39 cl::opt<bool>
40 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
41   cl::desc("Rebalance address tree only if this allows optimizations"));
42 
43 static
44 cl::opt<bool>
45 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
46   cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
47 
48 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
49   cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
50 
51 //===----------------------------------------------------------------------===//
52 // Instruction Selector Implementation
53 //===----------------------------------------------------------------------===//
54 
55 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
56 #include "HexagonGenDAGISel.inc"
57 
58 namespace llvm {
59 /// createHexagonISelDag - This pass converts a legalized DAG into a
60 /// Hexagon-specific DAG, ready for instruction scheduling.
createHexagonISelDag(HexagonTargetMachine & TM,CodeGenOptLevel OptLevel)61 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
62                                    CodeGenOptLevel OptLevel) {
63   return new HexagonDAGToDAGISelLegacy(TM, OptLevel);
64 }
65 }
66 
HexagonDAGToDAGISelLegacy(HexagonTargetMachine & tm,CodeGenOptLevel OptLevel)67 HexagonDAGToDAGISelLegacy::HexagonDAGToDAGISelLegacy(HexagonTargetMachine &tm,
68                                                      CodeGenOptLevel OptLevel)
69     : SelectionDAGISelLegacy(
70           ID, std::make_unique<HexagonDAGToDAGISel>(tm, OptLevel)) {}
71 
72 char HexagonDAGToDAGISelLegacy::ID = 0;
73 
INITIALIZE_PASS(HexagonDAGToDAGISelLegacy,DEBUG_TYPE,PASS_NAME,false,false)74 INITIALIZE_PASS(HexagonDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
75 
76 void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
77   SDValue Chain = LD->getChain();
78   SDValue Base = LD->getBasePtr();
79   SDValue Offset = LD->getOffset();
80   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
81   EVT LoadedVT = LD->getMemoryVT();
82   unsigned Opcode = 0;
83 
84   // Check for zero extended loads. Treat any-extend loads as zero extended
85   // loads.
86   ISD::LoadExtType ExtType = LD->getExtensionType();
87   bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
88   bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
89 
90   assert(LoadedVT.isSimple());
91   switch (LoadedVT.getSimpleVT().SimpleTy) {
92   case MVT::i8:
93     if (IsZeroExt)
94       Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
95     else
96       Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
97     break;
98   case MVT::i16:
99     if (IsZeroExt)
100       Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
101     else
102       Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
103     break;
104   case MVT::i32:
105   case MVT::f32:
106   case MVT::v2i16:
107   case MVT::v4i8:
108     Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
109     break;
110   case MVT::i64:
111   case MVT::f64:
112   case MVT::v2i32:
113   case MVT::v4i16:
114   case MVT::v8i8:
115     Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
116     break;
117   case MVT::v64i8:
118   case MVT::v32i16:
119   case MVT::v16i32:
120   case MVT::v8i64:
121   case MVT::v128i8:
122   case MVT::v64i16:
123   case MVT::v32i32:
124   case MVT::v16i64:
125     if (isAlignedMemNode(LD)) {
126       if (LD->isNonTemporal())
127         Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
128       else
129         Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
130     } else {
131       Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
132     }
133     break;
134   default:
135     llvm_unreachable("Unexpected memory type in indexed load");
136   }
137 
138   SDValue IncV = CurDAG->getSignedTargetConstant(Inc, dl, MVT::i32);
139   MachineMemOperand *MemOp = LD->getMemOperand();
140 
141   auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
142         -> MachineSDNode* {
143     if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
144       SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
145       return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
146                                     Zero, SDValue(N, 0));
147     }
148     if (ExtType == ISD::SEXTLOAD)
149       return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
150                                     SDValue(N, 0));
151     return N;
152   };
153 
154   //                  Loaded value   Next address   Chain
155   SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
156   SDValue To[3];
157 
158   EVT ValueVT = LD->getValueType(0);
159   if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
160     // A load extending to i64 will actually produce i32, which will then
161     // need to be extended to i64.
162     assert(LoadedVT.getSizeInBits() <= 32);
163     ValueVT = MVT::i32;
164   }
165 
166   if (IsValidInc) {
167     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
168                                               MVT::i32, MVT::Other, Base,
169                                               IncV, Chain);
170     CurDAG->setNodeMemRefs(L, {MemOp});
171     To[1] = SDValue(L, 1); // Next address.
172     To[2] = SDValue(L, 2); // Chain.
173     // Handle special case for extension to i64.
174     if (LD->getValueType(0) == MVT::i64)
175       L = getExt64(L, dl);
176     To[0] = SDValue(L, 0); // Loaded (extended) value.
177   } else {
178     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
179     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
180                                               Base, Zero, Chain);
181     CurDAG->setNodeMemRefs(L, {MemOp});
182     To[2] = SDValue(L, 1); // Chain.
183     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
184                                               Base, IncV);
185     To[1] = SDValue(A, 0); // Next address.
186     // Handle special case for extension to i64.
187     if (LD->getValueType(0) == MVT::i64)
188       L = getExt64(L, dl);
189     To[0] = SDValue(L, 0); // Loaded (extended) value.
190   }
191   ReplaceUses(From, To, 3);
192   CurDAG->RemoveDeadNode(LD);
193 }
194 
LoadInstrForLoadIntrinsic(SDNode * IntN)195 MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
196   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
197     return nullptr;
198 
199   SDLoc dl(IntN);
200   unsigned IntNo = IntN->getConstantOperandVal(1);
201 
202   static std::map<unsigned,unsigned> LoadPciMap = {
203     { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
204     { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
205     { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
206     { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
207     { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
208     { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
209   };
210   auto FLC = LoadPciMap.find(IntNo);
211   if (FLC != LoadPciMap.end()) {
212     EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
213     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
214     // Operands: { Base, Increment, Modifier, Chain }
215     auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
216     SDValue I =
217         CurDAG->getSignedTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
218     MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
219           { IntN->getOperand(2), I, IntN->getOperand(4),
220             IntN->getOperand(0) });
221     return Res;
222   }
223 
224   return nullptr;
225 }
226 
StoreInstrForLoadIntrinsic(MachineSDNode * LoadN,SDNode * IntN)227 SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
228       SDNode *IntN) {
229   // The "LoadN" is just a machine load instruction. The intrinsic also
230   // involves storing it. Generate an appropriate store to the location
231   // given in the intrinsic's operand(3).
232   uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
233   unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
234                       HexagonII::MemAccesSizeMask;
235   unsigned Size = 1U << (SizeBits-1);
236 
237   SDLoc dl(IntN);
238   MachinePointerInfo PI;
239   SDValue TS;
240   SDValue Loc = IntN->getOperand(3);
241 
242   if (Size >= 4)
243     TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
244                           Align(Size));
245   else
246     TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
247                                PI, MVT::getIntegerVT(Size * 8), Align(Size));
248 
249   SDNode *StoreN;
250   {
251     HandleSDNode Handle(TS);
252     SelectStore(TS.getNode());
253     StoreN = Handle.getValue().getNode();
254   }
255 
256   // Load's results are { Loaded value, Updated pointer, Chain }
257   ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
258   ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
259   return StoreN;
260 }
261 
tryLoadOfLoadIntrinsic(LoadSDNode * N)262 bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
263   // The intrinsics for load circ/brev perform two operations:
264   // 1. Load a value V from the specified location, using the addressing
265   //    mode corresponding to the intrinsic.
266   // 2. Store V into a specified location. This location is typically a
267   //    local, temporary object.
268   // In many cases, the program using these intrinsics will immediately
269   // load V again from the local object. In those cases, when certain
270   // conditions are met, the last load can be removed.
271   // This function identifies and optimizes this pattern. If the pattern
272   // cannot be optimized, it returns nullptr, which will cause the load
273   // to be selected separately from the intrinsic (which will be handled
274   // in SelectIntrinsicWChain).
275 
276   SDValue Ch = N->getOperand(0);
277   SDValue Loc = N->getOperand(1);
278 
279   // Assume that the load and the intrinsic are connected directly with a
280   // chain:
281   //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
282   //   t2: i32,ch = load t1:1, Loc, ...
283   SDNode *C = Ch.getNode();
284 
285   if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
286     return false;
287 
288   // The second load can only be eliminated if its extension type matches
289   // that of the load instruction corresponding to the intrinsic. The user
290   // can provide an address of an unsigned variable to store the result of
291   // a sign-extending intrinsic into (or the other way around).
292   ISD::LoadExtType IntExt;
293   switch (C->getConstantOperandVal(1)) {
294   case Intrinsic::hexagon_circ_ldub:
295   case Intrinsic::hexagon_circ_lduh:
296     IntExt = ISD::ZEXTLOAD;
297     break;
298   case Intrinsic::hexagon_circ_ldw:
299   case Intrinsic::hexagon_circ_ldd:
300     IntExt = ISD::NON_EXTLOAD;
301     break;
302   default:
303     IntExt = ISD::SEXTLOAD;
304     break;
305   }
306   if (N->getExtensionType() != IntExt)
307     return false;
308 
309   // Make sure the target location for the loaded value in the load intrinsic
310   // is the location from which LD (or N) is loading.
311   if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
312     return false;
313 
314   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) {
315     SDNode *S = StoreInstrForLoadIntrinsic(L, C);
316     SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
317     SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
318     ReplaceUses(F, T, std::size(T));
319     // This transformation will leave the intrinsic dead. If it remains in
320     // the DAG, the selection code will see it again, but without the load,
321     // and it will generate a store that is normally required for it.
322     CurDAG->RemoveDeadNode(C);
323     return true;
324   }
325   return false;
326 }
327 
328 // Convert the bit-reverse load intrinsic to appropriate target instruction.
SelectBrevLdIntrinsic(SDNode * IntN)329 bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
330   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
331     return false;
332 
333   const SDLoc &dl(IntN);
334   unsigned IntNo = IntN->getConstantOperandVal(1);
335 
336   static const std::map<unsigned, unsigned> LoadBrevMap = {
337     { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
338     { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
339     { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
340     { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
341     { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
342     { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
343   };
344   auto FLI = LoadBrevMap.find(IntNo);
345   if (FLI != LoadBrevMap.end()) {
346     EVT ValTy =
347         (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
348     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
349     // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
350     // modifier}.
351     // Operands of target instruction: { Base, Modifier, Chain }.
352     MachineSDNode *Res = CurDAG->getMachineNode(
353         FLI->second, dl, RTys,
354         {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
355 
356     MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
357     CurDAG->setNodeMemRefs(Res, {MemOp});
358 
359     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
360     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
361     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
362     CurDAG->RemoveDeadNode(IntN);
363     return true;
364   }
365   return false;
366 }
367 
368 /// Generate a machine instruction node for the new circular buffer intrinsics.
369 /// The new versions use a CSx register instead of the K field.
SelectNewCircIntrinsic(SDNode * IntN)370 bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
371   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
372     return false;
373 
374   SDLoc DL(IntN);
375   unsigned IntNo = IntN->getConstantOperandVal(1);
376   SmallVector<SDValue, 7> Ops;
377 
378   static std::map<unsigned,unsigned> LoadNPcMap = {
379     { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
380     { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
381     { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
382     { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
383     { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
384     { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
385     { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
386     { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
387     { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
388     { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
389     { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
390     { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
391   };
392   auto FLI = LoadNPcMap.find (IntNo);
393   if (FLI != LoadNPcMap.end()) {
394     EVT ValTy = MVT::i32;
395     if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
396         IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
397       ValTy = MVT::i64;
398     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
399     // Handle load.*_pci case which has 6 operands.
400     if (IntN->getNumOperands() == 6) {
401       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
402       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
403       // Operands: { Base, Increment, Modifier, Start, Chain }.
404       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
405               IntN->getOperand(0) };
406     } else
407       // Handle load.*_pcr case which has 5 operands.
408       // Operands: { Base, Modifier, Start, Chain }.
409       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
410               IntN->getOperand(0) };
411     MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
412     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
413     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
414     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
415     CurDAG->RemoveDeadNode(IntN);
416     return true;
417   }
418 
419   static std::map<unsigned,unsigned> StoreNPcMap = {
420     { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
421     { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
422     { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
423     { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
424     { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
425     { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
426     { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
427     { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
428     { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
429     { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
430   };
431   auto FSI = StoreNPcMap.find (IntNo);
432   if (FSI != StoreNPcMap.end()) {
433     EVT RTys[] = { MVT::i32, MVT::Other };
434     // Handle store.*_pci case which has 7 operands.
435     if (IntN->getNumOperands() == 7) {
436       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
437       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
438       // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
439       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
440               IntN->getOperand(6), IntN->getOperand(0) };
441     } else
442       // Handle store.*_pcr case which has 6 operands.
443       // Operands: { Base, Modifier, Value, Start, Chain }.
444       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
445               IntN->getOperand(5), IntN->getOperand(0) };
446     MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
447     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
448     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
449     CurDAG->RemoveDeadNode(IntN);
450     return true;
451   }
452 
453   return false;
454 }
455 
SelectLoad(SDNode * N)456 void HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
457   SDLoc dl(N);
458   LoadSDNode *LD = cast<LoadSDNode>(N);
459 
460   // Handle indexed loads.
461   ISD::MemIndexedMode AM = LD->getAddressingMode();
462   if (AM != ISD::UNINDEXED) {
463     SelectIndexedLoad(LD, dl);
464     return;
465   }
466 
467   // Handle patterns using circ/brev load intrinsics.
468   if (tryLoadOfLoadIntrinsic(LD))
469     return;
470 
471   SelectCode(LD);
472 }
473 
SelectIndexedStore(StoreSDNode * ST,const SDLoc & dl)474 void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
475   SDValue Chain = ST->getChain();
476   SDValue Base = ST->getBasePtr();
477   SDValue Offset = ST->getOffset();
478   SDValue Value = ST->getValue();
479   // Get the constant value.
480   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
481   EVT StoredVT = ST->getMemoryVT();
482   EVT ValueVT = Value.getValueType();
483 
484   bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
485   unsigned Opcode = 0;
486 
487   assert(StoredVT.isSimple());
488   switch (StoredVT.getSimpleVT().SimpleTy) {
489   case MVT::i8:
490     Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
491     break;
492   case MVT::i16:
493     Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
494     break;
495   case MVT::i32:
496   case MVT::f32:
497   case MVT::v2i16:
498   case MVT::v4i8:
499     Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
500     break;
501   case MVT::i64:
502   case MVT::f64:
503   case MVT::v2i32:
504   case MVT::v4i16:
505   case MVT::v8i8:
506     Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
507     break;
508   case MVT::v64i8:
509   case MVT::v32i16:
510   case MVT::v16i32:
511   case MVT::v8i64:
512   case MVT::v128i8:
513   case MVT::v64i16:
514   case MVT::v32i32:
515   case MVT::v16i64:
516     if (isAlignedMemNode(ST)) {
517       if (ST->isNonTemporal())
518         Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
519       else
520         Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
521     } else {
522       Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
523     }
524     break;
525   default:
526     llvm_unreachable("Unexpected memory type in indexed store");
527   }
528 
529   if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
530     assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
531     Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
532                                            dl, MVT::i32, Value);
533   }
534 
535   SDValue IncV = CurDAG->getSignedTargetConstant(Inc, dl, MVT::i32);
536   MachineMemOperand *MemOp = ST->getMemOperand();
537 
538   //                  Next address   Chain
539   SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
540   SDValue To[2];
541 
542   if (IsValidInc) {
543     // Build post increment store.
544     SDValue Ops[] = { Base, IncV, Value, Chain };
545     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
546                                               Ops);
547     CurDAG->setNodeMemRefs(S, {MemOp});
548     To[0] = SDValue(S, 0);
549     To[1] = SDValue(S, 1);
550   } else {
551     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
552     SDValue Ops[] = { Base, Zero, Value, Chain };
553     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
554     CurDAG->setNodeMemRefs(S, {MemOp});
555     To[1] = SDValue(S, 0);
556     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
557                                               Base, IncV);
558     To[0] = SDValue(A, 0);
559   }
560 
561   ReplaceUses(From, To, 2);
562   CurDAG->RemoveDeadNode(ST);
563 }
564 
SelectStore(SDNode * N)565 void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
566   SDLoc dl(N);
567   StoreSDNode *ST = cast<StoreSDNode>(N);
568 
569   // Handle indexed stores.
570   ISD::MemIndexedMode AM = ST->getAddressingMode();
571   if (AM != ISD::UNINDEXED) {
572     SelectIndexedStore(ST, dl);
573     return;
574   }
575 
576   SelectCode(ST);
577 }
578 
SelectSHL(SDNode * N)579 void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
580   SDLoc dl(N);
581   SDValue Shl_0 = N->getOperand(0);
582   SDValue Shl_1 = N->getOperand(1);
583 
584   auto Default = [this,N] () -> void { SelectCode(N); };
585 
586   if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
587     return Default();
588 
589   // RHS is const.
590   int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
591 
592   if (Shl_0.getOpcode() == ISD::MUL) {
593     SDValue Mul_0 = Shl_0.getOperand(0); // Val
594     SDValue Mul_1 = Shl_0.getOperand(1); // Const
595     // RHS of mul is const.
596     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
597       int32_t ValConst = C->getSExtValue() << ShlConst;
598       if (isInt<9>(ValConst)) {
599         SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
600         SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
601                                                 MVT::i32, Mul_0, Val);
602         ReplaceNode(N, Result);
603         return;
604       }
605     }
606     return Default();
607   }
608 
609   if (Shl_0.getOpcode() == ISD::SUB) {
610     SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
611     SDValue Sub_1 = Shl_0.getOperand(1); // Val
612     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
613       if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
614         return Default();
615       SDValue Shl2_0 = Sub_1.getOperand(0); // Val
616       SDValue Shl2_1 = Sub_1.getOperand(1); // Const
617       if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
618         int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
619         if (isInt<9>(-ValConst)) {
620           SDValue Val =
621               CurDAG->getSignedTargetConstant(-ValConst, dl, MVT::i32);
622           SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
623                                                   MVT::i32, Shl2_0, Val);
624           ReplaceNode(N, Result);
625           return;
626         }
627       }
628     }
629   }
630 
631   return Default();
632 }
633 
634 //
635 // Handling intrinsics for circular load and bitreverse load.
636 //
SelectIntrinsicWChain(SDNode * N)637 void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
638   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N)) {
639     StoreInstrForLoadIntrinsic(L, N);
640     CurDAG->RemoveDeadNode(N);
641     return;
642   }
643 
644   // Handle bit-reverse load intrinsics.
645   if (SelectBrevLdIntrinsic(N))
646     return;
647 
648   if (SelectNewCircIntrinsic(N))
649     return;
650 
651   unsigned IntNo = N->getConstantOperandVal(1);
652   if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
653       IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
654       IntNo == Intrinsic::hexagon_V6_vgathermh ||
655       IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
656       IntNo == Intrinsic::hexagon_V6_vgathermhw ||
657       IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
658     SelectV65Gather(N);
659     return;
660   }
661   if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
662       IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
663       IntNo == Intrinsic::hexagon_V6_vgathermhq ||
664       IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
665       IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
666       IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
667     SelectV65GatherPred(N);
668     return;
669   }
670 
671   SelectCode(N);
672 }
673 
SelectIntrinsicWOChain(SDNode * N)674 void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
675   unsigned IID = N->getConstantOperandVal(0);
676   unsigned Bits;
677   switch (IID) {
678   case Intrinsic::hexagon_S2_vsplatrb:
679     Bits = 8;
680     break;
681   case Intrinsic::hexagon_S2_vsplatrh:
682     Bits = 16;
683     break;
684   case Intrinsic::hexagon_V6_vaddcarry:
685   case Intrinsic::hexagon_V6_vaddcarry_128B:
686   case Intrinsic::hexagon_V6_vsubcarry:
687   case Intrinsic::hexagon_V6_vsubcarry_128B:
688     SelectHVXDualOutput(N);
689     return;
690   default:
691     SelectCode(N);
692     return;
693   }
694 
695   SDValue V = N->getOperand(1);
696   SDValue U;
697   // Splat intrinsics.
698   if (keepsLowBits(V, Bits, U)) {
699     SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
700                                 N->getOperand(0), U);
701     ReplaceNode(N, R.getNode());
702     SelectCode(R.getNode());
703     return;
704   }
705   SelectCode(N);
706 }
707 
SelectExtractSubvector(SDNode * N)708 void HexagonDAGToDAGISel::SelectExtractSubvector(SDNode *N) {
709   SDValue Inp = N->getOperand(0);
710   MVT ResTy = N->getValueType(0).getSimpleVT();
711   unsigned Idx = N->getConstantOperandVal(1);
712 
713   [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
714   [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
715   assert(InpTy.getVectorElementType() == ResTy.getVectorElementType());
716   assert(2 * ResLen == InpTy.getVectorNumElements());
717   assert(ResTy.getSizeInBits() == 32);
718   assert(Idx == 0 || Idx == ResLen);
719 
720   unsigned SubReg = Idx == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
721   SDValue Ext = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
722 
723   ReplaceNode(N, Ext.getNode());
724 }
725 
726 //
727 // Map floating point constant values.
728 //
SelectConstantFP(SDNode * N)729 void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
730   SDLoc dl(N);
731   auto *CN = cast<ConstantFPSDNode>(N);
732   APInt A = CN->getValueAPF().bitcastToAPInt();
733   if (N->getValueType(0) == MVT::f32) {
734     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
735     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
736     return;
737   }
738   if (N->getValueType(0) == MVT::f64) {
739     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
740     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
741     return;
742   }
743 
744   SelectCode(N);
745 }
746 
747 //
748 // Map boolean values.
749 //
SelectConstant(SDNode * N)750 void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
751   if (N->getValueType(0) == MVT::i1) {
752     assert(!(N->getAsZExtVal() >> 1));
753     unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
754                       ? Hexagon::PS_true
755                       : Hexagon::PS_false;
756     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
757     return;
758   }
759 
760   SelectCode(N);
761 }
762 
SelectFrameIndex(SDNode * N)763 void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
764   MachineFrameInfo &MFI = MF->getFrameInfo();
765   const HexagonFrameLowering *HFI = HST->getFrameLowering();
766   int FX = cast<FrameIndexSDNode>(N)->getIndex();
767   Align StkA = HFI->getStackAlign();
768   Align MaxA = MFI.getMaxAlign();
769   SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
770   SDLoc DL(N);
771   SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
772   SDNode *R = nullptr;
773 
774   // Use PS_fi when:
775   // - the object is fixed, or
776   // - there are no objects with higher-than-default alignment, or
777   // - there are no dynamically allocated objects.
778   // Otherwise, use PS_fia.
779   if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
780     R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
781   } else {
782     auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
783     Register AR = HMFI.getStackAlignBaseReg();
784     SDValue CH = CurDAG->getEntryNode();
785     SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
786     R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
787   }
788 
789   ReplaceNode(N, R);
790 }
791 
SelectAddSubCarry(SDNode * N)792 void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) {
793   unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
794                                                          : Hexagon::A4_subp_c;
795   SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
796                                      { N->getOperand(0), N->getOperand(1),
797                                        N->getOperand(2) });
798   ReplaceNode(N, C);
799 }
800 
SelectVAlign(SDNode * N)801 void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
802   MVT ResTy = N->getValueType(0).getSimpleVT();
803   if (HST->isHVXVectorType(ResTy, true))
804     return SelectHvxVAlign(N);
805 
806   const SDLoc &dl(N);
807   unsigned VecLen = ResTy.getSizeInBits();
808   if (VecLen == 32) {
809     SDValue Ops[] = {
810       CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
811       N->getOperand(0),
812       CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
813       N->getOperand(1),
814       CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
815     };
816     SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
817                                        MVT::i64, Ops);
818 
819     // Shift right by "(Addr & 0x3) * 8" bytes.
820     SDNode *C;
821     SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
822     SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
823     if (HST->useCompound()) {
824       C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
825                                  M0, N->getOperand(2), M1);
826     } else {
827       SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
828                                          N->getOperand(2), M1);
829       C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
830                                  SDValue(T, 0), M0);
831     }
832     SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
833                                        SDValue(R, 0), SDValue(C, 0));
834     SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
835                                                SDValue(S, 0));
836     ReplaceNode(N, E.getNode());
837   } else {
838     assert(VecLen == 64);
839     SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
840                                         N->getOperand(2));
841     SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
842                                         N->getOperand(0), N->getOperand(1),
843                                         SDValue(Pu,0));
844     ReplaceNode(N, VA);
845   }
846 }
847 
SelectVAlignAddr(SDNode * N)848 void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) {
849   const SDLoc &dl(N);
850   SDValue A = N->getOperand(1);
851   int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
852   assert(isPowerOf2_32(-Mask));
853 
854   SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
855   SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
856                                       N->getOperand(0), M);
857   ReplaceNode(N, AA);
858 }
859 
860 // Handle these nodes here to avoid having to write patterns for all
861 // combinations of input/output types. In all cases, the resulting
862 // instruction is the same.
SelectTypecast(SDNode * N)863 void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) {
864   SDValue Op = N->getOperand(0);
865   MVT OpTy = Op.getValueType().getSimpleVT();
866   SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
867                                   CurDAG->getVTList(OpTy), {Op});
868   ReplaceNode(T, Op.getNode());
869 }
870 
SelectP2D(SDNode * N)871 void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
872   MVT ResTy = N->getValueType(0).getSimpleVT();
873   SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
874                                      N->getOperand(0));
875   ReplaceNode(N, T);
876 }
877 
SelectD2P(SDNode * N)878 void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
879   const SDLoc &dl(N);
880   MVT ResTy = N->getValueType(0).getSimpleVT();
881   SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
882   SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
883                                      N->getOperand(0), Zero);
884   ReplaceNode(N, T);
885 }
886 
SelectV2Q(SDNode * N)887 void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
888   const SDLoc &dl(N);
889   MVT ResTy = N->getValueType(0).getSimpleVT();
890   // The argument to V2Q should be a single vector.
891   MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
892   assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
893 
894   SDValue C = CurDAG->getSignedTargetConstant(-1, dl, MVT::i32);
895   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
896   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
897                                      N->getOperand(0), SDValue(R,0));
898   ReplaceNode(N, T);
899 }
900 
SelectQ2V(SDNode * N)901 void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
902   const SDLoc &dl(N);
903   MVT ResTy = N->getValueType(0).getSimpleVT();
904   // The result of V2Q should be a single vector.
905   assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
906 
907   SDValue C = CurDAG->getSignedTargetConstant(-1, dl, MVT::i32);
908   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
909   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
910                                      N->getOperand(0), SDValue(R,0));
911   ReplaceNode(N, T);
912 }
913 
FDiv(SDNode * N)914 void HexagonDAGToDAGISel::FDiv(SDNode *N) {
915   const SDLoc &dl(N);
916   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
917   SmallVector<SDValue, 2> Ops;
918   Ops = {N->getOperand(0), N->getOperand(1)};
919   SDVTList VTs;
920   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
921   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
922   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
923 
924   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
925   SDNode *constNode =
926       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
927 
928   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
929   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
930                                        SDValue(constNode, 0), SDValue(D, 0),
931                                        SDValue(ResScale, 0));
932   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
933                                           SDValue(ResScale, 0), SDValue(Err, 0),
934                                           SDValue(ResScale, 0));
935   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
936                                           SDValue(constNode, 0), SDValue(D, 0),
937                                           SDValue(NewRec, 0));
938   SDNode *q = CurDAG->getMachineNode(
939       Hexagon::A2_andir, dl, MVT::f32, SDValue(n, 0),
940       CurDAG->getTargetConstant(0x80000000, dl, MVT::i32));
941   SDNode *NewQ =
942       CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(q, 0),
943                              SDValue(n, 0), SDValue(NewRec, 0));
944   SDNode *NNewRec = CurDAG->getMachineNode(
945       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
946       SDValue(newErr, 0), SDValue(NewRec, 0));
947   SDNode *qErr =
948       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
949                              SDValue(D, 0), SDValue(NewQ, 0));
950   SDNode *NNewQ = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
951                                          SDValue(NewQ, 0), SDValue(qErr, 0),
952                                          SDValue(NNewRec, 0));
953 
954   SDNode *NqErr =
955       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
956                              SDValue(NNewQ, 0), SDValue(D, 0));
957   std::array<SDValue, 4> temp1 = {SDValue(NNewQ, 0), SDValue(NqErr, 0),
958                                   SDValue(NNewRec, 0), SDValue(ResScale, 1)};
959   ArrayRef<SDValue> OpValue1(temp1);
960   SDNode *FinalNewQ =
961       CurDAG->getMachineNode(Hexagon::F2_sffma_sc, dl, MVT::f32, OpValue1);
962   ReplaceNode(N, FinalNewQ);
963 }
964 
FastFDiv(SDNode * N)965 void HexagonDAGToDAGISel::FastFDiv(SDNode *N) {
966   const SDLoc &dl(N);
967   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
968   SmallVector<SDValue, 2> Ops;
969   Ops = {N->getOperand(0), N->getOperand(1)};
970   SDVTList VTs;
971   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
972   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
973   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
974 
975   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
976   SDNode *constNode =
977       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
978 
979   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
980   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
981                                        SDValue(constNode, 0), SDValue(D, 0),
982                                        SDValue(ResScale, 0));
983   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
984                                           SDValue(ResScale, 0), SDValue(Err, 0),
985                                           SDValue(ResScale, 0));
986   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
987                                           SDValue(constNode, 0), SDValue(D, 0),
988                                           SDValue(NewRec, 0));
989 
990   SDNode *NNewRec = CurDAG->getMachineNode(
991       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
992       SDValue(newErr, 0), SDValue(NewRec, 0));
993   SDNode *FinalNewQ = CurDAG->getMachineNode(
994       Hexagon::F2_sfmpy, dl, MVT::f32, SDValue(NNewRec, 0), SDValue(n, 0));
995   ReplaceNode(N, FinalNewQ);
996 }
997 
SelectFDiv(SDNode * N)998 void HexagonDAGToDAGISel::SelectFDiv(SDNode *N) {
999   if (N->getFlags().hasAllowReassociation())
1000     FastFDiv(N);
1001   else
1002     FDiv(N);
1003 }
1004 
Select(SDNode * N)1005 void HexagonDAGToDAGISel::Select(SDNode *N) {
1006   if (N->isMachineOpcode())
1007     return N->setNodeId(-1);  // Already selected.
1008 
1009   auto isHvxOp = [this](SDNode *N) {
1010     for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1011       if (HST->isHVXVectorType(N->getValueType(i), true))
1012         return true;
1013     }
1014     for (SDValue I : N->ops()) {
1015       if (HST->isHVXVectorType(I.getValueType(), true))
1016         return true;
1017     }
1018     return false;
1019   };
1020 
1021   if (HST->useHVXOps() && isHvxOp(N)) {
1022     switch (N->getOpcode()) {
1023     case ISD::EXTRACT_SUBVECTOR:  return SelectHvxExtractSubvector(N);
1024     case ISD::VECTOR_SHUFFLE:     return SelectHvxShuffle(N);
1025 
1026     case HexagonISD::VROR:        return SelectHvxRor(N);
1027     }
1028   }
1029 
1030   switch (N->getOpcode()) {
1031   case ISD::Constant:             return SelectConstant(N);
1032   case ISD::ConstantFP:           return SelectConstantFP(N);
1033   case ISD::FrameIndex:           return SelectFrameIndex(N);
1034   case ISD::SHL:                  return SelectSHL(N);
1035   case ISD::LOAD:                 return SelectLoad(N);
1036   case ISD::STORE:                return SelectStore(N);
1037   case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);
1038   case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N);
1039   case ISD::EXTRACT_SUBVECTOR:    return SelectExtractSubvector(N);
1040 
1041   case HexagonISD::ADDC:
1042   case HexagonISD::SUBC:          return SelectAddSubCarry(N);
1043   case HexagonISD::VALIGN:        return SelectVAlign(N);
1044   case HexagonISD::VALIGNADDR:    return SelectVAlignAddr(N);
1045   case HexagonISD::TYPECAST:      return SelectTypecast(N);
1046   case HexagonISD::P2D:           return SelectP2D(N);
1047   case HexagonISD::D2P:           return SelectD2P(N);
1048   case HexagonISD::Q2V:           return SelectQ2V(N);
1049   case HexagonISD::V2Q:           return SelectV2Q(N);
1050   case ISD::FDIV:
1051     return SelectFDiv(N);
1052   }
1053 
1054   SelectCode(N);
1055 }
1056 
SelectInlineAsmMemoryOperand(const SDValue & Op,InlineAsm::ConstraintCode ConstraintID,std::vector<SDValue> & OutOps)1057 bool HexagonDAGToDAGISel::SelectInlineAsmMemoryOperand(
1058     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
1059     std::vector<SDValue> &OutOps) {
1060   SDValue Inp = Op, Res;
1061 
1062   switch (ConstraintID) {
1063   default:
1064     return true;
1065   case InlineAsm::ConstraintCode::o: // Offsetable.
1066   case InlineAsm::ConstraintCode::v: // Not offsetable.
1067   case InlineAsm::ConstraintCode::m: // Memory.
1068     if (SelectAddrFI(Inp, Res))
1069       OutOps.push_back(Res);
1070     else
1071       OutOps.push_back(Inp);
1072     break;
1073   }
1074 
1075   OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
1076   return false;
1077 }
1078 
isMemOPCandidate(SDNode * I,SDNode * U)1079 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
1080   // I is an operand of U. Check if U is an arithmetic (binary) operation
1081   // usable in a memop, where the other operand is a loaded value, and the
1082   // result of U is stored in the same location.
1083 
1084   if (!U->hasOneUse())
1085     return false;
1086   unsigned Opc = U->getOpcode();
1087   switch (Opc) {
1088     case ISD::ADD:
1089     case ISD::SUB:
1090     case ISD::AND:
1091     case ISD::OR:
1092       break;
1093     default:
1094       return false;
1095   }
1096 
1097   SDValue S0 = U->getOperand(0);
1098   SDValue S1 = U->getOperand(1);
1099   SDValue SY = (S0.getNode() == I) ? S1 : S0;
1100 
1101   SDNode *UUse = *U->user_begin();
1102   if (UUse->getNumValues() != 1)
1103     return false;
1104 
1105   // Check if one of the inputs to U is a load instruction and the output
1106   // is used by a store instruction. If so and they also have the same
1107   // base pointer, then don't preoprocess this node sequence as it
1108   // can be matched to a memop.
1109   SDNode *SYNode = SY.getNode();
1110   if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
1111     SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
1112     SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
1113     if (LDBasePtr == STBasePtr)
1114       return true;
1115   }
1116   return false;
1117 }
1118 
1119 
1120 // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1121 //            (or (select c 0 y) z)  ->  (select c z (or y z))
ppSimplifyOrSelect0(std::vector<SDNode * > && Nodes)1122 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
1123   SelectionDAG &DAG = *CurDAG;
1124 
1125   for (auto *I : Nodes) {
1126     if (I->getOpcode() != ISD::OR)
1127       continue;
1128 
1129     auto IsSelect0 = [](const SDValue &Op) -> bool {
1130       if (Op.getOpcode() != ISD::SELECT)
1131         return false;
1132       return isNullConstant(Op.getOperand(1)) ||
1133              isNullConstant(Op.getOperand(2));
1134     };
1135 
1136     SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
1137     EVT VT = I->getValueType(0);
1138     bool SelN0 = IsSelect0(N0);
1139     SDValue SOp = SelN0 ? N0 : N1;
1140     SDValue VOp = SelN0 ? N1 : N0;
1141 
1142     if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1143       SDValue SC = SOp.getOperand(0);
1144       SDValue SX = SOp.getOperand(1);
1145       SDValue SY = SOp.getOperand(2);
1146       SDLoc DLS = SOp;
1147       if (isNullConstant(SY)) {
1148         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1149         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1150         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1151       } else if (isNullConstant(SX)) {
1152         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1153         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1154         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1155       }
1156     }
1157   }
1158 }
1159 
1160 // Transform: (store ch val (add x (add (shl y c) e)))
1161 //        to: (store ch val (add x (shl (add y d) c))),
1162 // where e = (shl d c) for some integer d.
1163 // The purpose of this is to enable generation of loads/stores with
1164 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1165 // value c must be 0, 1 or 2.
ppAddrReorderAddShl(std::vector<SDNode * > && Nodes)1166 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1167   SelectionDAG &DAG = *CurDAG;
1168 
1169   for (auto *I : Nodes) {
1170     if (I->getOpcode() != ISD::STORE)
1171       continue;
1172 
1173     // I matched: (store ch val Off)
1174     SDValue Off = I->getOperand(2);
1175     // Off needs to match: (add x (add (shl y c) (shl d c))))
1176     if (Off.getOpcode() != ISD::ADD)
1177       continue;
1178     // Off matched: (add x T0)
1179     SDValue T0 = Off.getOperand(1);
1180     // T0 needs to match: (add T1 T2):
1181     if (T0.getOpcode() != ISD::ADD)
1182       continue;
1183     // T0 matched: (add T1 T2)
1184     SDValue T1 = T0.getOperand(0);
1185     SDValue T2 = T0.getOperand(1);
1186     // T1 needs to match: (shl y c)
1187     if (T1.getOpcode() != ISD::SHL)
1188       continue;
1189     SDValue C = T1.getOperand(1);
1190     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1191     if (CN == nullptr)
1192       continue;
1193     unsigned CV = CN->getZExtValue();
1194     if (CV > 2)
1195       continue;
1196     // T2 needs to match e, where e = (shl d c) for some d.
1197     ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1198     if (EN == nullptr)
1199       continue;
1200     unsigned EV = EN->getZExtValue();
1201     if (EV % (1 << CV) != 0)
1202       continue;
1203     unsigned DV = EV / (1 << CV);
1204 
1205     // Replace T0 with: (shl (add y d) c)
1206     SDLoc DL = SDLoc(I);
1207     EVT VT = T0.getValueType();
1208     SDValue D = DAG.getConstant(DV, DL, VT);
1209     // NewAdd = (add y d)
1210     SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1211     // NewShl = (shl NewAdd c)
1212     SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1213     ReplaceNode(T0.getNode(), NewShl.getNode());
1214   }
1215 }
1216 
1217 // Transform: (load ch (add x (and (srl y c) Mask)))
1218 //        to: (load ch (add x (shl (srl y d) d-c)))
1219 // where
1220 // Mask = 00..0 111..1 0.0
1221 //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1222 //          |     +-------- 1s
1223 //          +-------------- at most c 0s
1224 // Motivating example:
1225 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1226 //                     to (add x (and (srl y 3) 1FFFFFFC))
1227 // which results in a constant-extended and(##...,lsr). This transformation
1228 // undoes this simplification for cases where the shl can be folded into
1229 // an addressing mode.
ppAddrRewriteAndSrl(std::vector<SDNode * > && Nodes)1230 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1231   SelectionDAG &DAG = *CurDAG;
1232 
1233   for (SDNode *N : Nodes) {
1234     unsigned Opc = N->getOpcode();
1235     if (Opc != ISD::LOAD && Opc != ISD::STORE)
1236       continue;
1237     SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1238     // Addr must match: (add x T0)
1239     if (Addr.getOpcode() != ISD::ADD)
1240       continue;
1241     SDValue T0 = Addr.getOperand(1);
1242     // T0 must match: (and T1 Mask)
1243     if (T0.getOpcode() != ISD::AND)
1244       continue;
1245 
1246     // We have an AND.
1247     //
1248     // Check the first operand. It must be: (srl y c).
1249     SDValue S = T0.getOperand(0);
1250     if (S.getOpcode() != ISD::SRL)
1251       continue;
1252     ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1253     if (SN == nullptr)
1254       continue;
1255     if (SN->getAPIntValue().getBitWidth() != 32)
1256       continue;
1257     uint32_t CV = SN->getZExtValue();
1258 
1259     // Check the second operand: the supposed mask.
1260     ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1261     if (MN == nullptr)
1262       continue;
1263     if (MN->getAPIntValue().getBitWidth() != 32)
1264       continue;
1265     uint32_t Mask = MN->getZExtValue();
1266     // Examine the mask.
1267     uint32_t TZ = llvm::countr_zero(Mask);
1268     uint32_t M1 = llvm::countr_one(Mask >> TZ);
1269     uint32_t LZ = llvm::countl_zero(Mask);
1270     // Trailing zeros + middle ones + leading zeros must equal the width.
1271     if (TZ + M1 + LZ != 32)
1272       continue;
1273     // The number of trailing zeros will be encoded in the addressing mode.
1274     if (TZ > 2)
1275       continue;
1276     // The number of leading zeros must be at most c.
1277     if (LZ > CV)
1278       continue;
1279 
1280     // All looks good.
1281     SDValue Y = S.getOperand(0);
1282     EVT VT = Addr.getValueType();
1283     SDLoc dl(S);
1284     // TZ = D-C, so D = TZ+C.
1285     SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1286     SDValue DC = DAG.getConstant(TZ, dl, VT);
1287     SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1288     SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1289     ReplaceNode(T0.getNode(), NewShl.getNode());
1290   }
1291 }
1292 
1293 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1294 //                                                  (op ... 1 ...))
ppHoistZextI1(std::vector<SDNode * > && Nodes)1295 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1296   SelectionDAG &DAG = *CurDAG;
1297 
1298   for (SDNode *N : Nodes) {
1299     unsigned Opc = N->getOpcode();
1300     if (Opc != ISD::ZERO_EXTEND)
1301       continue;
1302     SDValue OpI1 = N->getOperand(0);
1303     EVT OpVT = OpI1.getValueType();
1304     if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1305       continue;
1306     for (SDUse &Use : N->uses()) {
1307       SDNode *U = Use.getUser();
1308       if (U->getNumValues() != 1)
1309         continue;
1310       EVT UVT = U->getValueType(0);
1311       if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1312         continue;
1313       // Do not generate select for all i1 vector type.
1314       if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
1315         continue;
1316       if (isMemOPCandidate(N, U))
1317         continue;
1318 
1319       // Potentially simplifiable operation.
1320       unsigned I1N = Use.getOperandNo();
1321       SmallVector<SDValue,2> Ops(U->getNumOperands());
1322       for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1323         Ops[i] = U->getOperand(i);
1324       EVT BVT = Ops[I1N].getValueType();
1325 
1326       const SDLoc &dl(U);
1327       SDValue C0 = DAG.getConstant(0, dl, BVT);
1328       SDValue C1 = DAG.getConstant(1, dl, BVT);
1329       SDValue If0, If1;
1330 
1331       if (isa<MachineSDNode>(U)) {
1332         unsigned UseOpc = U->getMachineOpcode();
1333         Ops[I1N] = C0;
1334         If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1335         Ops[I1N] = C1;
1336         If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1337       } else {
1338         unsigned UseOpc = U->getOpcode();
1339         Ops[I1N] = C0;
1340         If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1341         Ops[I1N] = C1;
1342         If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1343       }
1344       // We're generating a SELECT way after legalization, so keep the types
1345       // simple.
1346       unsigned UW = UVT.getSizeInBits();
1347       EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
1348       SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
1349                                 DAG.getBitcast(SVT, If1),
1350                                 DAG.getBitcast(SVT, If0));
1351       SDValue Ret = DAG.getBitcast(UVT, Sel);
1352       DAG.ReplaceAllUsesWith(U, Ret.getNode());
1353     }
1354   }
1355 }
1356 
PreprocessISelDAG()1357 void HexagonDAGToDAGISel::PreprocessISelDAG() {
1358   // Repack all nodes before calling each preprocessing function,
1359   // because each of them can modify the set of nodes.
1360   auto getNodes = [this]() -> std::vector<SDNode *> {
1361     std::vector<SDNode *> T;
1362     T.reserve(CurDAG->allnodes_size());
1363     for (SDNode &N : CurDAG->allnodes())
1364       T.push_back(&N);
1365     return T;
1366   };
1367 
1368   if (HST->useHVXOps())
1369     PreprocessHvxISelDAG();
1370 
1371   // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1372   //            (or (select c 0 y) z)  ->  (select c z (or y z))
1373   ppSimplifyOrSelect0(getNodes());
1374 
1375   // Transform: (store ch val (add x (add (shl y c) e)))
1376   //        to: (store ch val (add x (shl (add y d) c))),
1377   // where e = (shl d c) for some integer d.
1378   // The purpose of this is to enable generation of loads/stores with
1379   // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1380   // value c must be 0, 1 or 2.
1381   ppAddrReorderAddShl(getNodes());
1382 
1383   // Transform: (load ch (add x (and (srl y c) Mask)))
1384   //        to: (load ch (add x (shl (srl y d) d-c)))
1385   // where
1386   // Mask = 00..0 111..1 0.0
1387   //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1388   //          |     +-------- 1s
1389   //          +-------------- at most c 0s
1390   // Motivating example:
1391   // DAG combiner optimizes (add x (shl (srl y 5) 2))
1392   //                     to (add x (and (srl y 3) 1FFFFFFC))
1393   // which results in a constant-extended and(##...,lsr). This transformation
1394   // undoes this simplification for cases where the shl can be folded into
1395   // an addressing mode.
1396   ppAddrRewriteAndSrl(getNodes());
1397 
1398   // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1399   //                                                  (op ... 1 ...))
1400   ppHoistZextI1(getNodes());
1401 
1402   DEBUG_WITH_TYPE("isel", {
1403     dbgs() << "Preprocessed (Hexagon) selection DAG:";
1404     CurDAG->dump();
1405   });
1406 
1407   if (EnableAddressRebalancing) {
1408     rebalanceAddressTrees();
1409 
1410     DEBUG_WITH_TYPE("isel", {
1411       dbgs() << "Address tree balanced selection DAG:";
1412       CurDAG->dump();
1413     });
1414   }
1415 }
1416 
emitFunctionEntryCode()1417 void HexagonDAGToDAGISel::emitFunctionEntryCode() {
1418   auto &HST = MF->getSubtarget<HexagonSubtarget>();
1419   auto &HFI = *HST.getFrameLowering();
1420   if (!HFI.needsAligna(*MF))
1421     return;
1422 
1423   MachineFrameInfo &MFI = MF->getFrameInfo();
1424   MachineBasicBlock *EntryBB = &MF->front();
1425   Align EntryMaxA = MFI.getMaxAlign();
1426 
1427   // Reserve the first non-volatile register.
1428   Register AP = 0;
1429   auto &HRI = *HST.getRegisterInfo();
1430   BitVector Reserved = HRI.getReservedRegs(*MF);
1431   for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1432     if (Reserved[*R])
1433       continue;
1434     AP = *R;
1435     break;
1436   }
1437   assert(AP.isValid() && "Couldn't reserve stack align register");
1438   BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
1439       .addImm(EntryMaxA.value());
1440   MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
1441 }
1442 
updateAligna()1443 void HexagonDAGToDAGISel::updateAligna() {
1444   auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
1445   if (!HFI.needsAligna(*MF))
1446     return;
1447   auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
1448   assert(AlignaI != nullptr);
1449   unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
1450   if (AlignaI->getOperand(1).getImm() < MaxA)
1451     AlignaI->getOperand(1).setImm(MaxA);
1452 }
1453 
1454 // Match a frame index that can be used in an addressing mode.
SelectAddrFI(SDValue & N,SDValue & R)1455 bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
1456   if (N.getOpcode() != ISD::FrameIndex)
1457     return false;
1458   auto &HFI = *HST->getFrameLowering();
1459   MachineFrameInfo &MFI = MF->getFrameInfo();
1460   int FX = cast<FrameIndexSDNode>(N)->getIndex();
1461   if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1462     return false;
1463   R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
1464   return true;
1465 }
1466 
SelectAddrGA(SDValue & N,SDValue & R)1467 inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
1468   return SelectGlobalAddress(N, R, false, Align(1));
1469 }
1470 
SelectAddrGP(SDValue & N,SDValue & R)1471 inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
1472   return SelectGlobalAddress(N, R, true, Align(1));
1473 }
1474 
SelectAnyImm(SDValue & N,SDValue & R)1475 inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) {
1476   return SelectAnyImmediate(N, R, Align(1));
1477 }
1478 
SelectAnyImm0(SDValue & N,SDValue & R)1479 inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) {
1480   return SelectAnyImmediate(N, R, Align(1));
1481 }
SelectAnyImm1(SDValue & N,SDValue & R)1482 inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) {
1483   return SelectAnyImmediate(N, R, Align(2));
1484 }
SelectAnyImm2(SDValue & N,SDValue & R)1485 inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) {
1486   return SelectAnyImmediate(N, R, Align(4));
1487 }
SelectAnyImm3(SDValue & N,SDValue & R)1488 inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) {
1489   return SelectAnyImmediate(N, R, Align(8));
1490 }
1491 
SelectAnyInt(SDValue & N,SDValue & R)1492 inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
1493   EVT T = N.getValueType();
1494   if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1495     return false;
1496   uint32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1497   R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1498   return true;
1499 }
1500 
SelectAnyImmediate(SDValue & N,SDValue & R,Align Alignment)1501 bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
1502                                              Align Alignment) {
1503   switch (N.getOpcode()) {
1504   case ISD::Constant: {
1505     if (N.getValueType() != MVT::i32)
1506       return false;
1507     uint32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1508     if (!isAligned(Alignment, V))
1509       return false;
1510     R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1511     return true;
1512   }
1513   case HexagonISD::JT:
1514   case HexagonISD::CP:
1515     // These are assumed to always be aligned at least 8-byte boundary.
1516     if (Alignment > Align(8))
1517       return false;
1518     R = N.getOperand(0);
1519     return true;
1520   case ISD::ExternalSymbol:
1521     // Symbols may be aligned at any boundary.
1522     if (Alignment > Align(1))
1523       return false;
1524     R = N;
1525     return true;
1526   case ISD::BlockAddress:
1527     // Block address is always aligned at least 4-byte boundary.
1528     if (Alignment > Align(4) ||
1529         !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
1530       return false;
1531     R = N;
1532     return true;
1533   }
1534 
1535   if (SelectGlobalAddress(N, R, false, Alignment) ||
1536       SelectGlobalAddress(N, R, true, Alignment))
1537     return true;
1538 
1539   return false;
1540 }
1541 
SelectGlobalAddress(SDValue & N,SDValue & R,bool UseGP,Align Alignment)1542 bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
1543                                               bool UseGP, Align Alignment) {
1544   switch (N.getOpcode()) {
1545   case ISD::ADD: {
1546     SDValue N0 = N.getOperand(0);
1547     SDValue N1 = N.getOperand(1);
1548     unsigned GAOpc = N0.getOpcode();
1549     if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1550       return false;
1551     if (!UseGP && GAOpc != HexagonISD::CONST32)
1552       return false;
1553     if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1554       if (!isAligned(Alignment, Const->getZExtValue()))
1555         return false;
1556       SDValue Addr = N0.getOperand(0);
1557       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1558         if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1559           uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1560           R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1561                                              N.getValueType(), NewOff);
1562           return true;
1563         }
1564       }
1565     }
1566     break;
1567   }
1568   case HexagonISD::CP:
1569   case HexagonISD::JT:
1570   case HexagonISD::CONST32:
1571     // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1572     // want in the instruction.
1573     if (!UseGP)
1574       R = N.getOperand(0);
1575     return !UseGP;
1576   case HexagonISD::CONST32_GP:
1577     if (UseGP)
1578       R = N.getOperand(0);
1579     return UseGP;
1580   default:
1581     return false;
1582   }
1583 
1584   return false;
1585 }
1586 
DetectUseSxtw(SDValue & N,SDValue & R)1587 bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
1588   // This (complex pattern) function is meant to detect a sign-extension
1589   // i32->i64 on a per-operand basis. This would allow writing single
1590   // patterns that would cover a number of combinations of different ways
1591   // a sign-extensions could be written. For example:
1592   //   (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1593   // could match either one of these:
1594   //   (mul (sext x) (sext_inreg y))
1595   //   (mul (sext-load *p) (sext_inreg y))
1596   //   (mul (sext_inreg x) (sext y))
1597   // etc.
1598   //
1599   // The returned value will have type i64 and its low word will
1600   // contain the value being extended. The high bits are not specified.
1601   // The returned type is i64 because the original type of N was i64,
1602   // but the users of this function should only use the low-word of the
1603   // result, e.g.
1604   //  (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1605 
1606   if (N.getValueType() != MVT::i64)
1607     return false;
1608   unsigned Opc = N.getOpcode();
1609   switch (Opc) {
1610     case ISD::SIGN_EXTEND:
1611     case ISD::SIGN_EXTEND_INREG: {
1612       // sext_inreg has the source type as a separate operand.
1613       EVT T = Opc == ISD::SIGN_EXTEND
1614                 ? N.getOperand(0).getValueType()
1615                 : cast<VTSDNode>(N.getOperand(1))->getVT();
1616       unsigned SW = T.getSizeInBits();
1617       if (SW == 32)
1618         R = N.getOperand(0);
1619       else if (SW < 32)
1620         R = N;
1621       else
1622         return false;
1623       break;
1624     }
1625     case ISD::LOAD: {
1626       LoadSDNode *L = cast<LoadSDNode>(N);
1627       if (L->getExtensionType() != ISD::SEXTLOAD)
1628         return false;
1629       // All extending loads extend to i32, so even if the value in
1630       // memory is shorter than 32 bits, it will be i32 after the load.
1631       if (L->getMemoryVT().getSizeInBits() > 32)
1632         return false;
1633       R = N;
1634       break;
1635     }
1636     case ISD::SRA: {
1637       auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1638       if (!S || S->getZExtValue() != 32)
1639         return false;
1640       R = N;
1641       break;
1642     }
1643     case ISD::AssertSext: {
1644       EVT T = cast<VTSDNode>(N.getOperand(1))->getVT();
1645       if (T.getSizeInBits() == 32)
1646         R = N.getOperand(0);
1647       else
1648         return false;
1649       break;
1650     }
1651 
1652     default:
1653       return false;
1654   }
1655   EVT RT = R.getValueType();
1656   if (RT == MVT::i64)
1657     return true;
1658   assert(RT == MVT::i32);
1659   // This is only to produce a value of type i64. Do not rely on the
1660   // high bits produced by this.
1661   const SDLoc &dl(N);
1662   SDValue Ops[] = {
1663     CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1664     R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1665     R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1666   };
1667   SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1668                                      MVT::i64, Ops);
1669   R = SDValue(T, 0);
1670   return true;
1671 }
1672 
keepsLowBits(const SDValue & Val,unsigned NumBits,SDValue & Src)1673 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1674       SDValue &Src) {
1675   unsigned Opc = Val.getOpcode();
1676   switch (Opc) {
1677   case ISD::SIGN_EXTEND:
1678   case ISD::ZERO_EXTEND:
1679   case ISD::ANY_EXTEND: {
1680     const SDValue &Op0 = Val.getOperand(0);
1681     EVT T = Op0.getValueType();
1682     if (T.isInteger() && T.getSizeInBits() == NumBits) {
1683       Src = Op0;
1684       return true;
1685     }
1686     break;
1687   }
1688   case ISD::SIGN_EXTEND_INREG:
1689   case ISD::AssertSext:
1690   case ISD::AssertZext:
1691     if (Val.getOperand(0).getValueType().isInteger()) {
1692       VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1693       if (T->getVT().getSizeInBits() == NumBits) {
1694         Src = Val.getOperand(0);
1695         return true;
1696       }
1697     }
1698     break;
1699   case ISD::AND: {
1700     // Check if this is an AND with NumBits of lower bits set to 1.
1701     uint64_t Mask = (1ULL << NumBits) - 1;
1702     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1703       if (C->getZExtValue() == Mask) {
1704         Src = Val.getOperand(1);
1705         return true;
1706       }
1707     }
1708     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1709       if (C->getZExtValue() == Mask) {
1710         Src = Val.getOperand(0);
1711         return true;
1712       }
1713     }
1714     break;
1715   }
1716   case ISD::OR:
1717   case ISD::XOR: {
1718     // OR/XOR with the lower NumBits bits set to 0.
1719     uint64_t Mask = (1ULL << NumBits) - 1;
1720     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1721       if ((C->getZExtValue() & Mask) == 0) {
1722         Src = Val.getOperand(1);
1723         return true;
1724       }
1725     }
1726     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1727       if ((C->getZExtValue() & Mask) == 0) {
1728         Src = Val.getOperand(0);
1729         return true;
1730       }
1731     }
1732     break;
1733   }
1734   default:
1735     break;
1736   }
1737   return false;
1738 }
1739 
isAlignedMemNode(const MemSDNode * N) const1740 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1741   return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
1742 }
1743 
isSmallStackStore(const StoreSDNode * N) const1744 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1745   unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1746   switch (N->getMemoryVT().getStoreSize()) {
1747     case 1:
1748       return StackSize <= 56;   // 1*2^6 - 8
1749     case 2:
1750       return StackSize <= 120;  // 2*2^6 - 8
1751     case 4:
1752       return StackSize <= 248;  // 4*2^6 - 8
1753     default:
1754       return false;
1755   }
1756 }
1757 
1758 // Return true when the given node fits in a positive half word.
isPositiveHalfWord(const SDNode * N) const1759 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1760   if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1761     int64_t V = CN->getSExtValue();
1762     return V > 0 && isInt<16>(V);
1763   }
1764   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1765     const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1766     return VN->getVT().getSizeInBits() <= 16;
1767   }
1768   return false;
1769 }
1770 
hasOneUse(const SDNode * N) const1771 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1772   return !CheckSingleUse || N->hasOneUse();
1773 }
1774 
1775 ////////////////////////////////////////////////////////////////////////////////
1776 // Rebalancing of address calculation trees
1777 
isOpcodeHandled(const SDNode * N)1778 static bool isOpcodeHandled(const SDNode *N) {
1779   switch (N->getOpcode()) {
1780     case ISD::ADD:
1781     case ISD::MUL:
1782       return true;
1783     case ISD::SHL:
1784       // We only handle constant shifts because these can be easily flattened
1785       // into multiplications by 2^Op1.
1786       return isa<ConstantSDNode>(N->getOperand(1).getNode());
1787     default:
1788       return false;
1789   }
1790 }
1791 
1792 /// Return the weight of an SDNode
getWeight(SDNode * N)1793 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1794   if (!isOpcodeHandled(N))
1795     return 1;
1796   assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1797   assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1798   assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1799   return RootWeights[N];
1800 }
1801 
getHeight(SDNode * N)1802 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1803   if (!isOpcodeHandled(N))
1804     return 0;
1805   assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1806       "Cannot query height of unvisited/RAUW'd node!");
1807   return RootHeights[N];
1808 }
1809 
1810 namespace {
1811 struct WeightedLeaf {
1812   SDValue Value;
1813   int Weight;
1814   int InsertionOrder;
1815 
WeightedLeaf__anon14d602910611::WeightedLeaf1816   WeightedLeaf() {}
1817 
WeightedLeaf__anon14d602910611::WeightedLeaf1818   WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1819     Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1820     assert(Weight >= 0 && "Weight must be >= 0");
1821   }
1822 
Compare__anon14d602910611::WeightedLeaf1823   static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1824     assert(A.Value.getNode() && B.Value.getNode());
1825     return A.Weight == B.Weight ?
1826             (A.InsertionOrder > B.InsertionOrder) :
1827             (A.Weight > B.Weight);
1828   }
1829 };
1830 
1831 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1832 /// constants and allows removal of non-top elements while maintaining the
1833 /// priority order.
1834 class LeafPrioQueue {
1835   SmallVector<WeightedLeaf, 8> Q;
1836   bool HaveConst;
1837   WeightedLeaf ConstElt;
1838   unsigned Opcode;
1839 
1840 public:
empty()1841   bool empty() {
1842     return (!HaveConst && Q.empty());
1843   }
1844 
size()1845   size_t size() {
1846     return Q.size() + HaveConst;
1847   }
1848 
hasConst()1849   bool hasConst() {
1850     return HaveConst;
1851   }
1852 
top()1853   const WeightedLeaf &top() {
1854     if (HaveConst)
1855       return ConstElt;
1856     return Q.front();
1857   }
1858 
pop()1859   WeightedLeaf pop() {
1860     if (HaveConst) {
1861       HaveConst = false;
1862       return ConstElt;
1863     }
1864     std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1865     return Q.pop_back_val();
1866   }
1867 
push(WeightedLeaf L,bool SeparateConst=true)1868   void push(WeightedLeaf L, bool SeparateConst=true) {
1869     if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1870       if (Opcode == ISD::MUL &&
1871           cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1872         return;
1873       if (Opcode == ISD::ADD &&
1874           cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1875         return;
1876 
1877       HaveConst = true;
1878       ConstElt = L;
1879     } else {
1880       Q.push_back(L);
1881       std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1882     }
1883   }
1884 
1885   /// Push L to the bottom of the queue regardless of its weight. If L is
1886   /// constant, it will not be folded with other constants in the queue.
pushToBottom(WeightedLeaf L)1887   void pushToBottom(WeightedLeaf L) {
1888     L.Weight = 1000;
1889     push(L, false);
1890   }
1891 
1892   /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1893   /// lowest weight and remove it from the queue.
1894   WeightedLeaf findSHL(uint64_t MaxAmount);
1895 
1896   WeightedLeaf findMULbyConst();
1897 
LeafPrioQueue(unsigned Opcode)1898   LeafPrioQueue(unsigned Opcode) :
1899     HaveConst(false), Opcode(Opcode) { }
1900 };
1901 } // end anonymous namespace
1902 
findSHL(uint64_t MaxAmount)1903 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1904   int ResultPos;
1905   WeightedLeaf Result;
1906 
1907   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1908     const WeightedLeaf &L = Q[Pos];
1909     const SDValue &Val = L.Value;
1910     if (Val.getOpcode() != ISD::SHL ||
1911         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1912         Val.getConstantOperandVal(1) > MaxAmount)
1913       continue;
1914     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1915         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1916     {
1917       Result = L;
1918       ResultPos = Pos;
1919     }
1920   }
1921 
1922   if (Result.Value.getNode()) {
1923     Q.erase(&Q[ResultPos]);
1924     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1925   }
1926 
1927   return Result;
1928 }
1929 
findMULbyConst()1930 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1931   int ResultPos;
1932   WeightedLeaf Result;
1933 
1934   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1935     const WeightedLeaf &L = Q[Pos];
1936     const SDValue &Val = L.Value;
1937     if (Val.getOpcode() != ISD::MUL ||
1938         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1939         Val.getConstantOperandVal(1) > 127)
1940       continue;
1941     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1942         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1943     {
1944       Result = L;
1945       ResultPos = Pos;
1946     }
1947   }
1948 
1949   if (Result.Value.getNode()) {
1950     Q.erase(&Q[ResultPos]);
1951     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1952   }
1953 
1954   return Result;
1955 }
1956 
getMultiplierForSHL(SDNode * N)1957 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1958   uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1959   return CurDAG->getConstant(MulFactor, SDLoc(N),
1960                              N->getOperand(1).getValueType());
1961 }
1962 
1963 /// @returns the value x for which 2^x is a factor of Val
getPowerOf2Factor(SDValue Val)1964 static unsigned getPowerOf2Factor(SDValue Val) {
1965   if (Val.getOpcode() == ISD::MUL) {
1966     unsigned MaxFactor = 0;
1967     for (int i = 0; i < 2; ++i) {
1968       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1969       if (!C)
1970         continue;
1971       const APInt &CInt = C->getAPIntValue();
1972       if (CInt.getBoolValue())
1973         MaxFactor = CInt.countr_zero();
1974     }
1975     return MaxFactor;
1976   }
1977   if (Val.getOpcode() == ISD::SHL) {
1978     if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1979       return 0;
1980     return (unsigned) Val.getConstantOperandVal(1);
1981   }
1982 
1983   return 0;
1984 }
1985 
1986 /// @returns true if V>>Amount will eliminate V's operation on its child
willShiftRightEliminate(SDValue V,unsigned Amount)1987 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1988   if (V.getOpcode() == ISD::MUL) {
1989     SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1990     for (int i = 0; i < 2; ++i)
1991       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1992           V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1993         uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1994         return (NewConst == 1);
1995       }
1996   } else if (V.getOpcode() == ISD::SHL) {
1997     return (Amount == V.getConstantOperandVal(1));
1998   }
1999 
2000   return false;
2001 }
2002 
factorOutPowerOf2(SDValue V,unsigned Power)2003 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
2004   SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
2005   if (V.getOpcode() == ISD::MUL) {
2006     for (int i=0; i < 2; ++i) {
2007       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
2008           V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
2009         uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
2010         if (NewConst == 1)
2011           return Ops[!i];
2012         Ops[i] = CurDAG->getConstant(NewConst,
2013                                      SDLoc(V), V.getValueType());
2014         break;
2015       }
2016     }
2017   } else if (V.getOpcode() == ISD::SHL) {
2018     uint64_t ShiftAmount = V.getConstantOperandVal(1);
2019     if (ShiftAmount == Power)
2020       return Ops[0];
2021     Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
2022                                  SDLoc(V), V.getValueType());
2023   }
2024 
2025   return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
2026 }
2027 
isTargetConstant(const SDValue & V)2028 static bool isTargetConstant(const SDValue &V) {
2029   return V.getOpcode() == HexagonISD::CONST32 ||
2030          V.getOpcode() == HexagonISD::CONST32_GP;
2031 }
2032 
getUsesInFunction(const Value * V)2033 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
2034   auto [It, Inserted] = GAUsesInFunction.try_emplace(V);
2035   if (!Inserted)
2036     return It->second;
2037 
2038   unsigned Result = 0;
2039   const Function &CurF = CurDAG->getMachineFunction().getFunction();
2040   for (const User *U : V->users()) {
2041     if (isa<Instruction>(U) &&
2042         cast<Instruction>(U)->getParent()->getParent() == &CurF)
2043       ++Result;
2044   }
2045 
2046   It->second = Result;
2047 
2048   return Result;
2049 }
2050 
2051 /// Note - After calling this, N may be dead. It may have been replaced by a
2052 /// new node, so always use the returned value in place of N.
2053 ///
2054 /// @returns The SDValue taking the place of N (which could be N if it is
2055 /// unchanged)
balanceSubTree(SDNode * N,bool TopLevel)2056 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
2057   assert(RootWeights.count(N) && "Cannot balance non-root node.");
2058   assert(RootWeights[N] != -2 && "This node was RAUW'd!");
2059   assert(!TopLevel || N->getOpcode() == ISD::ADD);
2060 
2061   // Return early if this node was already visited
2062   if (RootWeights[N] != -1)
2063     return SDValue(N, 0);
2064 
2065   assert(isOpcodeHandled(N));
2066 
2067   SDValue Op0 = N->getOperand(0);
2068   SDValue Op1 = N->getOperand(1);
2069 
2070   // Return early if the operands will remain unchanged or are all roots
2071   if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
2072       (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
2073     SDNode *Op0N = Op0.getNode();
2074     int Weight;
2075     if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
2076       Weight = getWeight(balanceSubTree(Op0N).getNode());
2077       // Weight = calculateWeight(Op0N);
2078     } else
2079       Weight = getWeight(Op0N);
2080 
2081     SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
2082     if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
2083       Weight += getWeight(balanceSubTree(Op1N).getNode());
2084       // Weight += calculateWeight(Op1N);
2085     } else
2086       Weight += getWeight(Op1N);
2087 
2088     RootWeights[N] = Weight;
2089     RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
2090                               getHeight(N->getOperand(1).getNode())) + 1;
2091 
2092     LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
2093                       << " Height=" << RootHeights[N] << "): ");
2094     LLVM_DEBUG(N->dump(CurDAG));
2095 
2096     return SDValue(N, 0);
2097   }
2098 
2099   LLVM_DEBUG(dbgs() << "** Balancing root node: ");
2100   LLVM_DEBUG(N->dump(CurDAG));
2101 
2102   unsigned NOpcode = N->getOpcode();
2103 
2104   LeafPrioQueue Leaves(NOpcode);
2105   SmallVector<SDValue, 4> Worklist;
2106   Worklist.push_back(SDValue(N, 0));
2107 
2108   // SHL nodes will be converted to MUL nodes
2109   if (NOpcode == ISD::SHL)
2110     NOpcode = ISD::MUL;
2111 
2112   bool CanFactorize = false;
2113   WeightedLeaf Mul1, Mul2;
2114   unsigned MaxPowerOf2 = 0;
2115   WeightedLeaf GA;
2116 
2117   // Do not try to factor out a shift if there is already a shift at the tip of
2118   // the tree.
2119   bool HaveTopLevelShift = false;
2120   if (TopLevel &&
2121       ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
2122                         Op0.getConstantOperandVal(1) < 4) ||
2123        (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
2124                         Op1.getConstantOperandVal(1) < 4)))
2125     HaveTopLevelShift = true;
2126 
2127   // Flatten the subtree into an ordered list of leaves; at the same time
2128   // determine whether the tree is already balanced.
2129   int InsertionOrder = 0;
2130   SmallDenseMap<SDValue, int> NodeHeights;
2131   bool Imbalanced = false;
2132   int CurrentWeight = 0;
2133   while (!Worklist.empty()) {
2134     SDValue Child = Worklist.pop_back_val();
2135 
2136     if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
2137       // CASE 1: Child is a root note
2138 
2139       int Weight = RootWeights[Child.getNode()];
2140       if (Weight == -1) {
2141         Child = balanceSubTree(Child.getNode());
2142         // calculateWeight(Child.getNode());
2143         Weight = getWeight(Child.getNode());
2144       } else if (Weight == -2) {
2145         // Whoops, this node was RAUWd by one of the balanceSubTree calls we
2146         // made. Our worklist isn't up to date anymore.
2147         // Restart the whole process.
2148         LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2149         return balanceSubTree(N, TopLevel);
2150       }
2151 
2152       NodeHeights[Child] = 1;
2153       CurrentWeight += Weight;
2154 
2155       unsigned PowerOf2;
2156       if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
2157           (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
2158           Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
2159         // Try to identify two factorizable MUL/SHL children greedily. Leave
2160         // them out of the priority queue for now so we can deal with them
2161         // after.
2162         if (!Mul1.Value.getNode()) {
2163           Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
2164           MaxPowerOf2 = PowerOf2;
2165         } else {
2166           Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
2167           MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
2168 
2169           // Our addressing modes can only shift by a maximum of 3
2170           if (MaxPowerOf2 > 3)
2171             MaxPowerOf2 = 3;
2172 
2173           CanFactorize = true;
2174         }
2175       } else
2176         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2177     } else if (!isOpcodeHandled(Child.getNode())) {
2178       // CASE 2: Child is an unhandled kind of node (e.g. constant)
2179       int Weight = getWeight(Child.getNode());
2180 
2181       NodeHeights[Child] = getHeight(Child.getNode());
2182       CurrentWeight += Weight;
2183 
2184       if (isTargetConstant(Child) && !GA.Value.getNode())
2185         GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2186       else
2187         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2188     } else {
2189       // CASE 3: Child is a subtree of same opcode
2190       // Visit children first, then flatten.
2191       unsigned ChildOpcode = Child.getOpcode();
2192       assert(ChildOpcode == NOpcode ||
2193              (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2194 
2195       // Convert SHL to MUL
2196       SDValue Op1;
2197       if (ChildOpcode == ISD::SHL)
2198         Op1 = getMultiplierForSHL(Child.getNode());
2199       else
2200         Op1 = Child->getOperand(1);
2201 
2202       if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2203         assert(!NodeHeights.count(Child) && "Parent visited before children?");
2204         // Visit children first, then re-visit this node
2205         Worklist.push_back(Child);
2206         Worklist.push_back(Op1);
2207         Worklist.push_back(Child->getOperand(0));
2208       } else {
2209         // Back at this node after visiting the children
2210         if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2211           Imbalanced = true;
2212 
2213         NodeHeights[Child] = std::max(NodeHeights[Op1],
2214                                       NodeHeights[Child->getOperand(0)]) + 1;
2215       }
2216     }
2217   }
2218 
2219   LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2220                     << " weight=" << CurrentWeight
2221                     << " imbalanced=" << Imbalanced << "\n");
2222 
2223   // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2224   //  This factors out a shift in order to match memw(a<<Y+b).
2225   if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2226                        willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2227     LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2228     int Weight = Mul1.Weight + Mul2.Weight;
2229     int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2230     SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2231     SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2232     SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2233                                   Mul1Factored, Mul2Factored);
2234     SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2235                                         Mul1.Value.getValueType());
2236     SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2237                                   Sum, Const);
2238     NodeHeights[New] = Height;
2239     Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2240   } else if (Mul1.Value.getNode()) {
2241     // We failed to factorize two MULs, so now the Muls are left outside the
2242     // queue... add them back.
2243     Leaves.push(Mul1);
2244     if (Mul2.Value.getNode())
2245       Leaves.push(Mul2);
2246     CanFactorize = false;
2247   }
2248 
2249   // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2250   // and the root node itself is not used more than twice. This reduces the
2251   // amount of additional constant extenders introduced by this optimization.
2252   bool CombinedGA = false;
2253   if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2254       GA.Value.hasOneUse() && N->use_size() < 3) {
2255     GlobalAddressSDNode *GANode =
2256       cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2257     ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2258 
2259     if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2260         getTargetLowering()->isOffsetFoldingLegal(GANode)) {
2261       LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2262                         << Offset->getSExtValue() << "): ");
2263       LLVM_DEBUG(GANode->dump(CurDAG));
2264 
2265       SDValue NewTGA =
2266         CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2267             GANode->getValueType(0),
2268             GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2269       GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2270           GA.Value.getValueType(), NewTGA);
2271       GA.Weight += Leaves.top().Weight;
2272 
2273       NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2274       CombinedGA = true;
2275 
2276       Leaves.pop(); // Remove the offset constant from the queue
2277     }
2278   }
2279 
2280   if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2281       (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2282     RootWeights[N] = CurrentWeight;
2283     RootHeights[N] = NodeHeights[SDValue(N, 0)];
2284 
2285     return SDValue(N, 0);
2286   }
2287 
2288   // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2289   if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2290     WeightedLeaf SHL = Leaves.findSHL(31);
2291     if (SHL.Value.getNode()) {
2292       int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2293       GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2294                                  GA.Value.getValueType(),
2295                                  GA.Value, SHL.Value);
2296       GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2297       NodeHeights[GA.Value] = Height;
2298     }
2299   }
2300 
2301   if (GA.Value.getNode())
2302     Leaves.push(GA);
2303 
2304   // If this is the top level and we haven't factored out a shift, we should try
2305   // to move a constant to the bottom to match addressing modes like memw(rX+C)
2306   if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2307     LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2308     Leaves.pushToBottom(Leaves.pop());
2309   }
2310 
2311   const DataLayout &DL = CurDAG->getDataLayout();
2312   const TargetLowering &TLI = *getTargetLowering();
2313 
2314   // Rebuild the tree using Huffman's algorithm
2315   while (Leaves.size() > 1) {
2316     WeightedLeaf L0 = Leaves.pop();
2317 
2318     // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2319     // otherwise just get the next leaf
2320     WeightedLeaf L1 = Leaves.findMULbyConst();
2321     if (!L1.Value.getNode())
2322       L1 = Leaves.pop();
2323 
2324     assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2325 
2326     SDValue V0 = L0.Value;
2327     int V0Weight = L0.Weight;
2328     SDValue V1 = L1.Value;
2329     int V1Weight = L1.Weight;
2330 
2331     // Make sure that none of these nodes have been RAUW'd
2332     if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2333         (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2334       LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2335       return balanceSubTree(N, TopLevel);
2336     }
2337 
2338     ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2339     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2340     EVT VT = N->getValueType(0);
2341     SDValue NewNode;
2342 
2343     if (V0C && !V1C) {
2344       std::swap(V0, V1);
2345       std::swap(V0C, V1C);
2346     }
2347 
2348     // Calculate height of this node
2349     assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2350            "Children must have been visited before re-combining them!");
2351     int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2352 
2353     // Rebuild this node (and restore SHL from MUL if needed)
2354     if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2355       NewNode = CurDAG->getNode(
2356           ISD::SHL, SDLoc(V0), VT, V0,
2357           CurDAG->getConstant(
2358               V1C->getAPIntValue().logBase2(), SDLoc(N),
2359               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2360     else
2361       NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2362 
2363     NodeHeights[NewNode] = Height;
2364 
2365     int Weight = V0Weight + V1Weight;
2366     Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2367 
2368     LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2369                       << ",Height=" << Height << "):\n");
2370     LLVM_DEBUG(NewNode.dump());
2371   }
2372 
2373   assert(Leaves.size() == 1);
2374   SDValue NewRoot = Leaves.top().Value;
2375 
2376   assert(NodeHeights.count(NewRoot));
2377   int Height = NodeHeights[NewRoot];
2378 
2379   // Restore SHL if we earlier converted it to a MUL
2380   if (NewRoot.getOpcode() == ISD::MUL) {
2381     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2382     if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2383       EVT VT = NewRoot.getValueType();
2384       SDValue V0 = NewRoot.getOperand(0);
2385       NewRoot = CurDAG->getNode(
2386           ISD::SHL, SDLoc(NewRoot), VT, V0,
2387           CurDAG->getConstant(
2388               V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2389               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2390     }
2391   }
2392 
2393   if (N != NewRoot.getNode()) {
2394     LLVM_DEBUG(dbgs() << "--> Root is now: ");
2395     LLVM_DEBUG(NewRoot.dump());
2396 
2397     // Replace all uses of old root by new root
2398     CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2399     // Mark that we have RAUW'd N
2400     RootWeights[N] = -2;
2401   } else {
2402     LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2403   }
2404 
2405   RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2406   RootHeights[NewRoot.getNode()] = Height;
2407 
2408   return NewRoot;
2409 }
2410 
rebalanceAddressTrees()2411 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2412   for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) {
2413     SDNode *N = &Node;
2414     if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2415       continue;
2416 
2417     SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2418     if (BasePtr.getOpcode() != ISD::ADD)
2419       continue;
2420 
2421     // We've already processed this node
2422     if (RootWeights.count(BasePtr.getNode()))
2423       continue;
2424 
2425     LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2426     LLVM_DEBUG(N->dump(CurDAG));
2427 
2428     // FindRoots
2429     SmallVector<SDNode *, 4> Worklist;
2430 
2431     Worklist.push_back(BasePtr.getOperand(0).getNode());
2432     Worklist.push_back(BasePtr.getOperand(1).getNode());
2433 
2434     while (!Worklist.empty()) {
2435       SDNode *N = Worklist.pop_back_val();
2436       unsigned Opcode = N->getOpcode();
2437 
2438       if (!isOpcodeHandled(N))
2439         continue;
2440 
2441       Worklist.push_back(N->getOperand(0).getNode());
2442       Worklist.push_back(N->getOperand(1).getNode());
2443 
2444       // Not a root if it has only one use and same opcode as its parent
2445       if (N->hasOneUse() && Opcode == N->user_begin()->getOpcode())
2446         continue;
2447 
2448       // This root node has already been processed
2449       RootWeights.try_emplace(N, -1);
2450     }
2451 
2452     // Balance node itself
2453     RootWeights[BasePtr.getNode()] = -1;
2454     SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2455 
2456     if (N->getOpcode() == ISD::LOAD)
2457       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2458             NewBasePtr, N->getOperand(2));
2459     else
2460       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2461             NewBasePtr, N->getOperand(3));
2462 
2463     LLVM_DEBUG(dbgs() << "--> Final node: ");
2464     LLVM_DEBUG(N->dump(CurDAG));
2465   }
2466 
2467   CurDAG->RemoveDeadNodes();
2468   GAUsesInFunction.clear();
2469   RootHeights.clear();
2470   RootWeights.clear();
2471 }
2472