1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that Hexagon uses to lower LLVM code
10 // into a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "HexagonISelLowering.h"
15 #include "Hexagon.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonRegisterInfo.h"
18 #include "HexagonSubtarget.h"
19 #include "HexagonTargetMachine.h"
20 #include "HexagonTargetObjectFile.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/RuntimeLibcalls.h"
31 #include "llvm/CodeGen/SelectionDAG.h"
32 #include "llvm/CodeGen/TargetCallingConv.h"
33 #include "llvm/CodeGen/ValueTypes.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CallingConv.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/IntrinsicsHexagon.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Target/TargetMachine.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <cstddef>
60 #include <cstdint>
61 #include <limits>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "hexagon-lowering"
67 
68 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
69   cl::init(true), cl::Hidden,
70   cl::desc("Control jump table emission on Hexagon target"));
71 
72 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
73   cl::Hidden, cl::ZeroOrMore, cl::init(false),
74   cl::desc("Enable Hexagon SDNode scheduling"));
75 
76 static cl::opt<bool> EnableFastMath("ffast-math",
77   cl::Hidden, cl::ZeroOrMore, cl::init(false),
78   cl::desc("Enable Fast Math processing"));
79 
80 static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
81   cl::Hidden, cl::ZeroOrMore, cl::init(5),
82   cl::desc("Set minimum jump tables"));
83 
84 static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
85   cl::Hidden, cl::ZeroOrMore, cl::init(6),
86   cl::desc("Max #stores to inline memcpy"));
87 
88 static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
89   cl::Hidden, cl::ZeroOrMore, cl::init(4),
90   cl::desc("Max #stores to inline memcpy"));
91 
92 static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
93   cl::Hidden, cl::ZeroOrMore, cl::init(6),
94   cl::desc("Max #stores to inline memmove"));
95 
96 static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
97   cl::Hidden, cl::ZeroOrMore, cl::init(4),
98   cl::desc("Max #stores to inline memmove"));
99 
100 static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
101   cl::Hidden, cl::ZeroOrMore, cl::init(8),
102   cl::desc("Max #stores to inline memset"));
103 
104 static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
105   cl::Hidden, cl::ZeroOrMore, cl::init(4),
106   cl::desc("Max #stores to inline memset"));
107 
108 static cl::opt<bool> AlignLoads("hexagon-align-loads",
109   cl::Hidden, cl::init(false),
110   cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
111 
112 
113 namespace {
114 
115   class HexagonCCState : public CCState {
116     unsigned NumNamedVarArgParams = 0;
117 
118   public:
119     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
120                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
121                    unsigned NumNamedArgs)
122         : CCState(CC, IsVarArg, MF, locs, C),
123           NumNamedVarArgParams(NumNamedArgs) {}
124     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
125   };
126 
127 } // end anonymous namespace
128 
129 
130 // Implement calling convention for Hexagon.
131 
132 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
133                        CCValAssign::LocInfo &LocInfo,
134                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
135   static const MCPhysReg ArgRegs[] = {
136     Hexagon::R0, Hexagon::R1, Hexagon::R2,
137     Hexagon::R3, Hexagon::R4, Hexagon::R5
138   };
139   const unsigned NumArgRegs = array_lengthof(ArgRegs);
140   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
141 
142   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
143   if (RegNum != NumArgRegs && RegNum % 2 == 1)
144     State.AllocateReg(ArgRegs[RegNum]);
145 
146   // Always return false here, as this function only makes sure that the first
147   // unallocated register has an even register number and does not actually
148   // allocate a register for the current argument.
149   return false;
150 }
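
// Illustrative example (not part of the calling-convention tables): for a
// call such as f(int a, long long b), 'a' is assigned to R0; CC_SkipOdd then
// allocates R1 as padding so that 'b' can occupy the aligned register pair
// R3:2 (D1) rather than straddle an odd/even boundary.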
151 
152 #include "HexagonGenCallingConv.inc"
153 
154 
155 SDValue
156 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
157       const {
158   return SDValue();
159 }
160 
161 /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
162 /// specified by "Src" to the address "Dst" of size "Size". Alignment
163 /// information is specified by the parameter attribute. The copy is passed as
164 /// a byval function parameter.  Sometimes what we are copying is the end of a
165 /// larger object, the part that does not fit in registers.
166 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
167                                          SDValue Chain, ISD::ArgFlagsTy Flags,
168                                          SelectionDAG &DAG, const SDLoc &dl) {
169   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
170   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
171                        /*isVolatile=*/false, /*AlwaysInline=*/false,
172                        /*isTailCall=*/false,
173                        MachinePointerInfo(), MachinePointerInfo());
174 }
175 
176 bool
177 HexagonTargetLowering::CanLowerReturn(
178     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
179     const SmallVectorImpl<ISD::OutputArg> &Outs,
180     LLVMContext &Context) const {
181   SmallVector<CCValAssign, 16> RVLocs;
182   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
183 
184   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
185     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
186   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
187 }
188 
189 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
190 // passed by value, the function prototype is modified to return void and
191 // the value is stored in memory pointed to by a pointer passed by the caller.
192 SDValue
193 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
194                                    bool IsVarArg,
195                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
196                                    const SmallVectorImpl<SDValue> &OutVals,
197                                    const SDLoc &dl, SelectionDAG &DAG) const {
198   // CCValAssign - represent the assignment of the return value to locations.
199   SmallVector<CCValAssign, 16> RVLocs;
200 
201   // CCState - Info about the registers and stack slot.
202   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
203                  *DAG.getContext());
204 
205   // Analyze return values of ISD::RET
206   if (Subtarget.useHVXOps())
207     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
208   else
209     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
210 
211   SDValue Flag;
212   SmallVector<SDValue, 4> RetOps(1, Chain);
213 
214   // Copy the result values into the output registers.
215   for (unsigned i = 0; i != RVLocs.size(); ++i) {
216     CCValAssign &VA = RVLocs[i];
217 
218     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
219 
220     // Guarantee that all emitted copies are stuck together with flags.
221     Flag = Chain.getValue(1);
222     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
223   }
224 
225   RetOps[0] = Chain;  // Update chain.
226 
227   // Add the flag if we have it.
228   if (Flag.getNode())
229     RetOps.push_back(Flag);
230 
231   return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
232 }
233 
234 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
235   // A call may be emitted as a tail call only if the call instruction itself
235   // is marked as a tail call.
236   return CI->isTailCall();
237 }
238 
239 Register HexagonTargetLowering::getRegisterByName(
240       const char* RegName, LLT VT, const MachineFunction &) const {
241   // Just support r19; the Linux kernel uses it.
242   Register Reg = StringSwitch<Register>(RegName)
243                      .Case("r0", Hexagon::R0)
244                      .Case("r1", Hexagon::R1)
245                      .Case("r2", Hexagon::R2)
246                      .Case("r3", Hexagon::R3)
247                      .Case("r4", Hexagon::R4)
248                      .Case("r5", Hexagon::R5)
249                      .Case("r6", Hexagon::R6)
250                      .Case("r7", Hexagon::R7)
251                      .Case("r8", Hexagon::R8)
252                      .Case("r9", Hexagon::R9)
253                      .Case("r10", Hexagon::R10)
254                      .Case("r11", Hexagon::R11)
255                      .Case("r12", Hexagon::R12)
256                      .Case("r13", Hexagon::R13)
257                      .Case("r14", Hexagon::R14)
258                      .Case("r15", Hexagon::R15)
259                      .Case("r16", Hexagon::R16)
260                      .Case("r17", Hexagon::R17)
261                      .Case("r18", Hexagon::R18)
262                      .Case("r19", Hexagon::R19)
263                      .Case("r20", Hexagon::R20)
264                      .Case("r21", Hexagon::R21)
265                      .Case("r22", Hexagon::R22)
266                      .Case("r23", Hexagon::R23)
267                      .Case("r24", Hexagon::R24)
268                      .Case("r25", Hexagon::R25)
269                      .Case("r26", Hexagon::R26)
270                      .Case("r27", Hexagon::R27)
271                      .Case("r28", Hexagon::R28)
272                      .Case("r29", Hexagon::R29)
273                      .Case("r30", Hexagon::R30)
274                      .Case("r31", Hexagon::R31)
275                      .Case("r1:0", Hexagon::D0)
276                      .Case("r3:2", Hexagon::D1)
277                      .Case("r5:4", Hexagon::D2)
278                      .Case("r7:6", Hexagon::D3)
279                      .Case("r9:8", Hexagon::D4)
280                      .Case("r11:10", Hexagon::D5)
281                      .Case("r13:12", Hexagon::D6)
282                      .Case("r15:14", Hexagon::D7)
283                      .Case("r17:16", Hexagon::D8)
284                      .Case("r19:18", Hexagon::D9)
285                      .Case("r21:20", Hexagon::D10)
286                      .Case("r23:22", Hexagon::D11)
287                      .Case("r25:24", Hexagon::D12)
288                      .Case("r27:26", Hexagon::D13)
289                      .Case("r29:28", Hexagon::D14)
290                      .Case("r31:30", Hexagon::D15)
291                      .Case("sp", Hexagon::R29)
292                      .Case("fp", Hexagon::R30)
293                      .Case("lr", Hexagon::R31)
294                      .Case("p0", Hexagon::P0)
295                      .Case("p1", Hexagon::P1)
296                      .Case("p2", Hexagon::P2)
297                      .Case("p3", Hexagon::P3)
298                      .Case("sa0", Hexagon::SA0)
299                      .Case("lc0", Hexagon::LC0)
300                      .Case("sa1", Hexagon::SA1)
301                      .Case("lc1", Hexagon::LC1)
302                      .Case("m0", Hexagon::M0)
303                      .Case("m1", Hexagon::M1)
304                      .Case("usr", Hexagon::USR)
305                      .Case("ugp", Hexagon::UGP)
306                      .Default(Register());
307   if (Reg)
308     return Reg;
309 
310   report_fatal_error("Invalid register name global variable");
311 }
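
// Illustrative note (assumed IR shape, for reference only): this hook is
// reached through the llvm.read_register / llvm.write_register intrinsics,
// e.g.
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   !0 = !{!"r19"}
// where the metadata string is the RegName argument received above.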
312 
313 /// LowerCallResult - Lower the result values of an ISD::CALL into the
314 /// appropriate copies out of appropriate physical registers.  This assumes that
315 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
316 /// being lowered. Returns an SDNode with the same number of values as the
317 /// ISD::CALL.
318 SDValue HexagonTargetLowering::LowerCallResult(
319     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
320     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
321     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
322     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
323   // Assign locations to each value returned by this call.
324   SmallVector<CCValAssign, 16> RVLocs;
325 
326   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
327                  *DAG.getContext());
328 
329   if (Subtarget.useHVXOps())
330     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
331   else
332     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
333 
334   // Copy all of the result registers out of their specified physreg.
335   for (unsigned i = 0; i != RVLocs.size(); ++i) {
336     SDValue RetVal;
337     if (RVLocs[i].getValVT() == MVT::i1) {
338       // Return values of type MVT::i1 require special handling. The reason
339       // is that MVT::i1 is associated with the PredRegs register class, but
340       // values of that type are still returned in R0. Generate an explicit
341       // copy into a predicate register from R0, and treat the value of the
342       // predicate register as the call result.
343       auto &MRI = DAG.getMachineFunction().getRegInfo();
344       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
345                                        MVT::i32, Glue);
346       // FR0 = (Value, Chain, Glue)
347       Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
348       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
349                                      FR0.getValue(0), FR0.getValue(2));
350       // TPR = (Chain, Glue)
351       // Don't glue this CopyFromReg, because it copies from a virtual
352       // register. If it is glued to the call, InstrEmitter will add it
353       // as an implicit def to the call (EmitMachineNode).
354       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
355       Glue = TPR.getValue(1);
356       Chain = TPR.getValue(0);
357     } else {
358       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
359                                   RVLocs[i].getValVT(), Glue);
360       Glue = RetVal.getValue(2);
361       Chain = RetVal.getValue(1);
362     }
363     InVals.push_back(RetVal.getValue(0));
364   }
365 
366   return Chain;
367 }
368 
369 /// LowerCall - Function arguments are copied from virtual regs to
370 /// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
371 SDValue
372 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
373                                  SmallVectorImpl<SDValue> &InVals) const {
374   SelectionDAG &DAG                     = CLI.DAG;
375   SDLoc &dl                             = CLI.DL;
376   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
377   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
378   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
379   SDValue Chain                         = CLI.Chain;
380   SDValue Callee                        = CLI.Callee;
381   CallingConv::ID CallConv              = CLI.CallConv;
382   bool IsVarArg                         = CLI.IsVarArg;
383   bool DoesNotReturn                    = CLI.DoesNotReturn;
384 
385   bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
386   MachineFunction &MF = DAG.getMachineFunction();
387   MachineFrameInfo &MFI = MF.getFrameInfo();
388   auto PtrVT = getPointerTy(MF.getDataLayout());
389 
390   unsigned NumParams = CLI.CS.getInstruction()
391                         ? CLI.CS.getFunctionType()->getNumParams()
392                         : 0;
393   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
394     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
395 
396   // Analyze operands of the call, assigning locations to each operand.
397   SmallVector<CCValAssign, 16> ArgLocs;
398   HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
399                         NumParams);
400 
401   if (Subtarget.useHVXOps())
402     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
403   else
404     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
405 
406   if (CLI.IsTailCall) {
407     bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
408     CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
409                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
410                         OutVals, Ins, DAG);
411     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
412       CCValAssign &VA = ArgLocs[i];
413       if (VA.isMemLoc()) {
414         CLI.IsTailCall = false;
415         break;
416       }
417     }
418     LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
419                                          : "Argument must be passed on stack. "
420                                            "Not eligible for Tail Call\n"));
421   }
422   // Get a count of how many bytes are to be pushed on the stack.
423   unsigned NumBytes = CCInfo.getNextStackOffset();
424   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
425   SmallVector<SDValue, 8> MemOpChains;
426 
427   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
428   SDValue StackPtr =
429       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
430 
431   bool NeedsArgAlign = false;
432   unsigned LargestAlignSeen = 0;
433   // Walk the register/memloc assignments, inserting copies/loads.
434   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
435     CCValAssign &VA = ArgLocs[i];
436     SDValue Arg = OutVals[i];
437     ISD::ArgFlagsTy Flags = Outs[i].Flags;
438     // Record if we need > 8 byte alignment on an argument.
439     bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
440     NeedsArgAlign |= ArgAlign;
441 
442     // Promote the value if needed.
443     switch (VA.getLocInfo()) {
444       default:
445         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
446         llvm_unreachable("Unknown loc info!");
447       case CCValAssign::Full:
448         break;
449       case CCValAssign::BCvt:
450         Arg = DAG.getBitcast(VA.getLocVT(), Arg);
451         break;
452       case CCValAssign::SExt:
453         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
454         break;
455       case CCValAssign::ZExt:
456         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
457         break;
458       case CCValAssign::AExt:
459         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
460         break;
461     }
462 
463     if (VA.isMemLoc()) {
464       unsigned LocMemOffset = VA.getLocMemOffset();
465       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
466                                         StackPtr.getValueType());
467       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
468       if (ArgAlign)
469         LargestAlignSeen = std::max(LargestAlignSeen,
470                              (unsigned)VA.getLocVT().getStoreSizeInBits() >> 3);
471       if (Flags.isByVal()) {
472         // The argument is a struct passed by value. According to LLVM, "Arg"
473         // is a pointer.
474         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
475                                                         Flags, DAG, dl));
476       } else {
477         MachinePointerInfo LocPI = MachinePointerInfo::getStack(
478             DAG.getMachineFunction(), LocMemOffset);
479         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
480         MemOpChains.push_back(S);
481       }
482       continue;
483     }
484 
485     // Arguments that can be passed in a register must be kept in the
486     // RegsToPass vector.
487     if (VA.isRegLoc())
488       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
489   }
490 
491   if (NeedsArgAlign && Subtarget.hasV60Ops()) {
492     LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
493     unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
494     LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
495     MFI.ensureMaxAlignment(LargestAlignSeen);
496   }
497   // Combine all of the store nodes into a single TokenFactor, because the
498   // store nodes are independent of each other.
499   if (!MemOpChains.empty())
500     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
501 
502   SDValue Glue;
503   if (!CLI.IsTailCall) {
504     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
505     Glue = Chain.getValue(1);
506   }
507 
508   // Build a sequence of copy-to-reg nodes chained together with token
509   // chain and flag operands which copy the outgoing args into registers.
510   // The Glue is necessary since all emitted instructions must be
511   // stuck together.
512   if (!CLI.IsTailCall) {
513     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
514       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
515                                RegsToPass[i].second, Glue);
516       Glue = Chain.getValue(1);
517     }
518   } else {
519     // For tail calls lower the arguments to the 'real' stack slot.
520     //
521     // Force all the incoming stack arguments to be loaded from the stack
522     // before any new outgoing arguments are stored to the stack, because the
523     // outgoing stack slots may alias the incoming argument stack slots, and
524     // the alias isn't otherwise explicit. This is slightly more conservative
525     // than necessary, because it means that each store effectively depends
526     // on every argument instead of just those arguments it would clobber.
527     //
528     // Do not flag preceding copytoreg stuff together with the following stuff.
529     Glue = SDValue();
530     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
531       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
532                                RegsToPass[i].second, Glue);
533       Glue = Chain.getValue(1);
534     }
535     Glue = SDValue();
536   }
537 
538   bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
539   unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
540 
541   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
542   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
543   // node so that legalize doesn't hack it.
544   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
545     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
546   } else if (ExternalSymbolSDNode *S =
547              dyn_cast<ExternalSymbolSDNode>(Callee)) {
548     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
549   }
550 
551   // Returns a chain & a flag for retval copy to use.
552   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
553   SmallVector<SDValue, 8> Ops;
554   Ops.push_back(Chain);
555   Ops.push_back(Callee);
556 
557   // Add argument registers to the end of the list so that they are
558   // known live into the call.
559   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
560     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
561                                   RegsToPass[i].second.getValueType()));
562   }
563 
564   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
565   assert(Mask && "Missing call preserved mask for calling convention");
566   Ops.push_back(DAG.getRegisterMask(Mask));
567 
568   if (Glue.getNode())
569     Ops.push_back(Glue);
570 
571   if (CLI.IsTailCall) {
572     MFI.setHasTailCall();
573     return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
574   }
575 
576   // Set this here because we need to know this for "hasFP" in frame lowering.
577   // The target-independent code calls getFrameRegister before setting it, and
578   // getFrameRegister uses hasFP to determine whether the function has FP.
579   MFI.setHasCalls(true);
580 
581   unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
582   Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
583   Glue = Chain.getValue(1);
584 
585   // Create the CALLSEQ_END node.
586   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
587                              DAG.getIntPtrConstant(0, dl, true), Glue, dl);
588   Glue = Chain.getValue(1);
589 
590   // Handle result values, copying them out of physregs into vregs that we
591   // return.
592   return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
593                          InVals, OutVals, Callee);
594 }
595 
596 /// Returns true by value, base pointer and offset pointer and addressing
597 /// mode by reference if this node can be combined with a load / store to
598 /// form a post-indexed load / store.
599 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
600       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
601       SelectionDAG &DAG) const {
602   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
603   if (!LSN)
604     return false;
605   EVT VT = LSN->getMemoryVT();
606   if (!VT.isSimple())
607     return false;
608   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
609                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
610                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
611                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
612                      Subtarget.isHVXVectorType(VT.getSimpleVT());
613   if (!IsLegalType)
614     return false;
615 
616   if (Op->getOpcode() != ISD::ADD)
617     return false;
618   Base = Op->getOperand(0);
619   Offset = Op->getOperand(1);
620   if (!isa<ConstantSDNode>(Offset.getNode()))
621     return false;
622   AM = ISD::POST_INC;
623 
624   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
625   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
626 }
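
// Illustrative example: with the checks above, IR such as
//   %v = load i32, i32* %p
//   %p.inc = getelementptr i32, i32* %p, i32 1
// can typically be selected as a single post-incremented load, e.g.
// "r1 = memw(r0++#4)", provided isValidAutoIncImm accepts the +4 offset.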
627 
628 SDValue
629 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
630   MachineFunction &MF = DAG.getMachineFunction();
631   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
632   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
633   unsigned LR = HRI.getRARegister();
634 
635   if ((Op.getOpcode() != ISD::INLINEASM &&
636        Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
637     return Op;
638 
639   unsigned NumOps = Op.getNumOperands();
640   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
641     --NumOps;  // Ignore the flag operand.
642 
643   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
644     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
645     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
646     ++i;  // Skip the ID value.
647 
648     switch (InlineAsm::getKind(Flags)) {
649       default:
650         llvm_unreachable("Bad flags!");
651       case InlineAsm::Kind_RegUse:
652       case InlineAsm::Kind_Imm:
653       case InlineAsm::Kind_Mem:
654         i += NumVals;
655         break;
656       case InlineAsm::Kind_Clobber:
657       case InlineAsm::Kind_RegDef:
658       case InlineAsm::Kind_RegDefEarlyClobber: {
659         for (; NumVals; --NumVals, ++i) {
660           unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
661           if (Reg != LR)
662             continue;
663           HMFI.setHasClobberLR(true);
664           return Op;
665         }
666         break;
667       }
668     }
669   }
670 
671   return Op;
672 }
673 
674 // Need to transform ISD::PREFETCH into something that doesn't inherit
675 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
676 // SDNPMayStore.
677 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
678                                              SelectionDAG &DAG) const {
679   SDValue Chain = Op.getOperand(0);
680   SDValue Addr = Op.getOperand(1);
681   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
682   // if the "reg" is fed by an "add".
683   SDLoc DL(Op);
684   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
685   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
686 }
687 
688 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
689 // is marked as having side-effects, while the register read on Hexagon does
690 // not have any. TableGen refuses to accept the direct pattern from that node
691 // to the A4_tfrcpp.
692 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
693                                                      SelectionDAG &DAG) const {
694   SDValue Chain = Op.getOperand(0);
695   SDLoc dl(Op);
696   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
697   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
698 }
699 
700 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
701       SelectionDAG &DAG) const {
702   SDValue Chain = Op.getOperand(0);
703   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
704   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
705   if (IntNo == Intrinsic::hexagon_prefetch) {
706     SDValue Addr = Op.getOperand(2);
707     SDLoc DL(Op);
708     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
709     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
710   }
711   return SDValue();
712 }
713 
714 SDValue
715 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
716                                                SelectionDAG &DAG) const {
717   SDValue Chain = Op.getOperand(0);
718   SDValue Size = Op.getOperand(1);
719   SDValue Align = Op.getOperand(2);
720   SDLoc dl(Op);
721 
722   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
723   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
724 
725   unsigned A = AlignConst->getSExtValue();
726   auto &HFI = *Subtarget.getFrameLowering();
727   // "Zero" means natural stack alignment.
728   if (A == 0)
729     A = HFI.getStackAlignment();
730 
731   LLVM_DEBUG({
732     dbgs () << __func__ << " Align: " << A << " Size: ";
733     Size.getNode()->dump(&DAG);
734     dbgs() << "\n";
735   });
736 
737   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
738   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
739   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
740 
741   DAG.ReplaceAllUsesOfValueWith(Op, AA);
742   return AA;
743 }
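
// Illustrative note (assumed IR example): "%p = alloca i8, i32 %n, align 64"
// reaches this hook with Align = 64 and is lowered to HexagonISD::ALLOCA with
// that value; an Align of 0 is replaced by the natural stack alignment taken
// from the frame lowering.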
744 
745 SDValue HexagonTargetLowering::LowerFormalArguments(
746     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
747     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
748     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
749   MachineFunction &MF = DAG.getMachineFunction();
750   MachineFrameInfo &MFI = MF.getFrameInfo();
751   MachineRegisterInfo &MRI = MF.getRegInfo();
752 
753   // Assign locations to all of the incoming arguments.
754   SmallVector<CCValAssign, 16> ArgLocs;
755   HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
756                         MF.getFunction().getFunctionType()->getNumParams());
757 
758   if (Subtarget.useHVXOps())
759     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
760   else
761     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
762 
763   // In LLVM, when a struct larger than 8 bytes is returned by value, the
764   // first argument is a pointer to the location on the caller's stack where
765   // the return value will be stored. For Hexagon, such an address on the
766   // caller's stack is passed only when the struct is larger than 8 bytes;
767   // for structs of 8 bytes or less, no address is passed into the callee
768   // and the callee returns the result directly through R0/R1.
769 
770   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
771 
772   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
773     CCValAssign &VA = ArgLocs[i];
774     ISD::ArgFlagsTy Flags = Ins[i].Flags;
775     bool ByVal = Flags.isByVal();
776 
777     // Arguments passed in registers:
778     // 1. 32- and 64-bit values and HVX vectors are passed directly,
779     // 2. Large structs are passed via an address, and the address is
780     //    passed in a register.
781     if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
782       llvm_unreachable("ByValSize must be bigger than 8 bytes");
783 
784     bool InReg = VA.isRegLoc() &&
785                  (!ByVal || (ByVal && Flags.getByValSize() > 8));
786 
787     if (InReg) {
788       MVT RegVT = VA.getLocVT();
789       if (VA.getLocInfo() == CCValAssign::BCvt)
790         RegVT = VA.getValVT();
791 
792       const TargetRegisterClass *RC = getRegClassFor(RegVT);
793       Register VReg = MRI.createVirtualRegister(RC);
794       SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
795 
796       // Treat values of type MVT::i1 specially: they are passed in
797       // registers of type i32, but they need to remain as values of
798       // type i1 for consistency of the argument lowering.
799       if (VA.getValVT() == MVT::i1) {
800         assert(RegVT.getSizeInBits() <= 32);
801         SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
802                                 Copy, DAG.getConstant(1, dl, RegVT));
803         Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
804                             ISD::SETNE);
805       } else {
806 #ifndef NDEBUG
807         unsigned RegSize = RegVT.getSizeInBits();
808         assert(RegSize == 32 || RegSize == 64 ||
809                Subtarget.isHVXVectorType(RegVT));
810 #endif
811       }
812       InVals.push_back(Copy);
813       MRI.addLiveIn(VA.getLocReg(), VReg);
814     } else {
815       assert(VA.isMemLoc() && "Argument should be passed in memory");
816 
817       // If it's a byval parameter, then we need to compute the
818       // "real" size, not the size of the pointer.
819       unsigned ObjSize = Flags.isByVal()
820                             ? Flags.getByValSize()
821                             : VA.getLocVT().getStoreSizeInBits() / 8;
822 
823       // Create the frame index object for this incoming parameter.
824       int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
825       int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
826       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
827 
828       if (Flags.isByVal()) {
829         // If it's a pass-by-value aggregate, then do not dereference the stack
830         // location. Instead, we should generate a reference to the stack
831         // location.
832         InVals.push_back(FIN);
833       } else {
834         SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
835                                 MachinePointerInfo::getFixedStack(MF, FI, 0));
836         InVals.push_back(L);
837       }
838     }
839   }
840 
841 
842   if (IsVarArg) {
843     // This will point to the next argument passed via stack.
844     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
845     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
846     HMFI.setVarArgsFrameIndex(FI);
847   }
848 
849   return Chain;
850 }
851 
852 SDValue
853 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
854   // VASTART stores the address of the VarArgsFrameIndex slot into the
855   // memory location argument.
856   MachineFunction &MF = DAG.getMachineFunction();
857   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
858   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
859   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
860   return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
861                       MachinePointerInfo(SV));
862 }
863 
864 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
865   const SDLoc &dl(Op);
866   SDValue LHS = Op.getOperand(0);
867   SDValue RHS = Op.getOperand(1);
868   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
869   MVT ResTy = ty(Op);
870   MVT OpTy = ty(LHS);
871 
872   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
873     MVT ElemTy = OpTy.getVectorElementType();
874     assert(ElemTy.isScalarInteger());
875     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
876                                   OpTy.getVectorNumElements());
877     return DAG.getSetCC(dl, ResTy,
878                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
879                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
880   }
881 
882   // Treat all other vector types as legal.
883   if (ResTy.isVector())
884     return Op;
885 
886   // Comparisons of short integers should use sign-extend, not zero-extend,
887   // since we can represent small negative values in the compare instructions.
888   // The LLVM default is to use zero-extend arbitrarily in these cases.
889   auto isSExtFree = [this](SDValue N) {
890     switch (N.getOpcode()) {
891       case ISD::TRUNCATE: {
892         // A sign-extend of a truncate of a sign-extend is free.
893         SDValue Op = N.getOperand(0);
894         if (Op.getOpcode() != ISD::AssertSext)
895           return false;
896         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
897         unsigned ThisBW = ty(N).getSizeInBits();
898         unsigned OrigBW = OrigTy.getSizeInBits();
899         // The type that was sign-extended to get the AssertSext must be
900         // narrower than the type of N (so that N still has the same value
901         // as the original).
902         return ThisBW >= OrigBW;
903       }
904       case ISD::LOAD:
905         // We have sign-extended loads.
906         return true;
907     }
908     return false;
909   };
910 
911   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
912     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
913     bool IsNegative = C && C->getAPIntValue().isNegative();
914     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
915       return DAG.getSetCC(dl, ResTy,
916                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
917                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
918   }
919 
920   return SDValue();
921 }
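
// Illustrative example: for "icmp sgt i16 %a, -1", zero-extending both sides
// to i32 would turn the constant -1 into 0xFFFF and change the signed
// comparison, whereas the sign-extension chosen above preserves its value.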
922 
923 SDValue
924 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
925   SDValue PredOp = Op.getOperand(0);
926   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
927   MVT OpTy = ty(Op1);
928   const SDLoc &dl(Op);
929 
930   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
931     MVT ElemTy = OpTy.getVectorElementType();
932     assert(ElemTy.isScalarInteger());
933     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
934                                   OpTy.getVectorNumElements());
935     // Generate (trunc (select (_, sext, sext))).
936     return DAG.getSExtOrTrunc(
937               DAG.getSelect(dl, WideTy, PredOp,
938                             DAG.getSExtOrTrunc(Op1, dl, WideTy),
939                             DAG.getSExtOrTrunc(Op2, dl, WideTy)),
940               dl, OpTy);
941   }
942 
943   return SDValue();
944 }
945 
946 static Constant *convert_i1_to_i8(const Constant *ConstVal) {
947   SmallVector<Constant *, 128> NewConst;
948   const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
949   if (!CV)
950     return nullptr;
951 
952   LLVMContext &Ctx = ConstVal->getContext();
953   IRBuilder<> IRB(Ctx);
954   unsigned NumVectorElements = CV->getNumOperands();
955   assert(isPowerOf2_32(NumVectorElements) &&
956          "conversion only supported for pow2 VectorSize!");
957 
958   for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
959     uint8_t x = 0;
960     for (unsigned j = 0; j < 8; ++j) {
961       uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
962       x |= y << (7 - j);
963     }
964     assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
965     NewConst.push_back(IRB.getInt8(x));
966   }
967   return ConstantVector::get(NewConst);
968 }
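
// Illustrative example: an all-ones <16 x i1> constant is repacked by the
// helper above into two bytes <i8 -1, i8 -1>; the assert reflects that each
// group of eight lanes is expected to be uniformly 0 or 1.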
969 
970 SDValue
971 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
972   EVT ValTy = Op.getValueType();
973   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
974   Constant *CVal = nullptr;
975   bool isVTi1Type = false;
976   if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
977     Type *CValTy = ConstVal->getType();
978     if (CValTy->isVectorTy() &&
979         CValTy->getVectorElementType()->isIntegerTy(1)) {
980       CVal = convert_i1_to_i8(ConstVal);
981       isVTi1Type = (CVal != nullptr);
982     }
983   }
984   unsigned Align = CPN->getAlignment();
985   bool IsPositionIndependent = isPositionIndependent();
986   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
987 
988   unsigned Offset = 0;
989   SDValue T;
990   if (CPN->isMachineConstantPoolEntry())
991     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
992                                   TF);
993   else if (isVTi1Type)
994     T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
995   else
996     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
997 
998   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
999          "Inconsistent target flag encountered");
1000 
1001   if (IsPositionIndependent)
1002     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
1003   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
1004 }
1005 
1006 SDValue
1007 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1008   EVT VT = Op.getValueType();
1009   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
1010   if (isPositionIndependent()) {
1011     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
1012     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
1013   }
1014 
1015   SDValue T = DAG.getTargetJumpTable(Idx, VT);
1016   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
1017 }
1018 
1019 SDValue
1020 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1021   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1022   MachineFunction &MF = DAG.getMachineFunction();
1023   MachineFrameInfo &MFI = MF.getFrameInfo();
1024   MFI.setReturnAddressIsTaken(true);
1025 
1026   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1027     return SDValue();
1028 
1029   EVT VT = Op.getValueType();
1030   SDLoc dl(Op);
1031   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1032   if (Depth) {
1033     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1034     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
1035     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
1036                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
1037                        MachinePointerInfo());
1038   }
1039 
1040   // Return LR, which contains the return address. Mark it an implicit live-in.
1041   unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1042   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
1043 }
1044 
1045 SDValue
1046 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1047   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1048   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1049   MFI.setFrameAddressIsTaken(true);
1050 
1051   EVT VT = Op.getValueType();
1052   SDLoc dl(Op);
1053   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1054   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
1055                                          HRI.getFrameRegister(), VT);
1056   while (Depth--)
1057     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
1058                             MachinePointerInfo());
1059   return FrameAddr;
1060 }
1061 
1062 SDValue
1063 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1064   SDLoc dl(Op);
1065   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1066 }
1067 
1068 SDValue
1069 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1070   SDLoc dl(Op);
1071   auto *GAN = cast<GlobalAddressSDNode>(Op);
1072   auto PtrVT = getPointerTy(DAG.getDataLayout());
1073   auto *GV = GAN->getGlobal();
1074   int64_t Offset = GAN->getOffset();
1075 
1076   auto &HLOF = *HTM.getObjFileLowering();
1077   Reloc::Model RM = HTM.getRelocationModel();
1078 
1079   if (RM == Reloc::Static) {
1080     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1081     const GlobalObject *GO = GV->getBaseObject();
1082     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1083       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1084     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1085   }
1086 
1087   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1088   if (UsePCRel) {
1089     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1090                                             HexagonII::MO_PCREL);
1091     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1092   }
1093 
1094   // Use GOT index.
1095   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1096   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1097   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1098   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1099 }
1100 
1101 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1102 SDValue
1103 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1104   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1105   SDLoc dl(Op);
1106   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1107 
1108   Reloc::Model RM = HTM.getRelocationModel();
1109   if (RM == Reloc::Static) {
1110     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1111     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1112   }
1113 
1114   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1115   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1116 }
1117 
1118 SDValue
1119 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1120       const {
1121   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1122   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1123                                                HexagonII::MO_PCREL);
1124   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1125 }
1126 
1127 SDValue
1128 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1129       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1130       unsigned char OperandFlags) const {
1131   MachineFunction &MF = DAG.getMachineFunction();
1132   MachineFrameInfo &MFI = MF.getFrameInfo();
1133   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1134   SDLoc dl(GA);
1135   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1136                                            GA->getValueType(0),
1137                                            GA->getOffset(),
1138                                            OperandFlags);
1139   // Create the operands for the call. The operands should be the following:
1140   // 1. Chain SDValue
1141   // 2. Callee, which in this case is the global address value.
1142   // 3. Registers live into the call. In this case it is R0, as we
1143   //    have just one argument to be passed.
1144   // 4. Glue.
1145   // Note: The order is important.
1146 
1147   const auto &HRI = *Subtarget.getRegisterInfo();
1148   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1149   assert(Mask && "Missing call preserved mask for calling convention");
1150   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1151                     DAG.getRegisterMask(Mask), Glue };
1152   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1153 
1154   // Inform MFI that function has calls.
1155   MFI.setAdjustsStack(true);
1156 
1157   Glue = Chain.getValue(1);
1158   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
1159 }
1160 
1161 //
1162 // Lower using the initial-exec model for TLS addresses
1163 //
1164 SDValue
1165 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1166       SelectionDAG &DAG) const {
1167   SDLoc dl(GA);
1168   int64_t Offset = GA->getOffset();
1169   auto PtrVT = getPointerTy(DAG.getDataLayout());
1170 
1171   // Get the thread pointer.
1172   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1173 
1174   bool IsPositionIndependent = isPositionIndependent();
1175   unsigned char TF =
1176       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1177 
1178   // First generate the TLS symbol address
1179   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1180                                            Offset, TF);
1181 
1182   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1183 
1184   if (IsPositionIndependent) {
1185     // Generate the GOT pointer in case of position independent code
1186     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1187 
1188     // Add the TLS symbol address to the GOT pointer. This gives a
1189     // GOT-relative relocation for the symbol.
1190     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1191   }
1192 
1193   // Load the offset value for the TLS symbol. This offset is relative to
1194   // the thread pointer.
1195   SDValue LoadOffset =
1196       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
1197 
1198   // The address of the thread-local variable is the sum of the thread
1199   // pointer and the offset of the variable.
1200   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1201 }
1202 
1203 //
1204 // Lower using the local-exec model for TLS addresses
1205 //
1206 SDValue
1207 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1208       SelectionDAG &DAG) const {
1209   SDLoc dl(GA);
1210   int64_t Offset = GA->getOffset();
1211   auto PtrVT = getPointerTy(DAG.getDataLayout());
1212 
1213   // Get the thread pointer.
1214   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1215   // Generate the TLS symbol address
1216   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1217                                            HexagonII::MO_TPREL);
1218   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1219 
1220   // The address of the thread-local variable is the sum of the thread
1221   // pointer and the offset of the variable.
1222   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1223 }
1224 
1225 //
1226 // Lower using the general-dynamic model for TLS addresses
1227 //
1228 SDValue
1229 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1230       SelectionDAG &DAG) const {
1231   SDLoc dl(GA);
1232   int64_t Offset = GA->getOffset();
1233   auto PtrVT = getPointerTy(DAG.getDataLayout());
1234 
1235   // First generate the TLS symbol address
1236   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1237                                            HexagonII::MO_GDGOT);
1238 
1239   // Then, generate the GOT pointer
1240   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1241 
1242   // Add the TLS symbol and the GOT pointer
1243   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1244   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1245 
1246   // Copy over the argument to R0
1247   SDValue InFlag;
1248   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
1249   InFlag = Chain.getValue(1);
1250 
1251   unsigned Flags =
1252       static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
1253           ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1254           : HexagonII::MO_GDPLT;
1255 
1256   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
1257                            Hexagon::R0, Flags);
1258 }
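
// Rough summary of the effect (for illustration): the GOT-relative address of
// the variable is materialized into R0 and passed to a call emitted through
// the GD PLT entry (conventionally __tls_get_addr); the variable's address is
// returned in R0 and copied out by GetDynamicTLSAddr above.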
1259 
1260 //
1261 // Lower TLS addresses.
1262 //
1263 // For now, for dynamic models we only support the general-dynamic model.
1264 //
1265 SDValue
1266 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1267       SelectionDAG &DAG) const {
1268   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1269 
1270   switch (HTM.getTLSModel(GA->getGlobal())) {
1271     case TLSModel::GeneralDynamic:
1272     case TLSModel::LocalDynamic:
1273       return LowerToTLSGeneralDynamicModel(GA, DAG);
1274     case TLSModel::InitialExec:
1275       return LowerToTLSInitialExecModel(GA, DAG);
1276     case TLSModel::LocalExec:
1277       return LowerToTLSLocalExecModel(GA, DAG);
1278   }
1279   llvm_unreachable("Bogus TLS model");
1280 }
1281 
1282 //===----------------------------------------------------------------------===//
1283 // TargetLowering Implementation
1284 //===----------------------------------------------------------------------===//
1285 
1286 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1287                                              const HexagonSubtarget &ST)
1288     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1289       Subtarget(ST) {
1290   auto &HRI = *Subtarget.getRegisterInfo();
1291 
1292   setPrefLoopAlignment(Align(16));
1293   setMinFunctionAlignment(Align(4));
1294   setPrefFunctionAlignment(Align(16));
1295   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1296   setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1297   setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1298 
1299   setMaxAtomicSizeInBitsSupported(64);
1300   setMinCmpXchgSizeInBits(32);
1301 
1302   if (EnableHexSDNodeSched)
1303     setSchedulingPreference(Sched::VLIW);
1304   else
1305     setSchedulingPreference(Sched::Source);
1306 
1307   // Limits for inline expansion of memcpy/memmove
1308   MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1309   MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1310   MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1311   MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1312   MaxStoresPerMemset = MaxStoresPerMemsetCL;
1313   MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1314 
1315   //
1316   // Set up register classes.
1317   //
1318 
1319   addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
1320   addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
1321   addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
1322   addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
1323   addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
1324   addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1325   addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
1326   addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
1327   addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
1328   addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1329   addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1330 
1331   addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1332   addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1333 
1334   //
1335   // Handling of scalar operations.
1336   //
1337   // All operations default to "legal", except:
1338   // - indexed loads and stores (pre-/post-incremented),
1339   // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1340   //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1341   //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1342   //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1343   // which default to "expand" for at least one type.
1344 
1345   // Misc operations.
1346   setOperationAction(ISD::ConstantFP,           MVT::f32,   Legal);
1347   setOperationAction(ISD::ConstantFP,           MVT::f64,   Legal);
1348   setOperationAction(ISD::TRAP,                 MVT::Other, Legal);
1349   setOperationAction(ISD::ConstantPool,         MVT::i32,   Custom);
1350   setOperationAction(ISD::JumpTable,            MVT::i32,   Custom);
1351   setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
1352   setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
1353   setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
1354   setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
1355   setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
1356   setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
1357   setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
1358   setOperationAction(ISD::EH_RETURN,            MVT::Other, Custom);
1359   setOperationAction(ISD::GLOBAL_OFFSET_TABLE,  MVT::i32,   Custom);
1360   setOperationAction(ISD::GlobalTLSAddress,     MVT::i32,   Custom);
1361   setOperationAction(ISD::ATOMIC_FENCE,         MVT::Other, Custom);
1362 
1363   // Custom legalize GlobalAddress nodes into CONST32.
1364   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1365   setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
1366   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
1367 
1368   // Hexagon needs to optimize cases with negative constants.
1369   setOperationAction(ISD::SETCC, MVT::i8,    Custom);
1370   setOperationAction(ISD::SETCC, MVT::i16,   Custom);
1371   setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
1372   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1373 
1374   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1375   setOperationAction(ISD::VASTART, MVT::Other, Custom);
1376   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
1377   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
1378   setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
1379 
1380   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1381   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1382   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1383 
1384   if (EmitJumpTables)
1385     setMinimumJumpTableEntries(MinimumJumpTables);
1386   else
1387     setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
1388   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1389 
1390   setOperationAction(ISD::ABS, MVT::i32, Legal);
1391   setOperationAction(ISD::ABS, MVT::i64, Legal);
1392 
1393   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1394   // but they only operate on i64.
1395   for (MVT VT : MVT::integer_valuetypes()) {
1396     setOperationAction(ISD::UADDO,    VT, Custom);
1397     setOperationAction(ISD::USUBO,    VT, Custom);
1398     setOperationAction(ISD::SADDO,    VT, Expand);
1399     setOperationAction(ISD::SSUBO,    VT, Expand);
1400     setOperationAction(ISD::ADDCARRY, VT, Expand);
1401     setOperationAction(ISD::SUBCARRY, VT, Expand);
1402   }
1403   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
1404   setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
1405 
1406   setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
1407   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1408   setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
1409   setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1410 
1411   // Popcount can count # of 1s in i64 but returns i32.
1412   setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
1413   setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1414   setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1415   setOperationAction(ISD::CTPOP, MVT::i64, Legal);
1416 
1417   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1418   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
1419   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
1420   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
1421 
1422   setOperationAction(ISD::FSHL, MVT::i32, Legal);
1423   setOperationAction(ISD::FSHL, MVT::i64, Legal);
1424   setOperationAction(ISD::FSHR, MVT::i32, Legal);
1425   setOperationAction(ISD::FSHR, MVT::i64, Legal);
1426 
1427   for (unsigned IntExpOp :
1428        {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
1429         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
1430         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1431         ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1432     for (MVT VT : MVT::integer_valuetypes())
1433       setOperationAction(IntExpOp, VT, Expand);
1434   }
1435 
1436   for (unsigned FPExpOp :
1437        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1438         ISD::FPOW, ISD::FCOPYSIGN}) {
1439     for (MVT VT : MVT::fp_valuetypes())
1440       setOperationAction(FPExpOp, VT, Expand);
1441   }
1442 
1443   // No extending loads from i32.
1444   for (MVT VT : MVT::integer_valuetypes()) {
1445     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1446     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1447     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
1448   }
1449   // Turn FP truncstore into trunc + store.
1450   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1451   // Turn FP extload into load/fpextend.
1452   for (MVT VT : MVT::fp_valuetypes())
1453     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1454 
1455   // Expand BR_CC and SELECT_CC for all integer and fp types.
1456   for (MVT VT : MVT::integer_valuetypes()) {
1457     setOperationAction(ISD::BR_CC,     VT, Expand);
1458     setOperationAction(ISD::SELECT_CC, VT, Expand);
1459   }
1460   for (MVT VT : MVT::fp_valuetypes()) {
1461     setOperationAction(ISD::BR_CC,     VT, Expand);
1462     setOperationAction(ISD::SELECT_CC, VT, Expand);
1463   }
1464   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1465 
1466   //
1467   // Handling of vector operations.
1468   //
1469 
1470   // Set the action for vector operations to "expand", then override it with
1471   // either "custom" or "legal" for specific cases.
1472   static const unsigned VectExpOps[] = {
1473     // Integer arithmetic:
1474     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
1475     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
1476     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1477     // Logical/bit:
1478     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
1479     ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
1480     // Floating point arithmetic/math functions:
1481     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
1482     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
1483     ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
1484     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
1485     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
1486     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
1487     // Misc:
1488     ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
1489     // Vector:
1490     ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
1491     ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
1492     ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
1493     ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE
1494   };
1495 
1496   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1497     for (unsigned VectExpOp : VectExpOps)
1498       setOperationAction(VectExpOp, VT, Expand);
1499 
1500     // Expand all extending loads and truncating stores:
1501     for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
1502       if (TargetVT == VT)
1503         continue;
1504       setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1505       setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
1506       setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
1507       setTruncStoreAction(VT, TargetVT, Expand);
1508     }
1509 
1510     // Normalize all inputs to SELECT to be vectors of i32.
1511     if (VT.getVectorElementType() != MVT::i32) {
1512       MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
1513       setOperationAction(ISD::SELECT, VT, Promote);
1514       AddPromotedToType(ISD::SELECT, VT, VT32);
1515     }
1516     setOperationAction(ISD::SRA, VT, Custom);
1517     setOperationAction(ISD::SHL, VT, Custom);
1518     setOperationAction(ISD::SRL, VT, Custom);
1519   }
1520 
1521   // Extending loads from (native) vectors of i8 into (native) vectors of i16
1522   // are legal.
1523   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
1524   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1525   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1526   setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
1527   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1528   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1529 
1530   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1531   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1532   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1533 
1534   // Types natively supported:
1535   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1536                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1537     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
1538     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1539     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
1540     setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
1541     setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
1542     setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
1543 
1544     setOperationAction(ISD::ADD, NativeVT, Legal);
1545     setOperationAction(ISD::SUB, NativeVT, Legal);
1546     setOperationAction(ISD::MUL, NativeVT, Legal);
1547     setOperationAction(ISD::AND, NativeVT, Legal);
1548     setOperationAction(ISD::OR,  NativeVT, Legal);
1549     setOperationAction(ISD::XOR, NativeVT, Legal);
1550   }
1551 
1552   // Custom lower unaligned loads.
1553   // Also, for both loads and stores, verify the alignment of the address
1554   // in case it is a compile-time constant. This is a usability feature to
1555   // provide a meaningful error message to users.
1556   for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1557                  MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1558     setOperationAction(ISD::LOAD,  VT, Custom);
1559     setOperationAction(ISD::STORE, VT, Custom);
1560   }
1561 
1562   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
1563                  MVT::v2i32}) {
1564     setCondCodeAction(ISD::SETNE,  VT, Expand);
1565     setCondCodeAction(ISD::SETLE,  VT, Expand);
1566     setCondCodeAction(ISD::SETGE,  VT, Expand);
1567     setCondCodeAction(ISD::SETLT,  VT, Expand);
1568     setCondCodeAction(ISD::SETULE, VT, Expand);
1569     setCondCodeAction(ISD::SETUGE, VT, Expand);
1570     setCondCodeAction(ISD::SETULT, VT, Expand);
1571   }
1572 
1573   // Custom-lower bitcasts from i8 to v8i1.
1574   setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
1575   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
1576   setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
1577   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
1578   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
1579   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1580   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
1581 
1582   // V5+.
1583   setOperationAction(ISD::FMA,  MVT::f64, Expand);
1584   setOperationAction(ISD::FADD, MVT::f64, Expand);
1585   setOperationAction(ISD::FSUB, MVT::f64, Expand);
1586   setOperationAction(ISD::FMUL, MVT::f64, Expand);
1587 
1588   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1589   setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1590 
1591   setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
1592   setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
1593   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
1594   setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
1595   setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
1596   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
1597   setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
1598   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
1599   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
1600   setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
1601   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
1602   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
1603 
1604   // Handling of indexed loads/stores: default is "expand".
1605   //
1606   for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1607                  MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1608     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1609     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1610   }
1611 
1612   // Subtarget-specific operation actions.
1613   //
1614   if (Subtarget.hasV60Ops()) {
1615     setOperationAction(ISD::ROTL, MVT::i32, Legal);
1616     setOperationAction(ISD::ROTL, MVT::i64, Legal);
1617     setOperationAction(ISD::ROTR, MVT::i32, Legal);
1618     setOperationAction(ISD::ROTR, MVT::i64, Legal);
1619   }
1620   if (Subtarget.hasV66Ops()) {
1621     setOperationAction(ISD::FADD, MVT::f64, Legal);
1622     setOperationAction(ISD::FSUB, MVT::f64, Legal);
1623   }
1624 
1625   setTargetDAGCombine(ISD::VSELECT);
1626 
1627   if (Subtarget.useHVXOps())
1628     initializeHVXLowering();
1629 
1630   computeRegisterProperties(&HRI);
1631 
1632   //
1633   // Library calls for unsupported operations
1634   //
1635   bool FastMath  = EnableFastMath;
1636 
1637   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
1638   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
1639   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
1640   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
1641   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
1642   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
1643   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
1644   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
1645 
1646   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
1647   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
1648   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
1649   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
1650   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
1651   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
1652 
1653   // This is the only fast library function for sqrtd.
1654   if (FastMath)
1655     setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
1656 
1657   // Prefix is: nothing for "slow-math",
1658   //            "fast2_" for V5+ fast-math double-precision.
1659   // (For now, keep fast-math and fast-math2 separate.)
1660   if (FastMath) {
1661     setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
1662     setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
1663     setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
1664     setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
1665     setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
1666   } else {
1667     setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
1668     setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
1669     setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
1670     setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
1671     setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
1672   }
1673 
1674   if (FastMath)
1675     setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
1676   else
1677     setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
1678 
1679   // These cause problems when the shift amount is non-constant.
1680   setLibcallName(RTLIB::SHL_I128, nullptr);
1681   setLibcallName(RTLIB::SRL_I128, nullptr);
1682   setLibcallName(RTLIB::SRA_I128, nullptr);
1683 }
1684 
1685 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1686   switch ((HexagonISD::NodeType)Opcode) {
1687   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
1688   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
1689   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
1690   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
1691   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
1692   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
1693   case HexagonISD::CALL:          return "HexagonISD::CALL";
1694   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
1695   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
1696   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
1697   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
1698   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
1699   case HexagonISD::CP:            return "HexagonISD::CP";
1700   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
1701   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
1702   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
1703   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
1704   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
1705   case HexagonISD::JT:            return "HexagonISD::JT";
1706   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
1707   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
1708   case HexagonISD::VASL:          return "HexagonISD::VASL";
1709   case HexagonISD::VASR:          return "HexagonISD::VASR";
1710   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
1711   case HexagonISD::VSPLAT:        return "HexagonISD::VSPLAT";
1712   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
1713   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
1714   case HexagonISD::VROR:          return "HexagonISD::VROR";
1715   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
1716   case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
1717   case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
1718   case HexagonISD::VZERO:         return "HexagonISD::VZERO";
1719   case HexagonISD::VSPLATW:       return "HexagonISD::VSPLATW";
1720   case HexagonISD::D2P:           return "HexagonISD::D2P";
1721   case HexagonISD::P2D:           return "HexagonISD::P2D";
1722   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
1723   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
1724   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
1725   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
1726   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
1727   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
1728   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
1729   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
1730   case HexagonISD::OP_END:        break;
1731   }
1732   return nullptr;
1733 }
1734 
1735 void
1736 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
1737       unsigned NeedAlign) const {
1738   auto *CA = dyn_cast<ConstantSDNode>(Ptr);
1739   if (!CA)
1740     return;
1741   unsigned Addr = CA->getZExtValue();
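       // The alignment of a nonzero constant address is the largest power of
       // two dividing it, e.g. an address of 0x1004 has two trailing zero bits
       // and therefore 4-byte alignment.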
1742   unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
1743   if (HaveAlign < NeedAlign) {
1744     std::string ErrMsg;
1745     raw_string_ostream O(ErrMsg);
1746     O << "Misaligned constant address: " << format_hex(Addr, 10)
1747       << " has alignment " << HaveAlign
1748       << ", but the memory access requires " << NeedAlign;
1749     if (DebugLoc DL = dl.getDebugLoc())
1750       DL.print(O << ", at ");
1751     report_fatal_error(O.str());
1752   }
1753 }
1754 
1755 // Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
1756 // intrinsic.
1757 static bool isBrevLdIntrinsic(const Value *Inst) {
1758   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
1759   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
1760           ID == Intrinsic::hexagon_L2_loadri_pbr ||
1761           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
1762           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
1763           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
1764           ID == Intrinsic::hexagon_L2_loadrub_pbr);
1765 }
1766 
1767 // Bit-reverse Load Intrinsic: Crawl up and figure out the object from the
1768 // previous instruction. So far we only handle bitcast, extractvalue and
1769 // bit-reverse load intrinsic instructions. Should we handle CGEP?
1770 static Value *getBrevLdObject(Value *V) {
1771   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
1772       Operator::getOpcode(V) == Instruction::BitCast)
1773     V = cast<Operator>(V)->getOperand(0);
1774   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
1775     V = cast<Instruction>(V)->getOperand(0);
1776   return V;
1777 }
1778 
1779 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
1780 // a back edge. If the back edge comes from the intrinsic itself, the incoming
1781 // edge is returned.
1782 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
1783   const BasicBlock *Parent = PN->getParent();
1784   int Idx = -1;
1785   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
1786     BasicBlock *Blk = PN->getIncomingBlock(i);
1787     // Determine if the back edge originates from the intrinsic.
1788     if (Blk == Parent) {
1789       Value *BackEdgeVal = PN->getIncomingValue(i);
1790       Value *BaseVal;
1791       // Loop until getBrevLdObject returns the same Value or we hit IntrBaseVal.
1792       do {
1793         BaseVal = BackEdgeVal;
1794         BackEdgeVal = getBrevLdObject(BackEdgeVal);
1795       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
1796       // If getBrevLdObject reaches IntrBaseVal, this back edge originates from
1797       // the intrinsic itself, so skip it and return the incoming edge instead.
1798       if (IntrBaseVal == BackEdgeVal)
1799         continue;
1800       Idx = i;
1801       break;
1802     } else // Remember the incoming edge.
1803       Idx = i;
1804   }
1805   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
1806   return PN->getIncomingValue(Idx);
1807 }
1808 
1809 // Bit-reverse Load Intrinsic: Figure out the underlying object that the base
1810 // pointer points to, for the bit-reverse load intrinsic. Recording it in the
1811 // memoperand may help alias analysis figure out the dependencies.
1812 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
1813   Value *IntrBaseVal = V;
1814   Value *BaseVal;
1815   // Loop until getBrevLdObject returns the same Value, which means we have
1816   // either found the object or hit a PHI.
1817   do {
1818     BaseVal = V;
1819     V = getBrevLdObject(V);
1820   } while (BaseVal != V);
1821 
1822   // Identify the object from PHINode.
1823   if (const PHINode *PN = dyn_cast<PHINode>(V))
1824     return returnEdge(PN, IntrBaseVal);
1825   // For non-PHI nodes, the object is the last value returned by getBrevLdObject.
1826   else
1827     return V;
1828 }
1829 
1830 /// Given an intrinsic, checks whether on this target the intrinsic needs to
1831 /// map to a MemIntrinsicNode (i.e. it touches memory). If so, it returns
1832 /// true and stores the intrinsic information into the IntrinsicInfo that was
1833 /// passed to the function.
1834 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1835                                                const CallInst &I,
1836                                                MachineFunction &MF,
1837                                                unsigned Intrinsic) const {
1838   switch (Intrinsic) {
1839   case Intrinsic::hexagon_L2_loadrd_pbr:
1840   case Intrinsic::hexagon_L2_loadri_pbr:
1841   case Intrinsic::hexagon_L2_loadrh_pbr:
1842   case Intrinsic::hexagon_L2_loadruh_pbr:
1843   case Intrinsic::hexagon_L2_loadrb_pbr:
1844   case Intrinsic::hexagon_L2_loadrub_pbr: {
1845     Info.opc = ISD::INTRINSIC_W_CHAIN;
1846     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
1847     auto &Cont = I.getCalledFunction()->getParent()->getContext();
1848     // The intrinsic function call is of the form { ElTy, i8* }
1849     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
1850     // should be derived from ElTy.
1851     Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
1852     Info.memVT = MVT::getVT(ElTy);
1853     llvm::Value *BasePtrVal = I.getOperand(0);
1854     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
1855     // The offset value comes through the modifier register. For now, assume
1856     // the offset is 0.
1857     Info.offset = 0;
1858     Info.align =
1859         MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)));
1860     Info.flags = MachineMemOperand::MOLoad;
1861     return true;
1862   }
1863   case Intrinsic::hexagon_V6_vgathermw:
1864   case Intrinsic::hexagon_V6_vgathermw_128B:
1865   case Intrinsic::hexagon_V6_vgathermh:
1866   case Intrinsic::hexagon_V6_vgathermh_128B:
1867   case Intrinsic::hexagon_V6_vgathermhw:
1868   case Intrinsic::hexagon_V6_vgathermhw_128B:
1869   case Intrinsic::hexagon_V6_vgathermwq:
1870   case Intrinsic::hexagon_V6_vgathermwq_128B:
1871   case Intrinsic::hexagon_V6_vgathermhq:
1872   case Intrinsic::hexagon_V6_vgathermhq_128B:
1873   case Intrinsic::hexagon_V6_vgathermhwq:
1874   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
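         // HVX gathers read from the gathered addresses and write their result
         // through the pointer operand, hence the memory operand below is marked
         // as both a load and a store (and volatile, to keep it ordered).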
1875     const Module &M = *I.getParent()->getParent()->getParent();
1876     Info.opc = ISD::INTRINSIC_W_CHAIN;
1877     Type *VecTy = I.getArgOperand(1)->getType();
1878     Info.memVT = MVT::getVT(VecTy);
1879     Info.ptrVal = I.getArgOperand(0);
1880     Info.offset = 0;
1881     Info.align =
1882         MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
1883     Info.flags = MachineMemOperand::MOLoad |
1884                  MachineMemOperand::MOStore |
1885                  MachineMemOperand::MOVolatile;
1886     return true;
1887   }
1888   default:
1889     break;
1890   }
1891   return false;
1892 }
1893 
1894 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1895   return X.getValueType().isScalarInteger(); // 'tstbit'
1896 }
1897 
1898 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
1899   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
1900 }
1901 
1902 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
1903   if (!VT1.isSimple() || !VT2.isSimple())
1904     return false;
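       // Truncating i64 to i32 just takes the low subregister of a register
       // pair, so it is considered free.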
1905   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
1906 }
1907 
1908 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
1909     const MachineFunction &MF, EVT VT) const {
1910   return isOperationLegalOrCustom(ISD::FMA, VT);
1911 }
1912 
1913 // Should we expand the build vector with shuffles?
1914 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
1915       unsigned DefinedValues) const {
1916   return false;
1917 }
1918 
1919 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
1920                                                EVT VT) const {
1921   return true;
1922 }
1923 
1924 TargetLoweringBase::LegalizeTypeAction
1925 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
1926   unsigned VecLen = VT.getVectorNumElements();
1927   MVT ElemTy = VT.getVectorElementType();
1928 
1929   if (VecLen == 1 || VT.isScalableVector())
1930     return TargetLoweringBase::TypeScalarizeVector;
1931 
1932   if (Subtarget.useHVXOps()) {
1933     unsigned HwLen = Subtarget.getVectorLength();
1934     // If the size of VT is at least half of the vector length,
1935     // widen the vector. Note: the threshold was not selected in
1936     // any scientific way.
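         // For example, in the 64-byte HVX configuration (HwWidth = 512 bits),
         // a 256-bit vector such as v16i16 satisfies HwWidth/2 <= VecWidth <
         // HwWidth and is widened to a full HVX vector.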
1937     ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
1938     if (llvm::find(Tys, ElemTy) != Tys.end()) {
1939       unsigned HwWidth = 8*HwLen;
1940       unsigned VecWidth = VT.getSizeInBits();
1941       if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
1942         return TargetLoweringBase::TypeWidenVector;
1943     }
1944     // Split vectors of i1 that correspond to (byte) vector pairs.
1945     if (ElemTy == MVT::i1 && VecLen == 2*HwLen)
1946       return TargetLoweringBase::TypeSplitVector;
1947   }
1948 
1949   // Always widen (remaining) vectors of i1.
1950   if (ElemTy == MVT::i1)
1951     return TargetLoweringBase::TypeWidenVector;
1952 
1953   return TargetLoweringBase::TypeSplitVector;
1954 }
1955 
1956 std::pair<SDValue, int>
1957 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
1958   if (Addr.getOpcode() == ISD::ADD) {
1959     SDValue Op1 = Addr.getOperand(1);
1960     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
1961       return { Addr.getOperand(0), CN->getSExtValue() };
1962   }
1963   return { Addr, 0 };
1964 }
1965 
1966 // Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
1967 // to select data from, V3 is the permutation.
1968 SDValue
1969 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
1970       const {
1971   const auto *SVN = cast<ShuffleVectorSDNode>(Op);
1972   ArrayRef<int> AM = SVN->getMask();
1973   assert(AM.size() <= 8 && "Unexpected shuffle mask");
1974   unsigned VecLen = AM.size();
1975 
1976   MVT VecTy = ty(Op);
1977   assert(!Subtarget.isHVXVectorType(VecTy, true) &&
1978          "HVX shuffles should be legal");
1979   assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
1980 
1981   SDValue Op0 = Op.getOperand(0);
1982   SDValue Op1 = Op.getOperand(1);
1983   const SDLoc &dl(Op);
1984 
1985   // If the inputs are not the same type as the output, bail. This is not an
1986   // error situation, but it complicates the handling, and the default
1987   // expansion (into BUILD_VECTOR) should be adequate.
1988   if (ty(Op0) != VecTy || ty(Op1) != VecTy)
1989     return SDValue();
1990 
1991   // Normalize the mask so that the first non-negative index comes from
1992   // the first operand.
1993   SmallVector<int,8> Mask(AM.begin(), AM.end());
1994   unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
1995   if (F == AM.size())
1996     return DAG.getUNDEF(VecTy);
1997   if (AM[F] >= int(VecLen)) {
1998     ShuffleVectorSDNode::commuteMask(Mask);
1999     std::swap(Op0, Op1);
2000   }
2001 
2002   // Express the shuffle mask in terms of bytes.
2003   SmallVector<int,8> ByteMask;
2004   unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2005   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2006     int M = Mask[i];
2007     if (M < 0) {
2008       for (unsigned j = 0; j != ElemBytes; ++j)
2009         ByteMask.push_back(-1);
2010     } else {
2011       for (unsigned j = 0; j != ElemBytes; ++j)
2012         ByteMask.push_back(M*ElemBytes + j);
2013     }
2014   }
2015   assert(ByteMask.size() <= 8);
2016 
2017   // All non-undef (non-negative) indexes are well within [0..127], so they
2018   // fit in a single byte. Build two 64-bit words:
2019   // - MaskIdx where each byte is the corresponding index (for non-negative
2020   //   indexes), and 0xFF for negative indexes, and
2021   // - MaskUnd that has 0xFF for each negative index.
2022   uint64_t MaskIdx = 0;
2023   uint64_t MaskUnd = 0;
2024   for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2025     unsigned S = 8*i;
2026     uint64_t M = ByteMask[i] & 0xFF;
2027     if (M == 0xFF)
2028       MaskUnd |= M << S;
2029     MaskIdx |= M << S;
2030   }
2031 
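       // For example, a v4i8 shuffle with mask <0,1,2,3> gives ByteMask
       // {0,1,2,3}, MaskIdx = 0x03020100 and MaskUnd = 0, matching the
       // "identity" pattern checked below.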
2032   if (ByteMask.size() == 4) {
2033     // Identity.
2034     if (MaskIdx == (0x03020100 | MaskUnd))
2035       return Op0;
2036     // Byte swap.
2037     if (MaskIdx == (0x00010203 | MaskUnd)) {
2038       SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2039       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2040       return DAG.getBitcast(VecTy, T1);
2041     }
2042 
2043     // Byte packs.
2044     SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
2045                                    typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
2046     if (MaskIdx == (0x06040200 | MaskUnd))
2047       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2048     if (MaskIdx == (0x07050301 | MaskUnd))
2049       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2050 
2051     SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
2052                                    typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
2053     if (MaskIdx == (0x02000604 | MaskUnd))
2054       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2055     if (MaskIdx == (0x03010705 | MaskUnd))
2056       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2057   }
2058 
2059   if (ByteMask.size() == 8) {
2060     // Identity.
2061     if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2062       return Op0;
2063     // Byte swap.
2064     if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2065       SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2066       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2067       return DAG.getBitcast(VecTy, T1);
2068     }
2069 
2070     // Halfword picks.
2071     if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2072       return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2073     if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2074       return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2075     if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2076       return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2077     if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2078       return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2079     if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2080       VectorPair P = opSplit(Op0, dl, DAG);
2081       return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2082     }
2083 
2084     // Byte packs.
2085     if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2086       return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2087     if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2088       return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2089   }
2090 
2091   return SDValue();
2092 }
2093 
2094 // Create a Hexagon-specific node for shifting a vector by an integer.
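     // Only splat (uniform) shift amounts are handled here: ISD::SHL, ISD::SRA
     // and ISD::SRL become HexagonISD::VASL, VASR and VLSR respectively.
     // Otherwise an empty SDValue is returned and the default handling applies.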
2095 SDValue
2096 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2097       const {
2098   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
2099     if (SDValue S = BVN->getSplatValue()) {
2100       unsigned NewOpc;
2101       switch (Op.getOpcode()) {
2102         case ISD::SHL:
2103           NewOpc = HexagonISD::VASL;
2104           break;
2105         case ISD::SRA:
2106           NewOpc = HexagonISD::VASR;
2107           break;
2108         case ISD::SRL:
2109           NewOpc = HexagonISD::VLSR;
2110           break;
2111         default:
2112           llvm_unreachable("Unexpected shift opcode");
2113       }
2114       return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
2115     }
2116   }
2117 
2118   return SDValue();
2119 }
2120 
2121 SDValue
2122 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2123   return getVectorShiftByInt(Op, DAG);
2124 }
2125 
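     // Rotates by a compile-time constant amount are returned unchanged (they
     // map to the immediate forms of the rotate instructions); variable rotate
     // amounts fall back to the default handling.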
2126 SDValue
2127 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2128   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2129     return Op;
2130   return SDValue();
2131 }
2132 
2133 SDValue
2134 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2135   MVT ResTy = ty(Op);
2136   SDValue InpV = Op.getOperand(0);
2137   MVT InpTy = ty(InpV);
2138   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2139   const SDLoc &dl(Op);
2140 
2141   // Handle conversion from i8 to v8i1.
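       // C2_tfrrp copies the low 8 bits of a general register into a predicate
       // register, one bit per v8i1 lane.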
2142   if (ResTy == MVT::v8i1) {
2143     SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2144     SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2145     return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2146   }
2147 
2148   return SDValue();
2149 }
2150 
2151 bool
2152 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2153       MVT VecTy, SelectionDAG &DAG,
2154       MutableArrayRef<ConstantInt*> Consts) const {
2155   MVT ElemTy = VecTy.getVectorElementType();
2156   unsigned ElemWidth = ElemTy.getSizeInBits();
2157   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2158   bool AllConst = true;
2159 
2160   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2161     SDValue V = Values[i];
2162     if (V.isUndef()) {
2163       Consts[i] = ConstantInt::get(IntTy, 0);
2164       continue;
2165     }
2166     // Make sure to always cast to IntTy.
2167     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2168       const ConstantInt *CI = CN->getConstantIntValue();
2169       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2170     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2171       const ConstantFP *CF = CN->getConstantFPValue();
2172       APInt A = CF->getValueAPF().bitcastToAPInt();
2173       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2174     } else {
2175       AllConst = false;
2176     }
2177   }
2178   return AllConst;
2179 }
2180 
2181 SDValue
2182 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2183                                      MVT VecTy, SelectionDAG &DAG) const {
2184   MVT ElemTy = VecTy.getVectorElementType();
2185   assert(VecTy.getVectorNumElements() == Elem.size());
2186 
2187   SmallVector<ConstantInt*,4> Consts(Elem.size());
2188   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2189 
2190   unsigned First, Num = Elem.size();
2191   for (First = 0; First != Num; ++First)
2192     if (!isUndef(Elem[First]))
2193       break;
2194   if (First == Num)
2195     return DAG.getUNDEF(VecTy);
2196 
2197   if (AllConst &&
2198       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2199     return getZero(dl, VecTy, DAG);
2200 
2201   if (ElemTy == MVT::i16) {
2202     assert(Elem.size() == 2);
2203     if (AllConst) {
2204       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2205                    Consts[1]->getZExtValue() << 16;
2206       return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
2207     }
2208     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
2209                          {Elem[1], Elem[0]}, DAG);
2210     return DAG.getBitcast(MVT::v2i16, N);
2211   }
2212 
2213   if (ElemTy == MVT::i8) {
2214     // First try generating a constant.
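         // The four i8 elements are packed little-endian into a single i32,
         // e.g. <1, 2, 3, 4> produces the constant 0x04030201.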
2215     if (AllConst) {
2216       int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2217                   (Consts[1]->getZExtValue() & 0xFF) << 8 |
2218                   (Consts[2]->getZExtValue() & 0xFF) << 16 |
2219                   Consts[3]->getZExtValue() << 24;
2220       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2221     }
2222 
2223     // Then try splat.
2224     bool IsSplat = true;
2225     for (unsigned i = 0; i != Num; ++i) {
2226       if (i == First)
2227         continue;
2228       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2229         continue;
2230       IsSplat = false;
2231       break;
2232     }
2233     if (IsSplat) {
2234       // Legalize the operand to VSPLAT.
2235       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2236       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2237     }
2238 
2239     // Generate
2240     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2241     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2242     assert(Elem.size() == 4);
2243     SDValue Vs[4];
2244     for (unsigned i = 0; i != 4; ++i) {
2245       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2246       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2247     }
2248     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2249     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2250     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2251     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2252     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2253 
2254     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2255     return DAG.getBitcast(MVT::v4i8, R);
2256   }
2257 
2258 #ifndef NDEBUG
2259   dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
2260 #endif
2261   llvm_unreachable("Unexpected vector element type");
2262 }
2263 
2264 SDValue
2265 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2266                                      MVT VecTy, SelectionDAG &DAG) const {
2267   MVT ElemTy = VecTy.getVectorElementType();
2268   assert(VecTy.getVectorNumElements() == Elem.size());
2269 
2270   SmallVector<ConstantInt*,8> Consts(Elem.size());
2271   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2272 
2273   unsigned First, Num = Elem.size();
2274   for (First = 0; First != Num; ++First)
2275     if (!isUndef(Elem[First]))
2276       break;
2277   if (First == Num)
2278     return DAG.getUNDEF(VecTy);
2279 
2280   if (AllConst &&
2281       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2282     return getZero(dl, VecTy, DAG);
2283 
2284   // First try splat if possible.
2285   if (ElemTy == MVT::i16) {
2286     bool IsSplat = true;
2287     for (unsigned i = 0; i != Num; ++i) {
2288       if (i == First)
2289         continue;
2290       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2291         continue;
2292       IsSplat = false;
2293       break;
2294     }
2295     if (IsSplat) {
2296       // Legalize the operand to VSPLAT.
2297       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2298       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2299     }
2300   }
2301 
2302   // Then try constant.
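       // The elements are packed little-endian into a single i64, e.g. the
       // v4i16 vector <1, 2, 3, 4> becomes the constant 0x0004000300020001.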
2303   if (AllConst) {
2304     uint64_t Val = 0;
2305     unsigned W = ElemTy.getSizeInBits();
2306     uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
2307                   : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
2308     for (unsigned i = 0; i != Num; ++i)
2309       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2310     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2311     return DAG.getBitcast(VecTy, V0);
2312   }
2313 
2314   // Build two 32-bit vectors and concatenate.
2315   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2316   SDValue L = (ElemTy == MVT::i32)
2317                 ? Elem[0]
2318                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2319   SDValue H = (ElemTy == MVT::i32)
2320                 ? Elem[1]
2321                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2322   return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
2323 }
2324 
2325 SDValue
2326 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2327                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
2328                                      SelectionDAG &DAG) const {
2329   MVT VecTy = ty(VecV);
2330   assert(!ValTy.isVector() ||
2331          VecTy.getVectorElementType() == ValTy.getVectorElementType());
2332   unsigned VecWidth = VecTy.getSizeInBits();
2333   unsigned ValWidth = ValTy.getSizeInBits();
2334   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2335   assert((VecWidth % ElemWidth) == 0);
2336   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
2337 
2338   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2339   // without any coprocessors).
2340   if (ElemWidth == 1) {
2341     assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
2342     assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2343     // Check if this is an extract of the lowest bit.
2344     if (IdxN) {
2345       // Extracting the lowest bit is a no-op, but it changes the type,
2346       // so it must be kept as an operation to avoid errors related to
2347       // type mismatches.
2348       if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
2349         return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2350     }
2351 
2352     // If the value extracted is a single bit, use tstbit.
2353     if (ValWidth == 1) {
2354       SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2355       SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2356       SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2357       return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2358     }
2359 
2360     // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2361     // a predicate register. The elements of the vector are repeated
2362     // in the register (if necessary) so that the total number is 8.
2363     // The extracted subvector will need to be expanded in such a way.
2364     unsigned Scale = VecWidth / ValWidth;
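         // For example, extracting a v2i1 subvector from v8i1 gives Scale = 4,
         // so the expansion loop below runs twice to rebuild the 8-bit
         // predicate representation of the result.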
2365 
2366     // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2367     // position 0.
2368     assert(ty(IdxV) == MVT::i32);
2369     unsigned VecRep = 8 / VecWidth;
2370     SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2371                              DAG.getConstant(8*VecRep, dl, MVT::i32));
2372     SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2373     SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2374     while (Scale > 1) {
2375       // The longest possible subvector is at most 32 bits, so it is always
2376       // contained in the low subregister.
2377       T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
2378       T1 = expandPredicate(T1, dl, DAG);
2379       Scale /= 2;
2380     }
2381 
2382     return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2383   }
2384 
2385   assert(VecWidth == 32 || VecWidth == 64);
2386 
2387   // Cast everything to scalar integer types.
2388   MVT ScalarTy = tyScalar(VecTy);
2389   VecV = DAG.getBitcast(ScalarTy, VecV);
2390 
2391   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2392   SDValue ExtV;
2393 
2394   if (IdxN) {
2395     unsigned Off = IdxN->getZExtValue() * ElemWidth;
2396     if (VecWidth == 64 && ValWidth == 32) {
2397       assert(Off == 0 || Off == 32);
2398       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
2399       ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
2400     } else if (Off == 0 && (ValWidth % 8) == 0) {
2401       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2402     } else {
2403       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2404       // The return type of EXTRACTU must be the same as the type of the
2405       // input vector.
2406       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2407                          {VecV, WidthV, OffV});
2408     }
2409   } else {
2410     if (ty(IdxV) != MVT::i32)
2411       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2412     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2413                                DAG.getConstant(ElemWidth, dl, MVT::i32));
2414     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2415                        {VecV, WidthV, OffV});
2416   }
2417 
2418   // Cast ExtV to the requested result type.
2419   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2420   ExtV = DAG.getBitcast(ResTy, ExtV);
2421   return ExtV;
2422 }
2423 
2424 SDValue
2425 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2426                                     const SDLoc &dl, MVT ValTy,
2427                                     SelectionDAG &DAG) const {
2428   MVT VecTy = ty(VecV);
2429   if (VecTy.getVectorElementType() == MVT::i1) {
2430     MVT ValTy = ty(ValV);
2431     assert(ValTy.getVectorElementType() == MVT::i1);
2432     SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
2433     unsigned VecLen = VecTy.getVectorNumElements();
2434     unsigned Scale = VecLen / ValTy.getVectorNumElements();
2435     assert(Scale > 1);
2436 
2437     for (unsigned R = Scale; R > 1; R /= 2) {
2438       ValR = contractPredicate(ValR, dl, DAG);
2439       ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2440                          DAG.getUNDEF(MVT::i32), ValR);
2441     }
2442     // The longest possible subvector is at most 32 bits, so it is always
2443     // contained in the low subregister.
2444     ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
2445 
2446     unsigned ValBytes = 64 / Scale;
2447     SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
2448     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2449                               DAG.getConstant(8, dl, MVT::i32));
2450     SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2451     SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2452                               {VecR, ValR, Width, Idx});
2453     return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2454   }
2455 
2456   unsigned VecWidth = VecTy.getSizeInBits();
2457   unsigned ValWidth = ValTy.getSizeInBits();
2458   assert(VecWidth == 32 || VecWidth == 64);
2459   assert((VecWidth % ValWidth) == 0);
2460 
2461   // Cast everything to scalar integer types.
2462   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2463   // The actual type of ValV may be different from ValTy (which is related
2464   // to the vector type).
2465   unsigned VW = ty(ValV).getSizeInBits();
2466   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2467   VecV = DAG.getBitcast(ScalarTy, VecV);
2468   if (VW != VecWidth)
2469     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2470 
2471   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2472   SDValue InsV;
2473 
2474   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2475     unsigned W = C->getZExtValue() * ValWidth;
2476     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2477     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2478                        {VecV, ValV, WidthV, OffV});
2479   } else {
2480     if (ty(IdxV) != MVT::i32)
2481       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2482     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2483     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2484                        {VecV, ValV, WidthV, OffV});
2485   }
2486 
2487   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2488 }
2489 
2490 SDValue
2491 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2492                                        SelectionDAG &DAG) const {
2493   assert(ty(Vec32).getSizeInBits() == 32);
2494   if (isUndef(Vec32))
2495     return DAG.getUNDEF(MVT::i64);
2496   return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
2497 }
2498 
2499 SDValue
2500 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2501                                          SelectionDAG &DAG) const {
2502   assert(ty(Vec64).getSizeInBits() == 64);
2503   if (isUndef(Vec64))
2504     return DAG.getUNDEF(MVT::i32);
2505   return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
2506 }
2507 
2508 SDValue
2509 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2510       const {
2511   if (Ty.isVector()) {
2512     assert(Ty.isInteger() && "Only integer vectors are supported here");
2513     unsigned W = Ty.getSizeInBits();
2514     if (W <= 64)
2515       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2516     return DAG.getNode(HexagonISD::VZERO, dl, Ty);
2517   }
2518 
2519   if (Ty.isInteger())
2520     return DAG.getConstant(0, dl, Ty);
2521   if (Ty.isFloatingPoint())
2522     return DAG.getConstantFP(0.0, dl, Ty);
2523   llvm_unreachable("Invalid type for zero");
2524 }
2525 
2526 SDValue
2527 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2528   MVT VecTy = ty(Op);
2529   unsigned BW = VecTy.getSizeInBits();
2530   const SDLoc &dl(Op);
2531   SmallVector<SDValue,8> Ops;
2532   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2533     Ops.push_back(Op.getOperand(i));
2534 
2535   if (BW == 32)
2536     return buildVector32(Ops, dl, VecTy, DAG);
2537   if (BW == 64)
2538     return buildVector64(Ops, dl, VecTy, DAG);
2539 
2540   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2541     // Check for the special cases of all-0 or all-1.
2542     bool All0 = true, All1 = true;
2543     for (SDValue P : Ops) {
2544       auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
2545       if (CN == nullptr) {
2546         All0 = All1 = false;
2547         break;
2548       }
2549       uint32_t C = CN->getZExtValue();
2550       All0 &= (C == 0);
2551       All1 &= (C == 1);
2552     }
2553     if (All0)
2554       return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
2555     if (All1)
2556       return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
2557 
2558     // For each i1 element in the resulting predicate register, put 1
2559     // shifted by the index of the element into a general-purpose register,
2560     // then or them together and transfer it back into a predicate register.
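         // For example, with a v4i1 result Rep is 2 below, so each boolean
         // input drives two adjacent bits of the 8-bit predicate image.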
2561     SDValue Rs[8];
2562     SDValue Z = getZero(dl, MVT::i32, DAG);
2563     // Always produce 8 bits, repeat inputs if necessary.
2564     unsigned Rep = 8 / VecTy.getVectorNumElements();
2565     for (unsigned i = 0; i != 8; ++i) {
2566       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
2567       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2568     }
2569     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
2570       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2571         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
2572     }
2573     // Move the value directly to a predicate register.
2574     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
2575   }
2576 
2577   return SDValue();
2578 }
2579 
2580 SDValue
2581 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2582                                            SelectionDAG &DAG) const {
2583   MVT VecTy = ty(Op);
2584   const SDLoc &dl(Op);
2585   if (VecTy.getSizeInBits() == 64) {
2586     assert(Op.getNumOperands() == 2);
2587     return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
2588                        Op.getOperand(0));
2589   }
2590 
2591   MVT ElemTy = VecTy.getVectorElementType();
2592   if (ElemTy == MVT::i1) {
2593     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2594     MVT OpTy = ty(Op.getOperand(0));
2595     // Scale is how many times the operands need to be contracted to match
2596     // the representation in the target register.
2597     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2598     assert(Scale == Op.getNumOperands() && Scale > 1);
2599 
2600     // First, convert all bool vectors to integers, then generate pairwise
2601     // inserts to form values of doubled length. Up until there are only
2602     // two values left to concatenate, all of these values will fit in a
2603     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2604     SmallVector<SDValue,4> Words[2];
2605     unsigned IdxW = 0;
2606 
2607     for (SDValue P : Op.getNode()->op_values()) {
2608       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
2609       for (unsigned R = Scale; R > 1; R /= 2) {
2610         W = contractPredicate(W, dl, DAG);
2611         W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2612                         DAG.getUNDEF(MVT::i32), W);
2613       }
2614       W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
2615       Words[IdxW].push_back(W);
2616     }
2617 
2618     while (Scale > 2) {
2619       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
2620       Words[IdxW ^ 1].clear();
2621 
2622       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
2623         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
2624         // Insert W1 into W0 right next to the significant bits of W0.
2625         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2626                                 {W0, W1, WidthV, WidthV});
2627         Words[IdxW ^ 1].push_back(T);
2628       }
2629       IdxW ^= 1;
2630       Scale /= 2;
2631     }
2632 
2633     // Another sanity check. At this point there should only be two words
2634     // left, and Scale should be 2.
2635     assert(Scale == 2 && Words[IdxW].size() == 2);
2636 
2637     SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2638                              Words[IdxW][1], Words[IdxW][0]);
2639     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
2640   }
2641 
2642   return SDValue();
2643 }
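     // The EXTRACT_*/INSERT_* lowerings below are thin wrappers around
     // extractVector and insertVector.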
2644 
2645 SDValue
2646 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
2647                                                SelectionDAG &DAG) const {
2648   SDValue Vec = Op.getOperand(0);
2649   MVT ElemTy = ty(Vec).getVectorElementType();
2650   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
2651 }
2652 
2653 SDValue
2654 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
2655                                               SelectionDAG &DAG) const {
2656   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
2657                        ty(Op), ty(Op), DAG);
2658 }
2659 
2660 SDValue
2661 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
2662                                               SelectionDAG &DAG) const {
2663   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
2664                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
2665 }
2666 
2667 SDValue
2668 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
2669                                              SelectionDAG &DAG) const {
2670   SDValue ValV = Op.getOperand(1);
2671   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
2672                       SDLoc(Op), ty(ValV), DAG);
2673 }
2674 
2675 bool
2676 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
2677   // Assuming the caller does not have either a signext or zeroext modifier, and
2678   // only one value is accepted, any reasonable truncation is allowed.
2679   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
2680     return false;
2681 
2682   // FIXME: in principle up to 64-bit could be made safe, but it would be very
2683   // fragile at the moment: any support for multiple value returns would be
2684   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2685   return Ty1->getPrimitiveSizeInBits() <= 32;
2686 }
2687 
2688 SDValue
2689 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
2690   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2691   unsigned ClaimAlign = LN->getAlignment();
2692   validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
2693   // Call LowerUnalignedLoad for all loads; it recognizes loads that
2694   // don't need extra aligning.
2695   return LowerUnalignedLoad(Op, DAG);
2696 }
2697 
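     // Check the claimed alignment when the base pointer is a compile-time
     // constant, and expand stores whose claimed alignment is below the natural
     // alignment of the stored type.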
2698 SDValue
2699 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
2700   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
2701   unsigned ClaimAlign = SN->getAlignment();
2702   SDValue Ptr = SN->getBasePtr();
2703   const SDLoc &dl(Op);
2704   validateConstPtrAlignment(Ptr, dl, ClaimAlign);
2705 
2706   MVT StoreTy = SN->getMemoryVT().getSimpleVT();
2707   unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
2708   if (ClaimAlign < NeedAlign)
2709     return expandUnalignedStore(SN, DAG);
2710   return Op;
2711 }
2712 
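     // Loads that are already sufficiently aligned are returned unchanged.
     // Otherwise the load is either expanded with the target-independent code,
     // or implemented as two naturally aligned loads whose results are combined
     // with a VALIGN keyed on the original (unaligned) address.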
2713 SDValue
2714 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
2715       const {
2716   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2717   MVT LoadTy = ty(Op);
2718   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
2719   unsigned HaveAlign = LN->getAlignment();
2720   if (HaveAlign >= NeedAlign)
2721     return Op;
2722 
2723   const SDLoc &dl(Op);
2724   const DataLayout &DL = DAG.getDataLayout();
2725   LLVMContext &Ctx = *DAG.getContext();
2726 
2727   // If the load aligning is disabled or the load can be broken up into two
2728   // smaller legal loads, do the default (target-independent) expansion.
2729   bool DoDefault = false;
2730   // Handle it in the default way if this is an indexed load.
2731   if (!LN->isUnindexed())
2732     DoDefault = true;
2733 
2734   if (!AlignLoads) {
2735     if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
2736                                        *LN->getMemOperand()))
2737       return Op;
2738     DoDefault = true;
2739   }
2740   if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
2741     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
2742     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
2743                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
2744     DoDefault =
2745         allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
2746   }
2747   if (DoDefault) {
2748     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
2749     return DAG.getMergeValues({P.first, P.second}, dl);
2750   }
2751 
2752   // The code below generates two loads, both aligned as NeedAlign, and
2753   // with the distance of NeedAlign between them. For that to cover the
2754   // bits that need to be loaded (and without overlapping), the size of
2755   // the loads should be equal to NeedAlign. This is true for all loadable
2756   // types, but add an assertion in case something changes in the future.
2757   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
2758 
2759   unsigned LoadLen = NeedAlign;
2760   SDValue Base = LN->getBasePtr();
2761   SDValue Chain = LN->getChain();
2762   auto BO = getBaseAndOffset(Base);
2763   unsigned BaseOpc = BO.first.getOpcode();
2764   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
2765     return Op;
2766 
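       // Make the offset a multiple of the load length by folding the remainder
       // into the base address.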
2767   if (BO.second % LoadLen != 0) {
2768     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
2769                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
2770     BO.second -= BO.second % LoadLen;
2771   }
2772   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
2773       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
2774                     DAG.getConstant(NeedAlign, dl, MVT::i32))
2775       : BO.first;
2776   SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
2777   SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
2778 
2779   MachineMemOperand *WideMMO = nullptr;
2780   if (MachineMemOperand *MMO = LN->getMemOperand()) {
2781     MachineFunction &MF = DAG.getMachineFunction();
2782     WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
2783                     2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
2784                     MMO->getSyncScopeID(), MMO->getOrdering(),
2785                     MMO->getFailureOrdering());
2786   }
2787 
2788   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
2789   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
2790 
2791   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
2792                                 {Load1, Load0, BaseNoOff.getOperand(0)});
2793   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2794                                  Load0.getValue(1), Load1.getValue(1));
2795   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
2796   return M;
2797 }
2798 
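     // Custom-lower UADDO/USUBO only when the second operand is the constant 1;
     // the overflow bit then reduces to a single compare of the result against
     // 0 (for add) or -1 (for sub).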
2799 SDValue
2800 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
2801   SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
2802   auto *CY = dyn_cast<ConstantSDNode>(Y);
2803   if (!CY)
2804     return SDValue();
2805 
2806   const SDLoc &dl(Op);
2807   SDVTList VTs = Op.getNode()->getVTList();
2808   assert(VTs.NumVTs == 2);
2809   assert(VTs.VTs[1] == MVT::i1);
2810   unsigned Opc = Op.getOpcode();
2811 
2812   if (CY) {
2813     uint32_t VY = CY->getZExtValue();
2814     assert(VY != 0 && "This should have been folded");
2815     // X +/- 1
2816     if (VY != 1)
2817       return SDValue();
2818 
2819     if (Opc == ISD::UADDO) {
2820       SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
2821       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
2822                                 ISD::SETEQ);
2823       return DAG.getMergeValues({Op, Ov}, dl);
2824     }
2825     if (Opc == ISD::USUBO) {
2826       SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
2827       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
2828                                 DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
2829       return DAG.getMergeValues({Op, Ov}, dl);
2830     }
2831   }
2832 
2833   return SDValue();
2834 }
2835 
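     // ADDCARRY maps directly onto HexagonISD::ADDC. SUBCARRY is lowered to
     // HexagonISD::SUBC with the incoming and outgoing carry bits logically
     // inverted to account for the difference in carry/borrow sense.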
2836 SDValue
2837 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
2838   const SDLoc &dl(Op);
2839   unsigned Opc = Op.getOpcode();
2840   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
2841 
2842   if (Opc == ISD::ADDCARRY)
2843     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
2844                        { X, Y, C });
2845 
2846   EVT CarryTy = C.getValueType();
2847   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
2848                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
2849   SDValue Out[] = { SubC.getValue(0),
2850                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
2851   return DAG.getMergeValues(Out, dl);
2852 }
2853 
2854 SDValue
2855 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
2856   SDValue Chain     = Op.getOperand(0);
2857   SDValue Offset    = Op.getOperand(1);
2858   SDValue Handler   = Op.getOperand(2);
2859   SDLoc dl(Op);
2860   auto PtrVT = getPointerTy(DAG.getDataLayout());
2861 
2862   // Mark function as containing a call to EH_RETURN.
2863   HexagonMachineFunctionInfo *FuncInfo =
2864     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
2865   FuncInfo->setHasEHReturn();
2866 
2867   unsigned OffsetReg = Hexagon::R28;
2868 
2869   SDValue StoreAddr =
2870       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
2871                   DAG.getIntPtrConstant(4, dl));
2872   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
2873   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
2874 
2875   // Not needed; we already use it as an explicit input to EH_RETURN.
2876   // MF.getRegInfo().addLiveOut(OffsetReg);
2877 
2878   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
2879 }
2880 
2881 SDValue
2882 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2883   unsigned Opc = Op.getOpcode();
2884 
2885   // Handle INLINEASM first.
2886   if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
2887     return LowerINLINEASM(Op, DAG);
2888 
2889   if (isHvxOperation(Op)) {
2890     // If HVX lowering returns nothing, try the default lowering.
2891     if (SDValue V = LowerHvxOperation(Op, DAG))
2892       return V;
2893   }
2894 
2895   switch (Opc) {
2896     default:
2897 #ifndef NDEBUG
2898       Op.getNode()->dumpr(&DAG);
2899       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
2900         errs() << "Error: check for a non-legal type in this operation\n";
2901 #endif
2902       llvm_unreachable("Should not custom lower this!");
2903     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
2904     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
2905     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
2906     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
2907     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2908     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
2909     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
2910     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
2911     case ISD::LOAD:                 return LowerLoad(Op, DAG);
2912     case ISD::STORE:                return LowerStore(Op, DAG);
2913     case ISD::UADDO:
2914     case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
2915     case ISD::ADDCARRY:
2916     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
2917     case ISD::SRA:
2918     case ISD::SHL:
2919     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
2920     case ISD::ROTL:                 return LowerROTL(Op, DAG);
2921     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
2922     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
2923     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
2924     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
2925     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
2926     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
2927     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
2928     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
2929     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
2930     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
2931     case ISD::VASTART:              return LowerVASTART(Op, DAG);
2932     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
2933     case ISD::SETCC:                return LowerSETCC(Op, DAG);
2934     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
2935     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2936     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
2937     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
2938     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
2939       break;
2940   }
2941 
2942   return SDValue();
2943 }
2944 
2945 void
2946 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
2947                                              SmallVectorImpl<SDValue> &Results,
2948                                              SelectionDAG &DAG) const {
2949   // We are only custom-lowering stores to verify the alignment of the
2950   // address if it is a compile-time constant. Since a store can be modified
2951   // during type-legalization (the value being stored may need legalization),
2952   // return empty Results here to indicate that we don't really make any
2953   // changes in the custom lowering.
2954   if (N->getOpcode() != ISD::STORE)
2955     return TargetLowering::LowerOperationWrapper(N, Results, DAG);
2956 }
2957 
2958 void
2959 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
2960                                           SmallVectorImpl<SDValue> &Results,
2961                                           SelectionDAG &DAG) const {
2962   const SDLoc &dl(N);
2963   switch (N->getOpcode()) {
2964     case ISD::SRL:
2965     case ISD::SRA:
2966     case ISD::SHL:
2967       return;
2968     case ISD::BITCAST:
2969       // Handle a bitcast from v8i1 to i8.
2970       if (N->getValueType(0) == MVT::i8) {
2971         SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
2972                              N->getOperand(0), DAG);
2973         SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
2974         Results.push_back(T);
2975       }
2976       break;
2977   }
2978 }
2979 
2980 SDValue
2981 HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
2982       const {
2983   SDValue Op(N, 0);
2984   if (isHvxOperation(Op)) {
2985     if (SDValue V = PerformHvxDAGCombine(N, DCI))
2986       return V;
2987     return SDValue();
2988   }
2989 
2990   const SDLoc &dl(Op);
2991   unsigned Opc = Op.getOpcode();
2992 
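       // Fold a predicate-to-register transfer of a known constant predicate:
       // PTRUE becomes all-ones and PFALSE becomes zero.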
2993   if (Opc == HexagonISD::P2D) {
2994     SDValue P = Op.getOperand(0);
2995     switch (P.getOpcode()) {
2996       case HexagonISD::PTRUE:
2997         return DCI.DAG.getConstant(-1, dl, ty(Op));
2998       case HexagonISD::PFALSE:
2999         return getZero(dl, ty(Op), DCI.DAG);
3000       default:
3001         break;
3002     }
3003   } else if (Opc == ISD::VSELECT) {
3004     // This is pretty much duplicated in HexagonISelLoweringHVX...
3005     //
3006     // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3007     SDValue Cond = Op.getOperand(0);
3008     if (Cond->getOpcode() == ISD::XOR) {
3009       SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3010       if (C1->getOpcode() == HexagonISD::PTRUE) {
3011         SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
3012                                        Op.getOperand(2), Op.getOperand(1));
3013         return VSel;
3014       }
3015     }
3016   }
3017 
3018   return SDValue();
3019 }
3020 
3021 /// Returns relocation base for the given PIC jumptable.
3022 SDValue
3023 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3024                                                 SelectionDAG &DAG) const {
3025   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
3026   EVT VT = Table.getValueType();
3027   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
3028   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
3029 }
3030 
3031 //===----------------------------------------------------------------------===//
3032 // Inline Assembly Support
3033 //===----------------------------------------------------------------------===//
3034 
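     // 'q' and 'v' name HVX predicate and vector registers and are only register
     // constraints when HVX is enabled; 'a' names the modifier registers M0-M1.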
3035 TargetLowering::ConstraintType
3036 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3037   if (Constraint.size() == 1) {
3038     switch (Constraint[0]) {
3039       case 'q':
3040       case 'v':
3041         if (Subtarget.useHVXOps())
3042           return C_RegisterClass;
3043         break;
3044       case 'a':
3045         return C_RegisterClass;
3046       default:
3047         break;
3048     }
3049   }
3050   return TargetLowering::getConstraintType(Constraint);
3051 }
3052 
3053 std::pair<unsigned, const TargetRegisterClass*>
3054 HexagonTargetLowering::getRegForInlineAsmConstraint(
3055     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3056 
3057   if (Constraint.size() == 1) {
3058     switch (Constraint[0]) {
3059     case 'r':   // R0-R31
3060       switch (VT.SimpleTy) {
3061       default:
3062         return {0u, nullptr};
3063       case MVT::i1:
3064       case MVT::i8:
3065       case MVT::i16:
3066       case MVT::i32:
3067       case MVT::f32:
3068         return {0u, &Hexagon::IntRegsRegClass};
3069       case MVT::i64:
3070       case MVT::f64:
3071         return {0u, &Hexagon::DoubleRegsRegClass};
3072       }
3073       break;
3074     case 'a': // M0-M1
3075       if (VT != MVT::i32)
3076         return {0u, nullptr};
3077       return {0u, &Hexagon::ModRegsRegClass};
3078     case 'q': // q0-q3
3079       switch (VT.getSizeInBits()) {
3080       default:
3081         return {0u, nullptr};
3082       case 512:
3083       case 1024:
3084         return {0u, &Hexagon::HvxQRRegClass};
3085       }
3086       break;
3087     case 'v': // V0-V31
3088       switch (VT.getSizeInBits()) {
3089       default:
3090         return {0u, nullptr};
3091       case 512:
3092         return {0u, &Hexagon::HvxVRRegClass};
3093       case 1024:
3094         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3095           return {0u, &Hexagon::HvxVRRegClass};
3096         return {0u, &Hexagon::HvxWRRegClass};
3097       case 2048:
3098         return {0u, &Hexagon::HvxWRRegClass};
3099       }
3100       break;
3101     default:
3102       return {0u, nullptr};
3103     }
3104   }
3105 
3106   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3107 }
3108 
3109 /// isFPImmLegal - Returns true if the target can instruction select the
3110 /// specified FP immediate natively. If false, the legalizer will
3111 /// materialize the FP immediate as a load from a constant pool.
3112 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3113                                          bool ForCodeSize) const {
3114   return true;
3115 }
3116 
3117 /// isLegalAddressingMode - Return true if the addressing mode represented by
3118 /// AM is legal for this target, for a load/store of the specified type.
3119 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3120                                                   const AddrMode &AM, Type *Ty,
3121                                                   unsigned AS, Instruction *I) const {
3122   if (Ty->isSized()) {
3123     // When LSR detects uses of the same base address to access different
3124     // types (e.g. unions), it will assume a conservative type for these
3125     // uses:
3126     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
3127     // The type Ty passed here would then be "void". Skip the alignment
3128     // checks, but do not return false right away, since that confuses
3129     // LSR into crashing.
3130     unsigned A = DL.getABITypeAlignment(Ty);
3131     // The base offset must be a multiple of the alignment.
3132     if ((AM.BaseOffs % A) != 0)
3133       return false;
3134     // The shifted offset must fit in 11 bits.
3135     if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
3136       return false;
3137   }
3138 
3139   // No global is ever allowed as a base.
3140   if (AM.BaseGV)
3141     return false;
3142 
3143   int Scale = AM.Scale;
3144   if (Scale < 0)
3145     Scale = -Scale;
3146   switch (Scale) {
3147   case 0:  // No scale reg, "r+i", "r", or just "i".
3148     break;
3149   default: // No scaled addressing mode.
3150     return false;
3151   }
3152   return true;
3153 }
3154 
3155 /// Return true if folding a constant offset with the given GlobalAddress is
3156 /// legal.  It is frequently not legal in PIC relocation models.
3157 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3158       const {
3159   return HTM.getRelocationModel() == Reloc::Static;
3160 }
3161 
3162 /// isLegalICmpImmediate - Return true if the specified immediate is a legal
3163 /// icmp immediate, that is, the target has icmp instructions which can compare
3164 /// a register against the immediate without having to materialize the
3165 /// immediate into a register.
3166 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3167   return Imm >= -512 && Imm <= 511;
3168 }
3169 
3170 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3171 /// for tail call optimization. Targets which want to do tail call
3172 /// optimization should implement this function.
3173 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3174                                  SDValue Callee,
3175                                  CallingConv::ID CalleeCC,
3176                                  bool IsVarArg,
3177                                  bool IsCalleeStructRet,
3178                                  bool IsCallerStructRet,
3179                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3180                                  const SmallVectorImpl<SDValue> &OutVals,
3181                                  const SmallVectorImpl<ISD::InputArg> &Ins,
3182                                  SelectionDAG& DAG) const {
3183   const Function &CallerF = DAG.getMachineFunction().getFunction();
3184   CallingConv::ID CallerCC = CallerF.getCallingConv();
3185   bool CCMatch = CallerCC == CalleeCC;
3186 
3187   // ***************************************************************************
3188   //  Look for obvious safe cases to perform tail call optimization that do not
3189   //  require ABI changes.
3190   // ***************************************************************************
3191 
3192   // If this is a tail call via a function pointer, then don't do it!
3193   if (!isa<GlobalAddressSDNode>(Callee) &&
3194       !isa<ExternalSymbolSDNode>(Callee)) {
3195     return false;
3196   }
3197 
3198   // Do not optimize if the calling conventions do not match, unless both the
3199   // caller and the callee use the C or Fast calling convention.
3200   if (!CCMatch) {
3201     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3202     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3203     // If R & E, then ok.
3204     if (!R || !E)
3205       return false;
3206   }
3207 
3208   // Do not tail call optimize vararg calls.
3209   if (IsVarArg)
3210     return false;
3211 
3212   // Also avoid tail call optimization if either caller or callee uses struct
3213   // return semantics.
3214   if (IsCalleeStructRet || IsCallerStructRet)
3215     return false;
3216 
3217   // In addition to the cases above, we also disable Tail Call Optimization if
3218   // the calling convention requires at least one outgoing argument to be
3219   // passed on the stack. We cannot check that here because at this point that
3220   // information is not available.
3221   return true;
3222 }
3223 
3224 /// Returns the target specific optimal type for load and store operations as
3225 /// a result of memset, memcpy, and memmove lowering.
3226 ///
3227 /// If DstAlign is zero, it means the destination alignment can satisfy any
3228 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
3229 /// against an alignment requirement, probably because the
3230 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3231 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3232 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3233 /// does not need to be loaded.  It returns EVT::Other if the type should be
3234 /// determined using generic target-independent logic.
3235 EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
3236       unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
3237       bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
3238 
3239   auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
3240     return (GivenA % MinA) == 0;
3241   };
3242 
3243   if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
3244     return MVT::i64;
3245   if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
3246     return MVT::i32;
3247   if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
3248     return MVT::i16;
3249 
3250   return MVT::Other;
3251 }
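     // Only HVX vector types may be accessed with less than their natural
     // alignment, and such accesses are never reported as fast.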
3252 
3253 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3254     EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
3255     bool *Fast) const {
3256   if (Fast)
3257     *Fast = false;
3258   return Subtarget.isHVXVectorType(VT.getSimpleVT());
3259 }
3260 
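     // For HVX types: predicate vectors map to HvxQR, single vectors to HvxVR,
     // and vector pairs to HvxWR. Everything else uses the default mapping.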
3261 std::pair<const TargetRegisterClass*, uint8_t>
3262 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3263       MVT VT) const {
3264   if (Subtarget.isHVXVectorType(VT, true)) {
3265     unsigned BitWidth = VT.getSizeInBits();
3266     unsigned VecWidth = Subtarget.getVectorLength() * 8;
3267 
3268     if (VT.getVectorElementType() == MVT::i1)
3269       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3270     if (BitWidth == VecWidth)
3271       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3272     assert(BitWidth == 2 * VecWidth);
3273     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3274   }
3275 
3276   return TargetLowering::findRepresentativeClass(TRI, VT);
3277 }
3278 
3279 bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
3280       ISD::LoadExtType ExtTy, EVT NewVT) const {
3281   // TODO: This may be worth removing. Check regression tests for diffs.
3282   if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
3283     return false;
3284 
3285   auto *L = cast<LoadSDNode>(Load);
3286   std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
3287   // Small-data object, do not shrink.
3288   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3289     return false;
3290   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
3291     auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
3292     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
3293     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3294   }
3295   return true;
3296 }
3297 
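     // Emit a load-locked operation using the L2_loadw_locked (32-bit) or
     // L4_loadd_locked (64-bit) intrinsic, bitcasting the pointer and the
     // result as needed.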
3298 Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
3299       AtomicOrdering Ord) const {
3300   BasicBlock *BB = Builder.GetInsertBlock();
3301   Module *M = BB->getParent()->getParent();
3302   auto PT = cast<PointerType>(Addr->getType());
3303   Type *Ty = PT->getElementType();
3304   unsigned SZ = Ty->getPrimitiveSizeInBits();
3305   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3306   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3307                                    : Intrinsic::hexagon_L4_loadd_locked;
3308   Function *Fn = Intrinsic::getDeclaration(M, IntID);
3309 
3310   PointerType *NewPtrTy
3311     = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
3312   Addr = Builder.CreateBitCast(Addr, NewPtrTy);
3313 
3314   Value *Call = Builder.CreateCall(Fn, Addr, "larx");
3315 
3316   return Builder.CreateBitCast(Call, Ty);
3317 }
3318 
3319 /// Perform a store-conditional operation to Addr. Return the status of the
3320 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3321 Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
3322       Value *Val, Value *Addr, AtomicOrdering Ord) const {
3323   BasicBlock *BB = Builder.GetInsertBlock();
3324   Module *M = BB->getParent()->getParent();
3325   Type *Ty = Val->getType();
3326   unsigned SZ = Ty->getPrimitiveSizeInBits();
3327 
3328   Type *CastTy = Builder.getIntNTy(SZ);
3329   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3330   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3331                                    : Intrinsic::hexagon_S4_stored_locked;
3332   Function *Fn = Intrinsic::getDeclaration(M, IntID);
3333 
3334   unsigned AS = Addr->getType()->getPointerAddressSpace();
3335   Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
3336   Val = Builder.CreateBitCast(Val, CastTy);
3337 
3338   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3339   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3340   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3341   return Ext;
3342 }
3343 
3344 TargetLowering::AtomicExpansionKind
3345 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3346   // Do not expand loads and stores that don't exceed 64 bits.
3347   return LI->getType()->getPrimitiveSizeInBits() > 64
3348              ? AtomicExpansionKind::LLOnly
3349              : AtomicExpansionKind::None;
3350 }
3351 
3352 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3353   // Do not expand loads and stores that don't exceed 64 bits.
3354   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3355 }
3356 
3357 TargetLowering::AtomicExpansionKind
3358 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3359     AtomicCmpXchgInst *AI) const {
3360   const DataLayout &DL = AI->getModule()->getDataLayout();
3361   unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
3362   if (Size >= 4 && Size <= 8)
3363     return AtomicExpansionKind::LLSC;
3364   return AtomicExpansionKind::None;
3365 }
3366