xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that Hexagon uses to lower LLVM code
10 // into a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "HexagonISelLowering.h"
15 #include "Hexagon.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonRegisterInfo.h"
18 #include "HexagonSubtarget.h"
19 #include "HexagonTargetMachine.h"
20 #include "HexagonTargetObjectFile.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAG.h"
31 #include "llvm/CodeGen/TargetCallingConv.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/DiagnosticInfo.h"
38 #include "llvm/IR/DiagnosticPrinter.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/IntrinsicsHexagon.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/IR/Type.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/CodeGen.h"
52 #include "llvm/Support/CommandLine.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MathExtras.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include "llvm/Target/TargetMachine.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <limits>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "hexagon-lowering"
67 
68 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
69   cl::init(true), cl::Hidden,
70   cl::desc("Control jump table emission on Hexagon target"));
71 
72 static cl::opt<bool>
73     EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
74                          cl::desc("Enable Hexagon SDNode scheduling"));
75 
76 static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
77                                       cl::init(5),
78                                       cl::desc("Set minimum jump tables"));
79 
80 static cl::opt<int>
81     MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6),
82                          cl::desc("Max #stores to inline memcpy"));
83 
84 static cl::opt<int>
85     MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4),
86                                 cl::desc("Max #stores to inline memcpy"));
87 
88 static cl::opt<int>
89     MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6),
90                           cl::desc("Max #stores to inline memmove"));
91 
92 static cl::opt<int>
93     MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
94                                  cl::init(4),
95                                  cl::desc("Max #stores to inline memmove"));
96 
97 static cl::opt<int>
98     MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8),
99                          cl::desc("Max #stores to inline memset"));
100 
101 static cl::opt<int>
102     MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4),
103                                 cl::desc("Max #stores to inline memset"));
104 
105 static cl::opt<bool>
106     ConstantLoadsToImm("constant-loads-to-imm", cl::Hidden, cl::init(true),
107                        cl::desc("Convert constant loads to immediate values."));
108 
109 static cl::opt<bool> AlignLoads("hexagon-align-loads",
110   cl::Hidden, cl::init(false),
111   cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
112 
113 static cl::opt<bool>
114     DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
115                             cl::init(false),
116                             cl::desc("Disable minimum alignment of 1 for "
117                                      "arguments passed by value on stack"));
118 
119 namespace {
120 
121   class HexagonCCState : public CCState {
122     unsigned NumNamedVarArgParams = 0;
123 
124   public:
HexagonCCState(CallingConv::ID CC,bool IsVarArg,MachineFunction & MF,SmallVectorImpl<CCValAssign> & locs,LLVMContext & C,unsigned NumNamedArgs)125     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
126                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
127                    unsigned NumNamedArgs)
128         : CCState(CC, IsVarArg, MF, locs, C),
129           NumNamedVarArgParams(NumNamedArgs) {}
getNumNamedVarArgParams() const130     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
131   };
132 
133 } // end anonymous namespace
134 
135 
136 // Implement calling convention for Hexagon.
137 
CC_SkipOdd(unsigned & ValNo,MVT & ValVT,MVT & LocVT,CCValAssign::LocInfo & LocInfo,ISD::ArgFlagsTy & ArgFlags,CCState & State)138 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
139                        CCValAssign::LocInfo &LocInfo,
140                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
141   static const MCPhysReg ArgRegs[] = {
142     Hexagon::R0, Hexagon::R1, Hexagon::R2,
143     Hexagon::R3, Hexagon::R4, Hexagon::R5
144   };
145   const unsigned NumArgRegs = std::size(ArgRegs);
146   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
147 
148   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
149   if (RegNum != NumArgRegs && RegNum % 2 == 1)
150     State.AllocateReg(ArgRegs[RegNum]);
151 
152   // Always return false here, as this function only makes sure that the first
153   // unallocated register has an even register number and does not actually
154   // allocate a register for the current argument.
155   return false;
156 }
157 
158 #include "HexagonGenCallingConv.inc"
159 
getVectorTypeBreakdownForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT,EVT & IntermediateVT,unsigned & NumIntermediates,MVT & RegisterVT) const160 unsigned HexagonTargetLowering::getVectorTypeBreakdownForCallingConv(
161     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
162     unsigned &NumIntermediates, MVT &RegisterVT) const {
163 
164   bool isBoolVector = VT.getVectorElementType() == MVT::i1;
165   bool isPowerOf2 = VT.isPow2VectorType();
166   unsigned NumElts = VT.getVectorNumElements();
167 
168   // Split vectors of type vXi1 into (X/8) vectors of type v8i1,
169   // where X is divisible by 8.
170   if (isBoolVector && !Subtarget.useHVXOps() && isPowerOf2 && NumElts >= 8) {
171     RegisterVT = MVT::v8i8;
172     IntermediateVT = MVT::v8i1;
173     NumIntermediates = NumElts / 8;
174     return NumIntermediates;
175   }
176 
177   // In HVX 64-byte mode, vectors of type vXi1 are split into (X / 64) vectors
178   // of type v64i1, provided that X is divisible by 64.
179   if (isBoolVector && Subtarget.useHVX64BOps() && isPowerOf2 && NumElts >= 64) {
180     RegisterVT = MVT::v64i8;
181     IntermediateVT = MVT::v64i1;
182     NumIntermediates = NumElts / 64;
183     return NumIntermediates;
184   }
185 
186   // In HVX 128-byte mode, vectors of type vXi1 are split into (X / 128) vectors
187   // of type v128i1, provided that X is divisible by 128.
188   if (isBoolVector && Subtarget.useHVX128BOps() && isPowerOf2 &&
189       NumElts >= 128) {
190     RegisterVT = MVT::v128i8;
191     IntermediateVT = MVT::v128i1;
192     NumIntermediates = NumElts / 128;
193     return NumIntermediates;
194   }
195 
196   return TargetLowering::getVectorTypeBreakdownForCallingConv(
197       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
198 }
199 
200 std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(const HexagonSubtarget & Subtarget,EVT VT) const201 HexagonTargetLowering::handleMaskRegisterForCallingConv(
202     const HexagonSubtarget &Subtarget, EVT VT) const {
203   assert(VT.getVectorElementType() == MVT::i1);
204 
205   const unsigned NumElems = VT.getVectorNumElements();
206 
207   if (!VT.isPow2VectorType())
208     return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
209 
210   if (!Subtarget.useHVXOps() && NumElems >= 8)
211     return {MVT::v8i8, NumElems / 8};
212 
213   if (Subtarget.useHVX64BOps() && NumElems >= 64)
214     return {MVT::v64i8, NumElems / 64};
215 
216   if (Subtarget.useHVX128BOps() && NumElems >= 128)
217     return {MVT::v128i8, NumElems / 128};
218 
219   return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
220 }
221 
getRegisterTypeForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const222 MVT HexagonTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
223                                                          CallingConv::ID CC,
224                                                          EVT VT) const {
225 
226   if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
227     auto [RegisterVT, NumRegisters] =
228         handleMaskRegisterForCallingConv(Subtarget, VT);
229     if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
230       return RegisterVT;
231   }
232 
233   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
234 }
235 
236 SDValue
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const237 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
238       const {
239   return SDValue();
240 }
241 
242 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
243 /// by "Src" to address "Dst" of size "Size".  Alignment information is
244 /// specified by the specific parameter attribute. The copy will be passed as
245 /// a byval function parameter.  Sometimes what we are copying is the end of a
246 /// larger object, the part that does not fit in registers.
CreateCopyOfByValArgument(SDValue Src,SDValue Dst,SDValue Chain,ISD::ArgFlagsTy Flags,SelectionDAG & DAG,const SDLoc & dl)247 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
248                                          SDValue Chain, ISD::ArgFlagsTy Flags,
249                                          SelectionDAG &DAG, const SDLoc &dl) {
250   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
251   return DAG.getMemcpy(
252       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
253       /*isVolatile=*/false, /*AlwaysInline=*/false,
254       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
255 }
256 
257 bool
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const258 HexagonTargetLowering::CanLowerReturn(
259     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
260     const SmallVectorImpl<ISD::OutputArg> &Outs,
261     LLVMContext &Context, const Type *RetTy) const {
262   SmallVector<CCValAssign, 16> RVLocs;
263   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
264 
265   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
266     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
267   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
268 }
269 
270 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
271 // passed by value, the function prototype is modified to return void and
272 // the value is stored in memory pointed by a pointer passed by caller.
273 SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const274 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
275                                    bool IsVarArg,
276                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
277                                    const SmallVectorImpl<SDValue> &OutVals,
278                                    const SDLoc &dl, SelectionDAG &DAG) const {
279   // CCValAssign - represent the assignment of the return value to locations.
280   SmallVector<CCValAssign, 16> RVLocs;
281 
282   // CCState - Info about the registers and stack slot.
283   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
284                  *DAG.getContext());
285 
286   // Analyze return values of ISD::RET
287   if (Subtarget.useHVXOps())
288     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
289   else
290     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
291 
292   SDValue Glue;
293   SmallVector<SDValue, 4> RetOps(1, Chain);
294 
295   // Copy the result values into the output registers.
296   for (unsigned i = 0; i != RVLocs.size(); ++i) {
297     CCValAssign &VA = RVLocs[i];
298     SDValue Val = OutVals[i];
299 
300     switch (VA.getLocInfo()) {
301       default:
302         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
303         llvm_unreachable("Unknown loc info!");
304       case CCValAssign::Full:
305         break;
306       case CCValAssign::BCvt:
307         Val = DAG.getBitcast(VA.getLocVT(), Val);
308         break;
309       case CCValAssign::SExt:
310         Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
311         break;
312       case CCValAssign::ZExt:
313         Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
314         break;
315       case CCValAssign::AExt:
316         Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
317         break;
318     }
319 
320     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Glue);
321 
322     // Guarantee that all emitted copies are stuck together with flags.
323     Glue = Chain.getValue(1);
324     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
325   }
326 
327   RetOps[0] = Chain;  // Update chain.
328 
329   // Add the glue if we have it.
330   if (Glue.getNode())
331     RetOps.push_back(Glue);
332 
333   return DAG.getNode(HexagonISD::RET_GLUE, dl, MVT::Other, RetOps);
334 }
335 
mayBeEmittedAsTailCall(const CallInst * CI) const336 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
337   // If either no tail call or told not to tail call at all, don't.
338   return CI->isTailCall();
339 }
340 
getRegisterByName(const char * RegName,LLT VT,const MachineFunction &) const341 Register HexagonTargetLowering::getRegisterByName(
342       const char* RegName, LLT VT, const MachineFunction &) const {
343   // Just support r19, the linux kernel uses it.
344   Register Reg = StringSwitch<Register>(RegName)
345                      .Case("r0", Hexagon::R0)
346                      .Case("r1", Hexagon::R1)
347                      .Case("r2", Hexagon::R2)
348                      .Case("r3", Hexagon::R3)
349                      .Case("r4", Hexagon::R4)
350                      .Case("r5", Hexagon::R5)
351                      .Case("r6", Hexagon::R6)
352                      .Case("r7", Hexagon::R7)
353                      .Case("r8", Hexagon::R8)
354                      .Case("r9", Hexagon::R9)
355                      .Case("r10", Hexagon::R10)
356                      .Case("r11", Hexagon::R11)
357                      .Case("r12", Hexagon::R12)
358                      .Case("r13", Hexagon::R13)
359                      .Case("r14", Hexagon::R14)
360                      .Case("r15", Hexagon::R15)
361                      .Case("r16", Hexagon::R16)
362                      .Case("r17", Hexagon::R17)
363                      .Case("r18", Hexagon::R18)
364                      .Case("r19", Hexagon::R19)
365                      .Case("r20", Hexagon::R20)
366                      .Case("r21", Hexagon::R21)
367                      .Case("r22", Hexagon::R22)
368                      .Case("r23", Hexagon::R23)
369                      .Case("r24", Hexagon::R24)
370                      .Case("r25", Hexagon::R25)
371                      .Case("r26", Hexagon::R26)
372                      .Case("r27", Hexagon::R27)
373                      .Case("r28", Hexagon::R28)
374                      .Case("r29", Hexagon::R29)
375                      .Case("r30", Hexagon::R30)
376                      .Case("r31", Hexagon::R31)
377                      .Case("r1:0", Hexagon::D0)
378                      .Case("r3:2", Hexagon::D1)
379                      .Case("r5:4", Hexagon::D2)
380                      .Case("r7:6", Hexagon::D3)
381                      .Case("r9:8", Hexagon::D4)
382                      .Case("r11:10", Hexagon::D5)
383                      .Case("r13:12", Hexagon::D6)
384                      .Case("r15:14", Hexagon::D7)
385                      .Case("r17:16", Hexagon::D8)
386                      .Case("r19:18", Hexagon::D9)
387                      .Case("r21:20", Hexagon::D10)
388                      .Case("r23:22", Hexagon::D11)
389                      .Case("r25:24", Hexagon::D12)
390                      .Case("r27:26", Hexagon::D13)
391                      .Case("r29:28", Hexagon::D14)
392                      .Case("r31:30", Hexagon::D15)
393                      .Case("sp", Hexagon::R29)
394                      .Case("fp", Hexagon::R30)
395                      .Case("lr", Hexagon::R31)
396                      .Case("p0", Hexagon::P0)
397                      .Case("p1", Hexagon::P1)
398                      .Case("p2", Hexagon::P2)
399                      .Case("p3", Hexagon::P3)
400                      .Case("sa0", Hexagon::SA0)
401                      .Case("lc0", Hexagon::LC0)
402                      .Case("sa1", Hexagon::SA1)
403                      .Case("lc1", Hexagon::LC1)
404                      .Case("m0", Hexagon::M0)
405                      .Case("m1", Hexagon::M1)
406                      .Case("usr", Hexagon::USR)
407                      .Case("ugp", Hexagon::UGP)
408                      .Case("cs0", Hexagon::CS0)
409                      .Case("cs1", Hexagon::CS1)
410                      .Default(Register());
411   return Reg;
412 }
413 
414 /// LowerCallResult - Lower the result values of an ISD::CALL into the
415 /// appropriate copies out of appropriate physical registers.  This assumes that
416 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
417 /// being lowered. Returns a SDNode with the same number of values as the
418 /// ISD::CALL.
LowerCallResult(SDValue Chain,SDValue Glue,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals,const SmallVectorImpl<SDValue> & OutVals,SDValue Callee) const419 SDValue HexagonTargetLowering::LowerCallResult(
420     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
421     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
422     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
423     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
424   // Assign locations to each value returned by this call.
425   SmallVector<CCValAssign, 16> RVLocs;
426 
427   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
428                  *DAG.getContext());
429 
430   if (Subtarget.useHVXOps())
431     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
432   else
433     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
434 
435   // Copy all of the result registers out of their specified physreg.
436   for (unsigned i = 0; i != RVLocs.size(); ++i) {
437     SDValue RetVal;
438     if (RVLocs[i].getValVT() == MVT::i1) {
439       // Return values of type MVT::i1 require special handling. The reason
440       // is that MVT::i1 is associated with the PredRegs register class, but
441       // values of that type are still returned in R0. Generate an explicit
442       // copy into a predicate register from R0, and treat the value of the
443       // predicate register as the call result.
444       auto &MRI = DAG.getMachineFunction().getRegInfo();
445       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
446                                        MVT::i32, Glue);
447       // FR0 = (Value, Chain, Glue)
448       Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
449       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
450                                      FR0.getValue(0), FR0.getValue(2));
451       // TPR = (Chain, Glue)
452       // Don't glue this CopyFromReg, because it copies from a virtual
453       // register. If it is glued to the call, InstrEmitter will add it
454       // as an implicit def to the call (EmitMachineNode).
455       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
456       Glue = TPR.getValue(1);
457       Chain = TPR.getValue(0);
458     } else {
459       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
460                                   RVLocs[i].getValVT(), Glue);
461       Glue = RetVal.getValue(2);
462       Chain = RetVal.getValue(1);
463     }
464     InVals.push_back(RetVal.getValue(0));
465   }
466 
467   return Chain;
468 }
469 
470 /// LowerCall - Functions arguments are copied from virtual regs to
471 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
472 SDValue
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const473 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
474                                  SmallVectorImpl<SDValue> &InVals) const {
475   SelectionDAG &DAG                     = CLI.DAG;
476   SDLoc &dl                             = CLI.DL;
477   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
478   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
479   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
480   SDValue Chain                         = CLI.Chain;
481   SDValue Callee                        = CLI.Callee;
482   CallingConv::ID CallConv              = CLI.CallConv;
483   bool IsVarArg                         = CLI.IsVarArg;
484   bool DoesNotReturn                    = CLI.DoesNotReturn;
485 
486   bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
487   MachineFunction &MF = DAG.getMachineFunction();
488   MachineFrameInfo &MFI = MF.getFrameInfo();
489   auto PtrVT = getPointerTy(MF.getDataLayout());
490 
491   unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
492   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
493     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
494 
495   // Linux ABI treats var-arg calls the same way as regular ones.
496   bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
497 
498   // Analyze operands of the call, assigning locations to each operand.
499   SmallVector<CCValAssign, 16> ArgLocs;
500   HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
501                         NumParams);
502 
503   if (Subtarget.useHVXOps())
504     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
505   else if (DisableArgsMinAlignment)
506     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
507   else
508     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
509 
510   if (CLI.IsTailCall) {
511     bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
512     CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
513                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
514                         OutVals, Ins, DAG);
515     for (const CCValAssign &VA : ArgLocs) {
516       if (VA.isMemLoc()) {
517         CLI.IsTailCall = false;
518         break;
519       }
520     }
521     LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
522                                          : "Argument must be passed on stack. "
523                                            "Not eligible for Tail Call\n"));
524   }
525   // Get a count of how many bytes are to be pushed on the stack.
526   unsigned NumBytes = CCInfo.getStackSize();
527   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
528   SmallVector<SDValue, 8> MemOpChains;
529 
530   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
531   SDValue StackPtr =
532       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
533 
534   bool NeedsArgAlign = false;
535   Align LargestAlignSeen;
536   // Walk the register/memloc assignments, inserting copies/loads.
537   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
538     CCValAssign &VA = ArgLocs[i];
539     SDValue Arg = OutVals[i];
540     ISD::ArgFlagsTy Flags = Outs[i].Flags;
541     // Record if we need > 8 byte alignment on an argument.
542     bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
543     NeedsArgAlign |= ArgAlign;
544 
545     // Promote the value if needed.
546     switch (VA.getLocInfo()) {
547       default:
548         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
549         llvm_unreachable("Unknown loc info!");
550       case CCValAssign::Full:
551         break;
552       case CCValAssign::BCvt:
553         Arg = DAG.getBitcast(VA.getLocVT(), Arg);
554         break;
555       case CCValAssign::SExt:
556         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
557         break;
558       case CCValAssign::ZExt:
559         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
560         break;
561       case CCValAssign::AExt:
562         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
563         break;
564     }
565 
566     if (VA.isMemLoc()) {
567       unsigned LocMemOffset = VA.getLocMemOffset();
568       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
569                                         StackPtr.getValueType());
570       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
571       if (ArgAlign)
572         LargestAlignSeen = std::max(
573             LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
574       if (Flags.isByVal()) {
575         // The argument is a struct passed by value. According to LLVM, "Arg"
576         // is a pointer.
577         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
578                                                         Flags, DAG, dl));
579       } else {
580         MachinePointerInfo LocPI = MachinePointerInfo::getStack(
581             DAG.getMachineFunction(), LocMemOffset);
582         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
583         MemOpChains.push_back(S);
584       }
585       continue;
586     }
587 
588     // Arguments that can be passed on register must be kept at RegsToPass
589     // vector.
590     if (VA.isRegLoc())
591       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
592   }
593 
594   if (NeedsArgAlign && Subtarget.hasV60Ops()) {
595     LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
596     Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
597     LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
598     MFI.ensureMaxAlignment(LargestAlignSeen);
599   }
600   // Transform all store nodes into one single node because all store
601   // nodes are independent of each other.
602   if (!MemOpChains.empty())
603     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
604 
605   SDValue Glue;
606   if (!CLI.IsTailCall) {
607     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
608     Glue = Chain.getValue(1);
609   }
610 
611   // Build a sequence of copy-to-reg nodes chained together with token
612   // chain and flag operands which copy the outgoing args into registers.
613   // The Glue is necessary since all emitted instructions must be
614   // stuck together.
615   if (!CLI.IsTailCall) {
616     for (const auto &R : RegsToPass) {
617       Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
618       Glue = Chain.getValue(1);
619     }
620   } else {
621     // For tail calls lower the arguments to the 'real' stack slot.
622     //
623     // Force all the incoming stack arguments to be loaded from the stack
624     // before any new outgoing arguments are stored to the stack, because the
625     // outgoing stack slots may alias the incoming argument stack slots, and
626     // the alias isn't otherwise explicit. This is slightly more conservative
627     // than necessary, because it means that each store effectively depends
628     // on every argument instead of just those arguments it would clobber.
629     //
630     // Do not flag preceding copytoreg stuff together with the following stuff.
631     Glue = SDValue();
632     for (const auto &R : RegsToPass) {
633       Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
634       Glue = Chain.getValue(1);
635     }
636     Glue = SDValue();
637   }
638 
639   bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
640   unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
641 
642   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
643   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
644   // node so that legalize doesn't hack it.
645   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
646     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
647   } else if (ExternalSymbolSDNode *S =
648              dyn_cast<ExternalSymbolSDNode>(Callee)) {
649     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
650   }
651 
652   // Returns a chain & a flag for retval copy to use.
653   SmallVector<SDValue, 8> Ops;
654   Ops.push_back(Chain);
655   Ops.push_back(Callee);
656 
657   // Add argument registers to the end of the list so that they are
658   // known live into the call.
659   for (const auto &R : RegsToPass)
660     Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));
661 
662   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
663   assert(Mask && "Missing call preserved mask for calling convention");
664   Ops.push_back(DAG.getRegisterMask(Mask));
665 
666   if (Glue.getNode())
667     Ops.push_back(Glue);
668 
669   if (CLI.IsTailCall) {
670     MFI.setHasTailCall();
671     return DAG.getNode(HexagonISD::TC_RETURN, dl, MVT::Other, Ops);
672   }
673 
674   // Set this here because we need to know this for "hasFP" in frame lowering.
675   // The target-independent code calls getFrameRegister before setting it, and
676   // getFrameRegister uses hasFP to determine whether the function has FP.
677   MFI.setHasCalls(true);
678 
679   unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
680   Chain = DAG.getNode(OpCode, dl, {MVT::Other, MVT::Glue}, Ops);
681   Glue = Chain.getValue(1);
682 
683   // Create the CALLSEQ_END node.
684   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, dl);
685   Glue = Chain.getValue(1);
686 
687   // Handle result values, copying them out of physregs into vregs that we
688   // return.
689   return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
690                          InVals, OutVals, Callee);
691 }
692 
693 /// Returns true by value, base pointer and offset pointer and addressing
694 /// mode by reference if this node can be combined with a load / store to
695 /// form a post-indexed load / store.
getPostIndexedAddressParts(SDNode * N,SDNode * Op,SDValue & Base,SDValue & Offset,ISD::MemIndexedMode & AM,SelectionDAG & DAG) const696 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
697       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
698       SelectionDAG &DAG) const {
699   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
700   if (!LSN)
701     return false;
702   EVT VT = LSN->getMemoryVT();
703   if (!VT.isSimple())
704     return false;
705   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
706                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
707                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
708                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
709                      Subtarget.isHVXVectorType(VT.getSimpleVT());
710   if (!IsLegalType)
711     return false;
712 
713   if (Op->getOpcode() != ISD::ADD)
714     return false;
715   Base = Op->getOperand(0);
716   Offset = Op->getOperand(1);
717   if (!isa<ConstantSDNode>(Offset.getNode()))
718     return false;
719   AM = ISD::POST_INC;
720 
721   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
722   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
723 }
724 
LowerFDIV(SDValue Op,SelectionDAG & DAG) const725 SDValue HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
726   if (DAG.getMachineFunction().getFunction().hasOptSize())
727     return SDValue();
728   else
729     return Op;
730 }
731 
732 SDValue
LowerINLINEASM(SDValue Op,SelectionDAG & DAG) const733 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
734   MachineFunction &MF = DAG.getMachineFunction();
735   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
736   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
737   unsigned LR = HRI.getRARegister();
738 
739   if ((Op.getOpcode() != ISD::INLINEASM &&
740        Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
741     return Op;
742 
743   unsigned NumOps = Op.getNumOperands();
744   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
745     --NumOps;  // Ignore the flag operand.
746 
747   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
748     const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
749     unsigned NumVals = Flags.getNumOperandRegisters();
750     ++i;  // Skip the ID value.
751 
752     switch (Flags.getKind()) {
753     default:
754       llvm_unreachable("Bad flags!");
755     case InlineAsm::Kind::RegUse:
756     case InlineAsm::Kind::Imm:
757     case InlineAsm::Kind::Mem:
758       i += NumVals;
759       break;
760     case InlineAsm::Kind::Clobber:
761     case InlineAsm::Kind::RegDef:
762     case InlineAsm::Kind::RegDefEarlyClobber: {
763       for (; NumVals; --NumVals, ++i) {
764         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
765         if (Reg != LR)
766           continue;
767         HMFI.setHasClobberLR(true);
768         return Op;
769       }
770       break;
771       }
772       }
773   }
774 
775   return Op;
776 }
777 
778 // Need to transform ISD::PREFETCH into something that doesn't inherit
779 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
780 // SDNPMayStore.
LowerPREFETCH(SDValue Op,SelectionDAG & DAG) const781 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
782                                              SelectionDAG &DAG) const {
783   SDValue Chain = Op.getOperand(0);
784   SDValue Addr = Op.getOperand(1);
785   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
786   // if the "reg" is fed by an "add".
787   SDLoc DL(Op);
788   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
789   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
790 }
791 
792 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
793 // is marked as having side-effects, while the register read on Hexagon does
794 // not have any. TableGen refuses to accept the direct pattern from that node
795 // to the A4_tfrcpp.
LowerREADCYCLECOUNTER(SDValue Op,SelectionDAG & DAG) const796 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
797                                                      SelectionDAG &DAG) const {
798   SDValue Chain = Op.getOperand(0);
799   SDLoc dl(Op);
800   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
801   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
802 }
803 
804 // Custom-handle ISD::READSTEADYCOUNTER because the target-independent SDNode
805 // is marked as having side-effects, while the register read on Hexagon does
806 // not have any. TableGen refuses to accept the direct pattern from that node
807 // to the A4_tfrcpp.
LowerREADSTEADYCOUNTER(SDValue Op,SelectionDAG & DAG) const808 SDValue HexagonTargetLowering::LowerREADSTEADYCOUNTER(SDValue Op,
809                                                       SelectionDAG &DAG) const {
810   SDValue Chain = Op.getOperand(0);
811   SDLoc dl(Op);
812   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
813   return DAG.getNode(HexagonISD::READTIMER, dl, VTs, Chain);
814 }
815 
LowerINTRINSIC_VOID(SDValue Op,SelectionDAG & DAG) const816 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
817       SelectionDAG &DAG) const {
818   SDValue Chain = Op.getOperand(0);
819   unsigned IntNo = Op.getConstantOperandVal(1);
820   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
821   if (IntNo == Intrinsic::hexagon_prefetch) {
822     SDValue Addr = Op.getOperand(2);
823     SDLoc DL(Op);
824     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
825     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
826   }
827   return SDValue();
828 }
829 
830 SDValue
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const831 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
832                                                SelectionDAG &DAG) const {
833   SDValue Chain = Op.getOperand(0);
834   SDValue Size = Op.getOperand(1);
835   SDValue Align = Op.getOperand(2);
836   SDLoc dl(Op);
837 
838   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
839   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
840 
841   unsigned A = AlignConst->getSExtValue();
842   auto &HFI = *Subtarget.getFrameLowering();
843   // "Zero" means natural stack alignment.
844   if (A == 0)
845     A = HFI.getStackAlign().value();
846 
847   LLVM_DEBUG({
848     dbgs () << __func__ << " Align: " << A << " Size: ";
849     Size.getNode()->dump(&DAG);
850     dbgs() << "\n";
851   });
852 
853   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
854   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
855   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
856 
857   DAG.ReplaceAllUsesOfValueWith(Op, AA);
858   return AA;
859 }
860 
/// Lower the incoming (formal) arguments of the current function.
///
/// The argument locations are computed with the calling-convention function
/// selected from the subtarget (HVX, legacy alignment, or default). For every
/// location one SDValue is appended to \p InVals:
///  - register locations get a fresh virtual register that is registered as
///    a live-in and read with CopyFromReg (i1 values are re-derived from the
///    register with an AND/SETNE pair);
///  - memory locations get a fixed stack object; byval arguments push the
///    frame index itself, everything else pushes a load from it.
/// For variadic functions additional fixed objects are created and their
/// frame indices are recorded in the Hexagon machine function info.
///
/// \returns the incoming \p Chain unchanged.
SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
                        *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (>8byte),
  // the first argument is a pointer that points to the location on caller's
  // stack where the return value will be stored. For Hexagon, the location on
  // caller's stack is passed only when the struct size is smaller than (and
  // equal to) 8 bytes. If not, no address will be passed into callee and
  // callee return the result directly through R0/R1.

  // Helper producing the value stored into FirstVarArgSavedReg after a
  // register argument: for a single register it is the register's index
  // relative to R0/V0 plus one; for a register pair (D/W) it is twice the
  // pair's index plus one pair.
  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
    switch (RC.getID()) {
    case Hexagon::IntRegsRegClassID:
      return Reg - Hexagon::R0 + 1;
    case Hexagon::DoubleRegsRegClassID:
      return (Reg - Hexagon::D0 + 1) * 2;
    case Hexagon::HvxVRRegClassID:
      return Reg - Hexagon::V0 + 1;
    case Hexagon::HvxWRRegClassID:
      return (Reg - Hexagon::W0 + 1) * 2;
    }
    llvm_unreachable("Unexpected register class");
  };

  // The frame lowering object is obtained through a const accessor; the
  // const is cast away because FirstVarArgSavedReg is updated below.
  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HFL.FirstVarArgSavedReg = 0;
  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      // For bit-converted locations the value type, not the location type,
      // selects the register class.
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      Register VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        // Keep only bit 0 of the register and compare it against zero.
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
#ifndef NDEBUG
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
#endif
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      // Overwritten on every register argument, so after the loop this holds
      // the value derived from the last register-passed argument.
      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      // NOTE(review): HEXAGON_LRFP_SIZE presumably accounts for the saved
      // LR/FP pair above the arguments; confirm against its definition.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  // Musl variadic functions: mark R0+FirstVarArgSavedReg .. R0+5 as live-in.
  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
      MRI.addLiveIn(Hexagon::R0+i);
  }

  // Musl variadic functions: record the named-argument frame index range and
  // create the fixed objects for the register saved area and the first
  // stack-passed variadic argument.
  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

    // Create Frame index for the start of register saved area.
    // An odd register count is padded by one 4-byte slot so the area size is
    // a multiple of 8 bytes.
    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
    bool RequiresPadding = (NumVarArgRegs & 1);
    int RegSaveAreaSizePlusPadding = RequiresPadding
                                        ? (NumVarArgRegs + 1) * 4
                                        : NumVarArgRegs * 4;

    if (RegSaveAreaSizePlusPadding > 0) {
      // The offset to saved register area should be 8 byte aligned.
      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
      // NOTE(review): the rounding below only runs when RegAreaStart is
      // already a multiple of 8, where it has no effect; an unaligned start
      // is left unaligned. The condition looks inverted relative to the
      // comment above. Confirm against the frame lowering code before
      // changing it, since the layouts presumably have to agree.
      if (!(RegAreaStart % 8))
        RegAreaStart = (RegAreaStart + 7) & -8;

      int RegSaveAreaFrameIndex =
        MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);

      // This will point to the next argument passed via stack.
      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setVarArgsFrameIndex(FI);
    } else {
      // This will point to the next argument passed via stack, when
      // there is no saved register area.
      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      // With no saved register area both indices refer to the same object.
      HMFI.setRegSavedAreaStartFrameIndex(FI);
      HMFI.setVarArgsFrameIndex(FI);
    }
  }


  // Non-musl variadic functions only need the frame index of the first
  // stack-passed variadic argument.
  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}
1030 
1031 SDValue
LowerVASTART(SDValue Op,SelectionDAG & DAG) const1032 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1033   // VASTART stores the address of the VarArgsFrameIndex slot into the
1034   // memory location argument.
1035   MachineFunction &MF = DAG.getMachineFunction();
1036   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
1037   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
1038   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1039 
1040   if (!Subtarget.isEnvironmentMusl()) {
1041     return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
1042                         MachinePointerInfo(SV));
1043   }
1044   auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
1045   auto &HFL = *Subtarget.getFrameLowering();
1046   SDLoc DL(Op);
1047   SmallVector<SDValue, 8> MemOps;
1048 
1049   // Get frame index of va_list.
1050   SDValue FIN = Op.getOperand(1);
1051 
1052   // If first Vararg register is odd, add 4 bytes to start of
1053   // saved register area to point to the first register location.
1054   // This is because the saved register area has to be 8 byte aligned.
1055   // In case of an odd start register, there will be 4 bytes of padding in
1056   // the beginning of saved register area. If all registers area used up,
1057   // the following condition will handle it correctly.
1058   SDValue SavedRegAreaStartFrameIndex =
1059     DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
1060 
1061   auto PtrVT = getPointerTy(DAG.getDataLayout());
1062 
1063   if (HFL.FirstVarArgSavedReg & 1)
1064     SavedRegAreaStartFrameIndex =
1065       DAG.getNode(ISD::ADD, DL, PtrVT,
1066                   DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
1067                                     MVT::i32),
1068                   DAG.getIntPtrConstant(4, DL));
1069 
1070   // Store the saved register area start pointer.
1071   SDValue Store =
1072     DAG.getStore(Op.getOperand(0), DL,
1073                  SavedRegAreaStartFrameIndex,
1074                  FIN, MachinePointerInfo(SV));
1075   MemOps.push_back(Store);
1076 
1077   // Store saved register area end pointer.
1078   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
1079                     FIN, DAG.getIntPtrConstant(4, DL));
1080   Store = DAG.getStore(Op.getOperand(0), DL,
1081                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1082                                          PtrVT),
1083                        FIN, MachinePointerInfo(SV, 4));
1084   MemOps.push_back(Store);
1085 
1086   // Store overflow area pointer.
1087   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
1088                     FIN, DAG.getIntPtrConstant(4, DL));
1089   Store = DAG.getStore(Op.getOperand(0), DL,
1090                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1091                                          PtrVT),
1092                        FIN, MachinePointerInfo(SV, 8));
1093   MemOps.push_back(Store);
1094 
1095   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1096 }
1097 
1098 SDValue
LowerVACOPY(SDValue Op,SelectionDAG & DAG) const1099 HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
1100   // Assert that the linux ABI is enabled for the current compilation.
1101   assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
1102   SDValue Chain = Op.getOperand(0);
1103   SDValue DestPtr = Op.getOperand(1);
1104   SDValue SrcPtr = Op.getOperand(2);
1105   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
1106   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
1107   SDLoc DL(Op);
1108   // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
1109   // we need to memcopy 12 bytes from va_list to another similar list.
1110   return DAG.getMemcpy(
1111       Chain, DL, DestPtr, SrcPtr, DAG.getIntPtrConstant(12, DL), Align(4),
1112       /*isVolatile*/ false, false, /*CI=*/nullptr, std::nullopt,
1113       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
1114 }
1115 
LowerSETCC(SDValue Op,SelectionDAG & DAG) const1116 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1117   const SDLoc &dl(Op);
1118   SDValue LHS = Op.getOperand(0);
1119   SDValue RHS = Op.getOperand(1);
1120   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1121   MVT ResTy = ty(Op);
1122   MVT OpTy = ty(LHS);
1123 
1124   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1125     MVT ElemTy = OpTy.getVectorElementType();
1126     assert(ElemTy.isScalarInteger());
1127     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1128                                   OpTy.getVectorNumElements());
1129     return DAG.getSetCC(dl, ResTy,
1130                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
1131                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
1132   }
1133 
1134   // Treat all other vector types as legal.
1135   if (ResTy.isVector())
1136     return Op;
1137 
1138   // Comparisons of short integers should use sign-extend, not zero-extend,
1139   // since we can represent small negative values in the compare instructions.
1140   // The LLVM default is to use zero-extend arbitrarily in these cases.
1141   auto isSExtFree = [this](SDValue N) {
1142     switch (N.getOpcode()) {
1143       case ISD::TRUNCATE: {
1144         // A sign-extend of a truncate of a sign-extend is free.
1145         SDValue Op = N.getOperand(0);
1146         if (Op.getOpcode() != ISD::AssertSext)
1147           return false;
1148         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
1149         unsigned ThisBW = ty(N).getSizeInBits();
1150         unsigned OrigBW = OrigTy.getSizeInBits();
1151         // The type that was sign-extended to get the AssertSext must be
1152         // narrower than the type of N (so that N has still the same value
1153         // as the original).
1154         return ThisBW >= OrigBW;
1155       }
1156       case ISD::LOAD:
1157         // We have sign-extended loads.
1158         return true;
1159     }
1160     return false;
1161   };
1162 
1163   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
1164     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
1165     bool IsNegative = C && C->getAPIntValue().isNegative();
1166     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
1167       return DAG.getSetCC(dl, ResTy,
1168                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
1169                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
1170   }
1171 
1172   return SDValue();
1173 }
1174 
1175 SDValue
LowerVSELECT(SDValue Op,SelectionDAG & DAG) const1176 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1177   SDValue PredOp = Op.getOperand(0);
1178   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
1179   MVT OpTy = ty(Op1);
1180   const SDLoc &dl(Op);
1181 
1182   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1183     MVT ElemTy = OpTy.getVectorElementType();
1184     assert(ElemTy.isScalarInteger());
1185     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1186                                   OpTy.getVectorNumElements());
1187     // Generate (trunc (select (_, sext, sext))).
1188     return DAG.getSExtOrTrunc(
1189               DAG.getSelect(dl, WideTy, PredOp,
1190                             DAG.getSExtOrTrunc(Op1, dl, WideTy),
1191                             DAG.getSExtOrTrunc(Op2, dl, WideTy)),
1192               dl, OpTy);
1193   }
1194 
1195   return SDValue();
1196 }
1197 
1198 SDValue
LowerConstantPool(SDValue Op,SelectionDAG & DAG) const1199 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1200   EVT ValTy = Op.getValueType();
1201   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
1202   Constant *CVal = nullptr;
1203   bool isVTi1Type = false;
1204   if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
1205     if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
1206       IRBuilder<> IRB(CV->getContext());
1207       SmallVector<Constant*, 128> NewConst;
1208       unsigned VecLen = CV->getNumOperands();
1209       assert(isPowerOf2_32(VecLen) &&
1210              "conversion only supported for pow2 VectorSize");
1211       for (unsigned i = 0; i < VecLen; ++i)
1212         NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
1213 
1214       CVal = ConstantVector::get(NewConst);
1215       isVTi1Type = true;
1216     }
1217   }
1218   Align Alignment = CPN->getAlign();
1219   bool IsPositionIndependent = isPositionIndependent();
1220   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
1221 
1222   unsigned Offset = 0;
1223   SDValue T;
1224   if (CPN->isMachineConstantPoolEntry())
1225     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
1226                                   Offset, TF);
1227   else if (isVTi1Type)
1228     T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
1229   else
1230     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
1231                                   TF);
1232 
1233   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
1234          "Inconsistent target flag encountered");
1235 
1236   if (IsPositionIndependent)
1237     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
1238   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
1239 }
1240 
1241 SDValue
LowerJumpTable(SDValue Op,SelectionDAG & DAG) const1242 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1243   EVT VT = Op.getValueType();
1244   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
1245   if (isPositionIndependent()) {
1246     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
1247     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
1248   }
1249 
1250   SDValue T = DAG.getTargetJumpTable(Idx, VT);
1251   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
1252 }
1253 
1254 SDValue
LowerRETURNADDR(SDValue Op,SelectionDAG & DAG) const1255 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1256   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1257   MachineFunction &MF = DAG.getMachineFunction();
1258   MachineFrameInfo &MFI = MF.getFrameInfo();
1259   MFI.setReturnAddressIsTaken(true);
1260 
1261   EVT VT = Op.getValueType();
1262   SDLoc dl(Op);
1263   unsigned Depth = Op.getConstantOperandVal(0);
1264   if (Depth) {
1265     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1266     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
1267     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
1268                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
1269                        MachinePointerInfo());
1270   }
1271 
1272   // Return LR, which contains the return address. Mark it an implicit live-in.
1273   Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1274   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
1275 }
1276 
1277 SDValue
LowerFRAMEADDR(SDValue Op,SelectionDAG & DAG) const1278 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1279   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1280   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1281   MFI.setFrameAddressIsTaken(true);
1282 
1283   EVT VT = Op.getValueType();
1284   SDLoc dl(Op);
1285   unsigned Depth = Op.getConstantOperandVal(0);
1286   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
1287                                          HRI.getFrameRegister(), VT);
1288   while (Depth--)
1289     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
1290                             MachinePointerInfo());
1291   return FrameAddr;
1292 }
1293 
1294 SDValue
LowerATOMIC_FENCE(SDValue Op,SelectionDAG & DAG) const1295 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1296   SDLoc dl(Op);
1297   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1298 }
1299 
1300 SDValue
LowerGLOBALADDRESS(SDValue Op,SelectionDAG & DAG) const1301 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1302   SDLoc dl(Op);
1303   auto *GAN = cast<GlobalAddressSDNode>(Op);
1304   auto PtrVT = getPointerTy(DAG.getDataLayout());
1305   auto *GV = GAN->getGlobal();
1306   int64_t Offset = GAN->getOffset();
1307 
1308   auto &HLOF = *HTM.getObjFileLowering();
1309   Reloc::Model RM = HTM.getRelocationModel();
1310 
1311   if (RM == Reloc::Static) {
1312     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1313     const GlobalObject *GO = GV->getAliaseeObject();
1314     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1315       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1316     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1317   }
1318 
1319   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(GV);
1320   if (UsePCRel) {
1321     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1322                                             HexagonII::MO_PCREL);
1323     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1324   }
1325 
1326   // Use GOT index.
1327   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1328   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1329   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1330   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1331 }
1332 
1333 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1334 SDValue
LowerBlockAddress(SDValue Op,SelectionDAG & DAG) const1335 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1336   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1337   SDLoc dl(Op);
1338   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1339 
1340   Reloc::Model RM = HTM.getRelocationModel();
1341   if (RM == Reloc::Static) {
1342     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1343     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1344   }
1345 
1346   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1347   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1348 }
1349 
1350 SDValue
LowerGLOBAL_OFFSET_TABLE(SDValue Op,SelectionDAG & DAG) const1351 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1352       const {
1353   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1354   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1355                                                HexagonII::MO_PCREL);
1356   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1357 }
1358 
1359 SDValue
GetDynamicTLSAddr(SelectionDAG & DAG,SDValue Chain,GlobalAddressSDNode * GA,SDValue Glue,EVT PtrVT,unsigned ReturnReg,unsigned char OperandFlags) const1360 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1361       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1362       unsigned char OperandFlags) const {
1363   MachineFunction &MF = DAG.getMachineFunction();
1364   MachineFrameInfo &MFI = MF.getFrameInfo();
1365   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1366   SDLoc dl(GA);
1367   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1368                                            GA->getValueType(0),
1369                                            GA->getOffset(),
1370                                            OperandFlags);
1371   // Create Operands for the call.The Operands should have the following:
1372   // 1. Chain SDValue
1373   // 2. Callee which in this case is the Global address value.
1374   // 3. Registers live into the call.In this case its R0, as we
1375   //    have just one argument to be passed.
1376   // 4. Glue.
1377   // Note: The order is important.
1378 
1379   const auto &HRI = *Subtarget.getRegisterInfo();
1380   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1381   assert(Mask && "Missing call preserved mask for calling convention");
1382   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1383                     DAG.getRegisterMask(Mask), Glue };
1384   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1385 
1386   // Inform MFI that function has calls.
1387   MFI.setAdjustsStack(true);
1388 
1389   Glue = Chain.getValue(1);
1390   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
1391 }
1392 
1393 //
1394 // Lower using the initial executable model for TLS addresses
1395 //
1396 SDValue
LowerToTLSInitialExecModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1397 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1398       SelectionDAG &DAG) const {
1399   SDLoc dl(GA);
1400   int64_t Offset = GA->getOffset();
1401   auto PtrVT = getPointerTy(DAG.getDataLayout());
1402 
1403   // Get the thread pointer.
1404   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1405 
1406   bool IsPositionIndependent = isPositionIndependent();
1407   unsigned char TF =
1408       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1409 
1410   // First generate the TLS symbol address
1411   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1412                                            Offset, TF);
1413 
1414   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1415 
1416   if (IsPositionIndependent) {
1417     // Generate the GOT pointer in case of position independent code
1418     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1419 
1420     // Add the TLS Symbol address to GOT pointer.This gives
1421     // GOT relative relocation for the symbol.
1422     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1423   }
1424 
1425   // Load the offset value for TLS symbol.This offset is relative to
1426   // thread pointer.
1427   SDValue LoadOffset =
1428       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
1429 
1430   // Address of the thread local variable is the add of thread
1431   // pointer and the offset of the variable.
1432   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1433 }
1434 
1435 //
1436 // Lower using the local executable model for TLS addresses
1437 //
1438 SDValue
LowerToTLSLocalExecModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1439 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1440       SelectionDAG &DAG) const {
1441   SDLoc dl(GA);
1442   int64_t Offset = GA->getOffset();
1443   auto PtrVT = getPointerTy(DAG.getDataLayout());
1444 
1445   // Get the thread pointer.
1446   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1447   // Generate the TLS symbol address
1448   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1449                                            HexagonII::MO_TPREL);
1450   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1451 
1452   // Address of the thread local variable is the add of thread
1453   // pointer and the offset of the variable.
1454   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1455 }
1456 
1457 //
1458 // Lower using the general dynamic model for TLS addresses
1459 //
1460 SDValue
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1461 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1462       SelectionDAG &DAG) const {
1463   SDLoc dl(GA);
1464   int64_t Offset = GA->getOffset();
1465   auto PtrVT = getPointerTy(DAG.getDataLayout());
1466 
1467   // First generate the TLS symbol address
1468   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1469                                            HexagonII::MO_GDGOT);
1470 
1471   // Then, generate the GOT pointer
1472   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1473 
1474   // Add the TLS symbol and the GOT pointer
1475   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1476   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1477 
1478   // Copy over the argument to R0
1479   SDValue InGlue;
1480   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InGlue);
1481   InGlue = Chain.getValue(1);
1482 
1483   unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
1484                        ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1485                        : HexagonII::MO_GDPLT;
1486 
1487   return GetDynamicTLSAddr(DAG, Chain, GA, InGlue, PtrVT,
1488                            Hexagon::R0, Flags);
1489 }
1490 
1491 //
1492 // Lower TLS addresses.
1493 //
1494 // For now for dynamic models, we only support the general dynamic model.
1495 //
1496 SDValue
LowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const1497 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1498       SelectionDAG &DAG) const {
1499   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1500 
1501   switch (HTM.getTLSModel(GA->getGlobal())) {
1502     case TLSModel::GeneralDynamic:
1503     case TLSModel::LocalDynamic:
1504       return LowerToTLSGeneralDynamicModel(GA, DAG);
1505     case TLSModel::InitialExec:
1506       return LowerToTLSInitialExecModel(GA, DAG);
1507     case TLSModel::LocalExec:
1508       return LowerToTLSLocalExecModel(GA, DAG);
1509   }
1510   llvm_unreachable("Bogus TLS model");
1511 }
1512 
1513 //===----------------------------------------------------------------------===//
1514 // TargetLowering Implementation
1515 //===----------------------------------------------------------------------===//
1516 
/// Construct the Hexagon lowering object.
///
/// The constructor configures, in order: alignment/boolean/atomic/scheduling
/// preferences, inline mem-op store limits, the register class for each legal
/// scalar and short-vector type, and then the per-opcode/per-type legalization
/// actions (scalar first, then vector, then subtarget-specific overrides).
/// It finishes by registering the target DAG combines, initializing HVX
/// lowering when HVX ops are in use, and computing register properties.
///
/// NOTE: statement order matters here. Several blocks first set a blanket
/// action (e.g. Expand for every vector type) and later statements override
/// it for specific types.
///
/// \param TM Target machine; it is also stored as a HexagonTargetMachine
///           reference (HTM) via static_cast.
/// \param ST Subtarget whose features and register info drive the setup.
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(Align(16));
  setMinFunctionAlignment(Align(4));
  setPrefFunctionAlignment(Align(16));
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  // The scheduling preference is selected by the EnableHexSDNodeSched option.
  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;

  //
  // Set up register classes.
  //

  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

  addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);

  //
  // Handling of scalar operations.
  //
  // All operations default to "legal", except:
  // - indexed loads and stores (pre-/post-incremented),
  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
  //   ConstantFP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
  //   FLOG, FLOG2, FLOG10, FMAXIMUMNUM, FMINIMUMNUM, FNEARBYINT, FRINT, FROUND,
  //   TRAP, FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG,
  //   ZERO_EXTEND_VECTOR_INREG,
  // which default to "expand" for at least one type.

  // Misc operations.
  setOperationAction(ISD::ConstantFP,           MVT::f32,   Legal);
  setOperationAction(ISD::ConstantFP,           MVT::f64,   Legal);
  setOperationAction(ISD::TRAP,                 MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP,            MVT::Other, Legal);
  setOperationAction(ISD::ConstantPool,         MVT::i32,   Custom);
  setOperationAction(ISD::JumpTable,            MVT::i32,   Custom);
  setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
  setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
  setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
  setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
  setOperationAction(ISD::READSTEADYCOUNTER,    MVT::i64,   Custom);
  setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN,            MVT::Other, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE,  MVT::i32,   Custom);
  setOperationAction(ISD::GlobalTLSAddress,     MVT::i32,   Custom);
  setOperationAction(ISD::ATOMIC_FENCE,         MVT::Other, Custom);

  // Custom legalize GlobalAddress nodes into CONST32.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);

  // Hexagon needs to optimize cases with negative constants.
  setOperationAction(ISD::SETCC, MVT::i8,    Custom);
  setOperationAction(ISD::SETCC, MVT::i16,   Custom);
  setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  setOperationAction(ISD::VAARG,   MVT::Other, Expand);
  // VACOPY is custom-lowered only for the musl environment.
  if (Subtarget.isEnvironmentMusl())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY,  MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  // When jump tables are disabled, the minimum entry count is set to the
  // largest unsigned value.
  if (EmitJumpTables)
    setMinimumJumpTableEntries(MinimumJumpTables);
  else
    setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  for (unsigned LegalIntOp :
       {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
    setOperationAction(LegalIntOp, MVT::i32, Legal);
    setOperationAction(LegalIntOp, MVT::i64, Legal);
  }

  // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
  // but they only operate on i64.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::UADDO, VT, Custom);
    setOperationAction(ISD::USUBO, VT, Custom);
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
    setOperationAction(ISD::UADDO_CARRY, VT, Expand);
    setOperationAction(ISD::USUBO_CARRY, VT, Expand);
  }
  // Override the blanket Expand set in the loop above for the i64 case.
  setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
  setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);

  setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
  setOperationAction(ISD::CTTZ, MVT::i16, Promote);

  // Popcount can count # of 1s in i64 but returns i32.
  setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
  setOperationAction(ISD::BSWAP, MVT::i64, Legal);

  setOperationAction(ISD::FSHL, MVT::i32, Legal);
  setOperationAction(ISD::FSHL, MVT::i64, Legal);
  setOperationAction(ISD::FSHR, MVT::i32, Legal);
  setOperationAction(ISD::FSHR, MVT::i64, Legal);

  // Integer operations expanded for every integer type. ROTL/ROTR on
  // i32/i64 are made Legal again further down when V60 ops are available.
  for (unsigned IntExpOp :
       {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
        ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
        ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
        ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
    for (MVT VT : MVT::integer_valuetypes())
      setOperationAction(IntExpOp, VT, Expand);
  }

  // Floating-point operations expanded for every FP type. FDIV on f32 is
  // switched to Custom further down.
  for (unsigned FPExpOp :
       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
        ISD::FPOW, ISD::FCOPYSIGN}) {
    for (MVT VT : MVT::fp_valuetypes())
      setOperationAction(FPExpOp, VT, Expand);
  }

  // No extending loads from i32.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
  }
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Turn FP extload into load/fpextend.
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

  // Expand BR_CC and SELECT_CC for all integer and fp types.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  //
  // Handling of vector operations.
  //

  // Set the action for vector operations to "expand", then override it with
  // either "custom" or "legal" for specific cases.
  // clang-format off
  static const unsigned VectExpOps[] = {
    // Integer arithmetic:
    ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
    ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
    ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
    // Logical/bit:
    ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,    ISD::BSWAP,   ISD::BITREVERSE,
    // Floating point arithmetic/math functions:
    ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
    ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
    ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
    ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
    ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
    ISD::FMINIMUMNUM,           ISD::FMAXIMUMNUM,
    ISD::FSINCOS, ISD::FLDEXP,
    // Misc:
    ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
    // Vector:
    ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
    ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
    ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
    ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE,
    ISD::SPLAT_VECTOR,
  };
  // clang-format on

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    for (unsigned VectExpOp : VectExpOps)
      setOperationAction(VectExpOp, VT, Expand);

    // Expand all extending loads and truncating stores:
    for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
      if (TargetVT == VT)
        continue;
      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Normalize all inputs to SELECT to be vectors of i32.
    if (VT.getVectorElementType() != MVT::i32) {
      MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, VT32);
    }
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
  setOperationAction(ISD::SADDSAT, MVT::i64, Legal);

  // Extending loads from (native) vectors of i8 into (native) vectors of i16
  // are legal.
  setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);

  // Types natively supported:
  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);

    setOperationAction(ISD::ADD, NativeVT, Legal);
    setOperationAction(ISD::SUB, NativeVT, Legal);
    setOperationAction(ISD::MUL, NativeVT, Legal);
    setOperationAction(ISD::AND, NativeVT, Legal);
    setOperationAction(ISD::OR,  NativeVT, Legal);
    setOperationAction(ISD::XOR, NativeVT, Legal);

    // These three are made Legal only for non-boolean (non-i1) vectors.
    if (NativeVT.getVectorElementType() != MVT::i1) {
      setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
      setOperationAction(ISD::BSWAP,        NativeVT, Legal);
      setOperationAction(ISD::BITREVERSE,   NativeVT, Legal);
    }
  }

  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::SMIN, VT, Legal);
    setOperationAction(ISD::SMAX, VT, Legal);
    setOperationAction(ISD::UMIN, VT, Legal);
    setOperationAction(ISD::UMAX, VT, Legal);
  }

  // Custom lower unaligned loads.
  // Also, for both loads and stores, verify the alignment of the address
  // in case it is a compile-time constant. This is a usability feature to
  // provide a meaningful error message to users.
  for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom-lower load/stores of boolean vectors.
  for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Normalize integer compares to EQ/GT/UGT
  // NOTE(review): MVT::v2i32 appears twice in the list below; the second
  // occurrence only repeats the same settings.
  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
                 MVT::v2i32}) {
    setCondCodeAction(ISD::SETNE,  VT, Expand);
    setCondCodeAction(ISD::SETLE,  VT, Expand);
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETLT,  VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
  }

  // Normalize boolean compares to [U]LE/[U]LT
  for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETGT,  VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETUGT, VT, Expand);
  }

  // Custom-lower bitcasts from i8 to v8i1.
  // NOTE(review): the comment above describes only the BITCAST line; the
  // remaining lines in this group mark other operations Custom as well, and
  // SETCC on v2i16 was already set to Custom earlier in this constructor.
  setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
  setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
  setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);

  // V5+.
  // f64 FADD/FSUB/FMUL are expanded here and made Legal below when the
  // subtarget has V66 (FADD/FSUB) or V67 (FMUL) ops.
  setOperationAction(ISD::FMA,  MVT::f64, Expand);
  setOperationAction(ISD::FADD, MVT::f64, Expand);
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
  setOperationAction(ISD::FMUL, MVT::f64, Expand);
  setOperationAction(ISD::FDIV, MVT::f32, Custom);

  setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Legal);

  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Special handling for half-precision floating point conversions.
  // Lower half float conversions into library calls.
  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // Handling of indexed loads/stores: default is "expand".
  //
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
                 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
  }

  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV60Ops()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  if (Subtarget.hasV66Ops()) {
    setOperationAction(ISD::FADD, MVT::f64, Legal);
    setOperationAction(ISD::FSUB, MVT::f64, Legal);
  }
  if (Subtarget.hasV67Ops()) {
    setOperationAction(ISD::FMINIMUMNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUMNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMUL,    MVT::f64, Legal);
  }

  // Opcodes for which the target requests DAG-combine callbacks.
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::VSELECT);

  if (Subtarget.useHVXOps())
    initializeHVXLowering();

  // Must run after all register classes have been added above.
  computeRegisterProperties(&HRI);
}
1927 
getTargetNodeName(unsigned Opcode) const1928 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1929   switch ((HexagonISD::NodeType)Opcode) {
1930   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
1931   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
1932   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
1933   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
1934   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
1935   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
1936   case HexagonISD::CALL:          return "HexagonISD::CALL";
1937   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
1938   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
1939   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
1940   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
1941   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
1942   case HexagonISD::CP:            return "HexagonISD::CP";
1943   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
1944   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
1945   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
1946   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
1947   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
1948   case HexagonISD::JT:            return "HexagonISD::JT";
1949   case HexagonISD::RET_GLUE:      return "HexagonISD::RET_GLUE";
1950   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
1951   case HexagonISD::VASL:          return "HexagonISD::VASL";
1952   case HexagonISD::VASR:          return "HexagonISD::VASR";
1953   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
1954   case HexagonISD::MFSHL:         return "HexagonISD::MFSHL";
1955   case HexagonISD::MFSHR:         return "HexagonISD::MFSHR";
1956   case HexagonISD::SSAT:          return "HexagonISD::SSAT";
1957   case HexagonISD::USAT:          return "HexagonISD::USAT";
1958   case HexagonISD::SMUL_LOHI:     return "HexagonISD::SMUL_LOHI";
1959   case HexagonISD::UMUL_LOHI:     return "HexagonISD::UMUL_LOHI";
1960   case HexagonISD::USMUL_LOHI:    return "HexagonISD::USMUL_LOHI";
1961   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
1962   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
1963   case HexagonISD::VROR:          return "HexagonISD::VROR";
1964   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
1965   case HexagonISD::READTIMER:     return "HexagonISD::READTIMER";
1966   case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
1967   case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
1968   case HexagonISD::D2P:           return "HexagonISD::D2P";
1969   case HexagonISD::P2D:           return "HexagonISD::P2D";
1970   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
1971   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
1972   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
1973   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
1974   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
1975   case HexagonISD::TL_EXTEND:     return "HexagonISD::TL_EXTEND";
1976   case HexagonISD::TL_TRUNCATE:   return "HexagonISD::TL_TRUNCATE";
1977   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
1978   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
1979   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
1980   case HexagonISD::ISEL:          return "HexagonISD::ISEL";
1981   case HexagonISD::OP_END:        break;
1982   }
1983   return nullptr;
1984 }
1985 
1986 bool
validateConstPtrAlignment(SDValue Ptr,Align NeedAlign,const SDLoc & dl,SelectionDAG & DAG) const1987 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1988       const SDLoc &dl, SelectionDAG &DAG) const {
1989   auto *CA = dyn_cast<ConstantSDNode>(Ptr);
1990   if (!CA)
1991     return true;
1992   unsigned Addr = CA->getZExtValue();
1993   Align HaveAlign =
1994       Addr != 0 ? Align(1ull << llvm::countr_zero(Addr)) : NeedAlign;
1995   if (HaveAlign >= NeedAlign)
1996     return true;
1997 
1998   static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1999 
2000   struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
2001     DiagnosticInfoMisalignedTrap(StringRef M)
2002       : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
2003     void print(DiagnosticPrinter &DP) const override {
2004       DP << Msg;
2005     }
2006     static bool classof(const DiagnosticInfo *DI) {
2007       return DI->getKind() == DK_MisalignedTrap;
2008     }
2009     StringRef Msg;
2010   };
2011 
2012   std::string ErrMsg;
2013   raw_string_ostream O(ErrMsg);
2014   O << "Misaligned constant address: " << format_hex(Addr, 10)
2015     << " has alignment " << HaveAlign.value()
2016     << ", but the memory access requires " << NeedAlign.value();
2017   if (DebugLoc DL = dl.getDebugLoc())
2018     DL.print(O << ", at ");
2019   O << ". The instruction has been replaced with a trap.";
2020 
2021   DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
2022   return false;
2023 }
2024 
2025 SDValue
replaceMemWithUndef(SDValue Op,SelectionDAG & DAG) const2026 HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
2027       const {
2028   const SDLoc &dl(Op);
2029   auto *LS = cast<LSBaseSDNode>(Op.getNode());
2030   assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
2031 
2032   SDValue Chain = LS->getChain();
2033   SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
2034   if (LS->getOpcode() == ISD::LOAD)
2035     return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
2036   return Trap;
2037 }
2038 
2039 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
2040 // intrinsic.
isBrevLdIntrinsic(const Value * Inst)2041 static bool isBrevLdIntrinsic(const Value *Inst) {
2042   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
2043   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
2044           ID == Intrinsic::hexagon_L2_loadri_pbr ||
2045           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
2046           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
2047           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
2048           ID == Intrinsic::hexagon_L2_loadrub_pbr);
2049 }
2050 
2051 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
2052 // instruction. So far we only handle bitcast, extract value and bit reverse
2053 // load intrinsic instructions. Should we handle CGEP ?
getBrevLdObject(Value * V)2054 static Value *getBrevLdObject(Value *V) {
2055   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
2056       Operator::getOpcode(V) == Instruction::BitCast)
2057     V = cast<Operator>(V)->getOperand(0);
2058   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
2059     V = cast<Instruction>(V)->getOperand(0);
2060   return V;
2061 }
2062 
2063 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
2064 // a back edge. If the back edge comes from the intrinsic itself, the incoming
2065 // edge is returned.
returnEdge(const PHINode * PN,Value * IntrBaseVal)2066 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
2067   const BasicBlock *Parent = PN->getParent();
2068   int Idx = -1;
2069   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
2070     BasicBlock *Blk = PN->getIncomingBlock(i);
2071     // Determine if the back edge is originated from intrinsic.
2072     if (Blk == Parent) {
2073       Value *BackEdgeVal = PN->getIncomingValue(i);
2074       Value *BaseVal;
2075       // Loop over till we return the same Value or we hit the IntrBaseVal.
2076       do {
2077         BaseVal = BackEdgeVal;
2078         BackEdgeVal = getBrevLdObject(BackEdgeVal);
2079       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
2080       // If the getBrevLdObject returns IntrBaseVal, we should return the
2081       // incoming edge.
2082       if (IntrBaseVal == BackEdgeVal)
2083         continue;
2084       Idx = i;
2085       break;
2086     } else // Set the node to incoming edge.
2087       Idx = i;
2088   }
2089   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
2090   return PN->getIncomingValue(Idx);
2091 }
2092 
2093 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
2094 // pointer points to, for the bit-reverse load intrinsic. Setting this to
2095 // memoperand might help alias analysis to figure out the dependencies.
getUnderLyingObjectForBrevLdIntr(Value * V)2096 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
2097   Value *IntrBaseVal = V;
2098   Value *BaseVal;
2099   // Loop over till we return the same Value, implies we either figure out
2100   // the object or we hit a PHI
2101   do {
2102     BaseVal = V;
2103     V = getBrevLdObject(V);
2104   } while (BaseVal != V);
2105 
2106   // Identify the object from PHINode.
2107   if (const PHINode *PN = dyn_cast<PHINode>(V))
2108     return returnEdge(PN, IntrBaseVal);
2109   // For non PHI nodes, the object is the last value returned by getBrevLdObject
2110   else
2111     return V;
2112 }
2113 
2114 /// Given an intrinsic, checks if on the target the intrinsic will need to map
2115 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
2116 /// true and store the intrinsic information into the IntrinsicInfo that was
2117 /// passed to the function.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const2118 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2119                                                const CallInst &I,
2120                                                MachineFunction &MF,
2121                                                unsigned Intrinsic) const {
2122   switch (Intrinsic) {
2123   case Intrinsic::hexagon_L2_loadrd_pbr:
2124   case Intrinsic::hexagon_L2_loadri_pbr:
2125   case Intrinsic::hexagon_L2_loadrh_pbr:
2126   case Intrinsic::hexagon_L2_loadruh_pbr:
2127   case Intrinsic::hexagon_L2_loadrb_pbr:
2128   case Intrinsic::hexagon_L2_loadrub_pbr: {
2129     Info.opc = ISD::INTRINSIC_W_CHAIN;
2130     auto &DL = I.getDataLayout();
2131     auto &Cont = I.getCalledFunction()->getParent()->getContext();
2132     // The intrinsic function call is of the form { ElTy, i8* }
2133     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
2134     // should be derived from ElTy.
2135     Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
2136     Info.memVT = MVT::getVT(ElTy);
2137     llvm::Value *BasePtrVal = I.getOperand(0);
2138     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
2139     // The offset value comes through Modifier register. For now, assume the
2140     // offset is 0.
2141     Info.offset = 0;
2142     Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
2143     Info.flags = MachineMemOperand::MOLoad;
2144     return true;
2145   }
2146   case Intrinsic::hexagon_V6_vgathermw:
2147   case Intrinsic::hexagon_V6_vgathermw_128B:
2148   case Intrinsic::hexagon_V6_vgathermh:
2149   case Intrinsic::hexagon_V6_vgathermh_128B:
2150   case Intrinsic::hexagon_V6_vgathermhw:
2151   case Intrinsic::hexagon_V6_vgathermhw_128B:
2152   case Intrinsic::hexagon_V6_vgathermwq:
2153   case Intrinsic::hexagon_V6_vgathermwq_128B:
2154   case Intrinsic::hexagon_V6_vgathermhq:
2155   case Intrinsic::hexagon_V6_vgathermhq_128B:
2156   case Intrinsic::hexagon_V6_vgathermhwq:
2157   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
2158     const Module &M = *I.getParent()->getParent()->getParent();
2159     Info.opc = ISD::INTRINSIC_W_CHAIN;
2160     Type *VecTy = I.getArgOperand(1)->getType();
2161     Info.memVT = MVT::getVT(VecTy);
2162     Info.ptrVal = I.getArgOperand(0);
2163     Info.offset = 0;
2164     Info.align =
2165         MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
2166     Info.flags = MachineMemOperand::MOLoad |
2167                  MachineMemOperand::MOStore |
2168                  MachineMemOperand::MOVolatile;
2169     return true;
2170   }
2171   default:
2172     break;
2173   }
2174   return false;
2175 }
2176 
hasBitTest(SDValue X,SDValue Y) const2177 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2178   return X.getValueType().isScalarInteger(); // 'tstbit'
2179 }
2180 
isTruncateFree(Type * Ty1,Type * Ty2) const2181 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2182   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
2183 }
2184 
isTruncateFree(EVT VT1,EVT VT2) const2185 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2186   if (!VT1.isSimple() || !VT2.isSimple())
2187     return false;
2188   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2189 }
2190 
isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT VT) const2191 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2192     const MachineFunction &MF, EVT VT) const {
2193   return isOperationLegalOrCustom(ISD::FMA, VT);
2194 }
2195 
2196 // Should we expand the build vector with shuffles?
shouldExpandBuildVectorWithShuffles(EVT VT,unsigned DefinedValues) const2197 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2198       unsigned DefinedValues) const {
2199   return false;
2200 }
2201 
isExtractSubvectorCheap(EVT ResVT,EVT SrcVT,unsigned Index) const2202 bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2203       unsigned Index) const {
2204   assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
2205   if (!ResVT.isSimple() || !SrcVT.isSimple())
2206     return false;
2207 
2208   MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
2209   if (ResTy.getVectorElementType() != MVT::i1)
2210     return true;
2211 
2212   // Non-HVX bool vectors are relatively cheap.
2213   return SrcTy.getVectorNumElements() <= 8;
2214 }
2215 
isTargetCanonicalConstantNode(SDValue Op) const2216 bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
2217   return Op.getOpcode() == ISD::CONCAT_VECTORS ||
2218          TargetLowering::isTargetCanonicalConstantNode(Op);
2219 }
2220 
isShuffleMaskLegal(ArrayRef<int> Mask,EVT VT) const2221 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2222                                                EVT VT) const {
2223   return true;
2224 }
2225 
2226 TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const2227 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2228   unsigned VecLen = VT.getVectorMinNumElements();
2229   MVT ElemTy = VT.getVectorElementType();
2230 
2231   if (VecLen == 1 || VT.isScalableVector())
2232     return TargetLoweringBase::TypeScalarizeVector;
2233 
2234   if (Subtarget.useHVXOps()) {
2235     unsigned Action = getPreferredHvxVectorAction(VT);
2236     if (Action != ~0u)
2237       return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2238   }
2239 
2240   // Always widen (remaining) vectors of i1.
2241   if (ElemTy == MVT::i1)
2242     return TargetLoweringBase::TypeWidenVector;
2243   // Widen non-power-of-2 vectors. Such types cannot be split right now,
2244   // and computeRegisterProperties will override "split" with "widen",
2245   // which can cause other issues.
2246   if (!isPowerOf2_32(VecLen))
2247     return TargetLoweringBase::TypeWidenVector;
2248 
2249   return TargetLoweringBase::TypeSplitVector;
2250 }
2251 
2252 TargetLoweringBase::LegalizeAction
getCustomOperationAction(SDNode & Op) const2253 HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
2254   if (Subtarget.useHVXOps()) {
2255     unsigned Action = getCustomHvxOperationAction(Op);
2256     if (Action != ~0u)
2257       return static_cast<TargetLoweringBase::LegalizeAction>(Action);
2258   }
2259   return TargetLoweringBase::Legal;
2260 }
2261 
2262 std::pair<SDValue, int>
getBaseAndOffset(SDValue Addr) const2263 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2264   if (Addr.getOpcode() == ISD::ADD) {
2265     SDValue Op1 = Addr.getOperand(1);
2266     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
2267       return { Addr.getOperand(0), CN->getSExtValue() };
2268   }
2269   return { Addr, 0 };
2270 }
2271 
2272 // Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
2273 // to select data from, V3 is the permutation.
2274 SDValue
LowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const2275 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2276       const {
2277   const auto *SVN = cast<ShuffleVectorSDNode>(Op);
2278   ArrayRef<int> AM = SVN->getMask();
2279   assert(AM.size() <= 8 && "Unexpected shuffle mask");
2280   unsigned VecLen = AM.size();
2281 
2282   MVT VecTy = ty(Op);
2283   assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2284          "HVX shuffles should be legal");
2285   assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
2286 
2287   SDValue Op0 = Op.getOperand(0);
2288   SDValue Op1 = Op.getOperand(1);
2289   const SDLoc &dl(Op);
2290 
2291   // If the inputs are not the same as the output, bail. This is not an
2292   // error situation, but complicates the handling and the default expansion
2293   // (into BUILD_VECTOR) should be adequate.
2294   if (ty(Op0) != VecTy || ty(Op1) != VecTy)
2295     return SDValue();
2296 
2297   // Normalize the mask so that the first non-negative index comes from
2298   // the first operand.
2299   SmallVector<int, 8> Mask(AM);
2300   unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
2301   if (F == AM.size())
2302     return DAG.getUNDEF(VecTy);
2303   if (AM[F] >= int(VecLen)) {
2304     ShuffleVectorSDNode::commuteMask(Mask);
2305     std::swap(Op0, Op1);
2306   }
2307 
2308   // Express the shuffle mask in terms of bytes.
2309   SmallVector<int,8> ByteMask;
2310   unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2311   for (int M : Mask) {
2312     if (M < 0) {
2313       for (unsigned j = 0; j != ElemBytes; ++j)
2314         ByteMask.push_back(-1);
2315     } else {
2316       for (unsigned j = 0; j != ElemBytes; ++j)
2317         ByteMask.push_back(M*ElemBytes + j);
2318     }
2319   }
2320   assert(ByteMask.size() <= 8);
2321 
2322   // All non-undef (non-negative) indexes are well within [0..127], so they
2323   // fit in a single byte. Build two 64-bit words:
2324   // - MaskIdx where each byte is the corresponding index (for non-negative
2325   //   indexes), and 0xFF for negative indexes, and
2326   // - MaskUnd that has 0xFF for each negative index.
2327   uint64_t MaskIdx = 0;
2328   uint64_t MaskUnd = 0;
2329   for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2330     unsigned S = 8*i;
2331     uint64_t M = ByteMask[i] & 0xFF;
2332     if (M == 0xFF)
2333       MaskUnd |= M << S;
2334     MaskIdx |= M << S;
2335   }
2336 
2337   if (ByteMask.size() == 4) {
2338     // Identity.
2339     if (MaskIdx == (0x03020100 | MaskUnd))
2340       return Op0;
2341     // Byte swap.
2342     if (MaskIdx == (0x00010203 | MaskUnd)) {
2343       SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2344       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2345       return DAG.getBitcast(VecTy, T1);
2346     }
2347 
2348     // Byte packs.
2349     SDValue Concat10 =
2350         getCombine(Op1, Op0, dl, typeJoin({ty(Op1), ty(Op0)}), DAG);
2351     if (MaskIdx == (0x06040200 | MaskUnd))
2352       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2353     if (MaskIdx == (0x07050301 | MaskUnd))
2354       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2355 
2356     SDValue Concat01 =
2357         getCombine(Op0, Op1, dl, typeJoin({ty(Op0), ty(Op1)}), DAG);
2358     if (MaskIdx == (0x02000604 | MaskUnd))
2359       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2360     if (MaskIdx == (0x03010705 | MaskUnd))
2361       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2362   }
2363 
2364   if (ByteMask.size() == 8) {
2365     // Identity.
2366     if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2367       return Op0;
2368     // Byte swap.
2369     if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2370       SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2371       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2372       return DAG.getBitcast(VecTy, T1);
2373     }
2374 
2375     // Halfword picks.
2376     if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2377       return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2378     if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2379       return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2380     if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2381       return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2382     if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2383       return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2384     if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2385       VectorPair P = opSplit(Op0, dl, DAG);
2386       return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2387     }
2388 
2389     // Byte packs.
2390     if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2391       return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2392     if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2393       return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2394   }
2395 
2396   return SDValue();
2397 }
2398 
2399 SDValue
getSplatValue(SDValue Op,SelectionDAG & DAG) const2400 HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
2401   switch (Op.getOpcode()) {
2402     case ISD::BUILD_VECTOR:
2403       if (SDValue S = cast<BuildVectorSDNode>(Op)->getSplatValue())
2404         return S;
2405       break;
2406     case ISD::SPLAT_VECTOR:
2407       return Op.getOperand(0);
2408   }
2409   return SDValue();
2410 }
2411 
2412 // Create a Hexagon-specific node for shifting a vector by an integer.
2413 SDValue
getVectorShiftByInt(SDValue Op,SelectionDAG & DAG) const2414 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2415       const {
2416   unsigned NewOpc;
2417   switch (Op.getOpcode()) {
2418     case ISD::SHL:
2419       NewOpc = HexagonISD::VASL;
2420       break;
2421     case ISD::SRA:
2422       NewOpc = HexagonISD::VASR;
2423       break;
2424     case ISD::SRL:
2425       NewOpc = HexagonISD::VLSR;
2426       break;
2427     default:
2428       llvm_unreachable("Unexpected shift opcode");
2429   }
2430 
2431   if (SDValue Sp = getSplatValue(Op.getOperand(1), DAG))
2432     return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), Sp);
2433   return SDValue();
2434 }
2435 
2436 SDValue
LowerVECTOR_SHIFT(SDValue Op,SelectionDAG & DAG) const2437 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2438   const SDLoc &dl(Op);
2439 
2440   // First try to convert the shift (by vector) to a shift by a scalar.
2441   // If we first split the shift, the shift amount will become 'extract
2442   // subvector', and will no longer be recognized as scalar.
2443   SDValue Res = Op;
2444   if (SDValue S = getVectorShiftByInt(Op, DAG))
2445     Res = S;
2446 
2447   unsigned Opc = Res.getOpcode();
2448   switch (Opc) {
2449   case HexagonISD::VASR:
2450   case HexagonISD::VLSR:
2451   case HexagonISD::VASL:
2452     break;
2453   default:
2454     // No instructions for shifts by non-scalars.
2455     return SDValue();
2456   }
2457 
2458   MVT ResTy = ty(Res);
2459   if (ResTy.getVectorElementType() != MVT::i8)
2460     return Res;
2461 
2462   // For shifts of i8, extend the inputs to i16, then truncate back to i8.
2463   assert(ResTy.getVectorElementType() == MVT::i8);
2464   SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
2465 
2466   auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
2467     MVT Ty = ty(V);
2468     MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
2469     SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
2470                                            : DAG.getZExtOrTrunc(V, dl, ExtTy);
2471     SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
2472     return DAG.getZExtOrTrunc(ExtS, dl, Ty);
2473   };
2474 
2475   if (ResTy.getSizeInBits() == 32)
2476     return ShiftPartI8(Opc, Val, Amt);
2477 
2478   auto [LoV, HiV] = opSplit(Val, dl, DAG);
2479   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
2480                      {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
2481 }
2482 
2483 SDValue
LowerROTL(SDValue Op,SelectionDAG & DAG) const2484 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2485   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2486     return Op;
2487   return SDValue();
2488 }
2489 
2490 SDValue
LowerBITCAST(SDValue Op,SelectionDAG & DAG) const2491 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2492   MVT ResTy = ty(Op);
2493   SDValue InpV = Op.getOperand(0);
2494   MVT InpTy = ty(InpV);
2495   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2496   const SDLoc &dl(Op);
2497 
2498   // Handle conversion from i8 to v8i1.
2499   if (InpTy == MVT::i8) {
2500     if (ResTy == MVT::v8i1) {
2501       SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2502       SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2503       return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2504     }
2505     return SDValue();
2506   }
2507 
2508   return Op;
2509 }
2510 
2511 bool
getBuildVectorConstInts(ArrayRef<SDValue> Values,MVT VecTy,SelectionDAG & DAG,MutableArrayRef<ConstantInt * > Consts) const2512 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2513       MVT VecTy, SelectionDAG &DAG,
2514       MutableArrayRef<ConstantInt*> Consts) const {
2515   MVT ElemTy = VecTy.getVectorElementType();
2516   unsigned ElemWidth = ElemTy.getSizeInBits();
2517   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2518   bool AllConst = true;
2519 
2520   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2521     SDValue V = Values[i];
2522     if (V.isUndef()) {
2523       Consts[i] = ConstantInt::get(IntTy, 0);
2524       continue;
2525     }
2526     // Make sure to always cast to IntTy.
2527     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2528       const ConstantInt *CI = CN->getConstantIntValue();
2529       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2530     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2531       const ConstantFP *CF = CN->getConstantFPValue();
2532       APInt A = CF->getValueAPF().bitcastToAPInt();
2533       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2534     } else {
2535       AllConst = false;
2536     }
2537   }
2538   return AllConst;
2539 }
2540 
2541 SDValue
buildVector32(ArrayRef<SDValue> Elem,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const2542 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2543                                      MVT VecTy, SelectionDAG &DAG) const {
2544   MVT ElemTy = VecTy.getVectorElementType();
2545   assert(VecTy.getVectorNumElements() == Elem.size());
2546 
2547   SmallVector<ConstantInt*,4> Consts(Elem.size());
2548   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2549 
2550   unsigned First, Num = Elem.size();
2551   for (First = 0; First != Num; ++First) {
2552     if (!isUndef(Elem[First]))
2553       break;
2554   }
2555   if (First == Num)
2556     return DAG.getUNDEF(VecTy);
2557 
2558   if (AllConst &&
2559       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2560     return getZero(dl, VecTy, DAG);
2561 
2562   if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2563     assert(Elem.size() == 2);
2564     if (AllConst) {
2565       // The 'Consts' array will have all values as integers regardless
2566       // of the vector element type.
2567       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2568                    Consts[1]->getZExtValue() << 16;
2569       return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
2570     }
2571     SDValue E0, E1;
2572     if (ElemTy == MVT::f16) {
2573       E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
2574       E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
2575     } else {
2576       E0 = Elem[0];
2577       E1 = Elem[1];
2578     }
2579     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
2580     return DAG.getBitcast(VecTy, N);
2581   }
2582 
2583   if (ElemTy == MVT::i8) {
2584     // First try generating a constant.
2585     if (AllConst) {
2586       uint32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2587                    (Consts[1]->getZExtValue() & 0xFF) << 8 |
2588                    (Consts[2]->getZExtValue() & 0xFF) << 16 |
2589                    Consts[3]->getZExtValue() << 24;
2590       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2591     }
2592 
2593     // Then try splat.
2594     bool IsSplat = true;
2595     for (unsigned i = First+1; i != Num; ++i) {
2596       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2597         continue;
2598       IsSplat = false;
2599       break;
2600     }
2601     if (IsSplat) {
2602       // Legalize the operand of SPLAT_VECTOR.
2603       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2604       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2605     }
2606 
2607     // Generate
2608     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2609     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2610     assert(Elem.size() == 4);
2611     SDValue Vs[4];
2612     for (unsigned i = 0; i != 4; ++i) {
2613       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2614       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2615     }
2616     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2617     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2618     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2619     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2620     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2621 
2622     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2623     return DAG.getBitcast(MVT::v4i8, R);
2624   }
2625 
2626 #ifndef NDEBUG
2627   dbgs() << "VecTy: " << VecTy << '\n';
2628 #endif
2629   llvm_unreachable("Unexpected vector element type");
2630 }
2631 
2632 SDValue
buildVector64(ArrayRef<SDValue> Elem,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const2633 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2634                                      MVT VecTy, SelectionDAG &DAG) const {
2635   MVT ElemTy = VecTy.getVectorElementType();
2636   assert(VecTy.getVectorNumElements() == Elem.size());
2637 
2638   SmallVector<ConstantInt*,8> Consts(Elem.size());
2639   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2640 
2641   unsigned First, Num = Elem.size();
2642   for (First = 0; First != Num; ++First) {
2643     if (!isUndef(Elem[First]))
2644       break;
2645   }
2646   if (First == Num)
2647     return DAG.getUNDEF(VecTy);
2648 
2649   if (AllConst &&
2650       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2651     return getZero(dl, VecTy, DAG);
2652 
2653   // First try splat if possible.
2654   if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2655     bool IsSplat = true;
2656     for (unsigned i = First+1; i != Num; ++i) {
2657       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2658         continue;
2659       IsSplat = false;
2660       break;
2661     }
2662     if (IsSplat) {
2663       // Legalize the operand of SPLAT_VECTOR
2664       SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
2665                                      : Elem[First];
2666       SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
2667       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2668     }
2669   }
2670 
2671   // Then try constant.
2672   if (AllConst) {
2673     uint64_t Val = 0;
2674     unsigned W = ElemTy.getSizeInBits();
2675     uint64_t Mask = (1ull << W) - 1;
2676     for (unsigned i = 0; i != Num; ++i)
2677       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2678     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2679     return DAG.getBitcast(VecTy, V0);
2680   }
2681 
2682   // Build two 32-bit vectors and concatenate.
2683   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2684   SDValue L = (ElemTy == MVT::i32)
2685                 ? Elem[0]
2686                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2687   SDValue H = (ElemTy == MVT::i32)
2688                 ? Elem[1]
2689                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2690   return getCombine(H, L, dl, VecTy, DAG);
2691 }
2692 
2693 SDValue
extractVector(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ValTy,MVT ResTy,SelectionDAG & DAG) const2694 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2695                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
2696                                      SelectionDAG &DAG) const {
2697   MVT VecTy = ty(VecV);
2698   assert(!ValTy.isVector() ||
2699          VecTy.getVectorElementType() == ValTy.getVectorElementType());
2700   if (VecTy.getVectorElementType() == MVT::i1)
2701     return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
2702 
2703   unsigned VecWidth = VecTy.getSizeInBits();
2704   unsigned ValWidth = ValTy.getSizeInBits();
2705   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2706   assert((VecWidth % ElemWidth) == 0);
2707   assert(VecWidth == 32 || VecWidth == 64);
2708 
2709   // Cast everything to scalar integer types.
2710   MVT ScalarTy = tyScalar(VecTy);
2711   VecV = DAG.getBitcast(ScalarTy, VecV);
2712 
2713   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2714   SDValue ExtV;
2715 
2716   if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
2717     unsigned Off = IdxN->getZExtValue() * ElemWidth;
2718     if (VecWidth == 64 && ValWidth == 32) {
2719       assert(Off == 0 || Off == 32);
2720       ExtV = Off == 0 ? LoHalf(VecV, DAG) : HiHalf(VecV, DAG);
2721     } else if (Off == 0 && (ValWidth % 8) == 0) {
2722       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2723     } else {
2724       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2725       // The return type of EXTRACTU must be the same as the type of the
2726       // input vector.
2727       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2728                          {VecV, WidthV, OffV});
2729     }
2730   } else {
2731     if (ty(IdxV) != MVT::i32)
2732       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2733     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2734                                DAG.getConstant(ElemWidth, dl, MVT::i32));
2735     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2736                        {VecV, WidthV, OffV});
2737   }
2738 
2739   // Cast ExtV to the requested result type.
2740   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2741   ExtV = DAG.getBitcast(ResTy, ExtV);
2742   return ExtV;
2743 }
2744 
2745 SDValue
extractVectorPred(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ValTy,MVT ResTy,SelectionDAG & DAG) const2746 HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
2747                                          const SDLoc &dl, MVT ValTy, MVT ResTy,
2748                                          SelectionDAG &DAG) const {
2749   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2750   // without any coprocessors).
2751   MVT VecTy = ty(VecV);
2752   unsigned VecWidth = VecTy.getSizeInBits();
2753   unsigned ValWidth = ValTy.getSizeInBits();
2754   assert(VecWidth == VecTy.getVectorNumElements() &&
2755          "Vector elements should equal vector width size");
2756   assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2757 
2758   // Check if this is an extract of the lowest bit.
2759   if (isNullConstant(IdxV) && ValTy.getSizeInBits() == 1) {
2760     // Extracting the lowest bit is a no-op, but it changes the type,
2761     // so it must be kept as an operation to avoid errors related to
2762     // type mismatches.
2763     return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2764   }
2765 
2766   // If the value extracted is a single bit, use tstbit.
2767   if (ValWidth == 1) {
2768     SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2769     SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2770     SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2771     return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2772   }
2773 
2774   // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2775   // a predicate register. The elements of the vector are repeated
2776   // in the register (if necessary) so that the total number is 8.
2777   // The extracted subvector will need to be expanded in such a way.
2778   unsigned Scale = VecWidth / ValWidth;
2779 
2780   // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2781   // position 0.
2782   assert(ty(IdxV) == MVT::i32);
2783   unsigned VecRep = 8 / VecWidth;
2784   SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2785                            DAG.getConstant(8*VecRep, dl, MVT::i32));
2786   SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2787   SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2788   while (Scale > 1) {
2789     // The longest possible subvector is at most 32 bits, so it is always
2790     // contained in the low subregister.
2791     T1 = LoHalf(T1, DAG);
2792     T1 = expandPredicate(T1, dl, DAG);
2793     Scale /= 2;
2794   }
2795 
2796   return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2797 }
2798 
2799 SDValue
insertVector(SDValue VecV,SDValue ValV,SDValue IdxV,const SDLoc & dl,MVT ValTy,SelectionDAG & DAG) const2800 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2801                                     const SDLoc &dl, MVT ValTy,
2802                                     SelectionDAG &DAG) const {
2803   MVT VecTy = ty(VecV);
2804   if (VecTy.getVectorElementType() == MVT::i1)
2805     return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
2806 
2807   unsigned VecWidth = VecTy.getSizeInBits();
2808   unsigned ValWidth = ValTy.getSizeInBits();
2809   assert(VecWidth == 32 || VecWidth == 64);
2810   assert((VecWidth % ValWidth) == 0);
2811 
2812   // Cast everything to scalar integer types.
2813   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2814   // The actual type of ValV may be different than ValTy (which is related
2815   // to the vector type).
2816   unsigned VW = ty(ValV).getSizeInBits();
2817   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2818   VecV = DAG.getBitcast(ScalarTy, VecV);
2819   if (VW != VecWidth)
2820     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2821 
2822   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2823   SDValue InsV;
2824 
2825   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2826     unsigned W = C->getZExtValue() * ValWidth;
2827     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2828     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2829                        {VecV, ValV, WidthV, OffV});
2830   } else {
2831     if (ty(IdxV) != MVT::i32)
2832       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2833     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2834     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2835                        {VecV, ValV, WidthV, OffV});
2836   }
2837 
2838   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2839 }
2840 
2841 SDValue
insertVectorPred(SDValue VecV,SDValue ValV,SDValue IdxV,const SDLoc & dl,MVT ValTy,SelectionDAG & DAG) const2842 HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
2843                                         SDValue IdxV, const SDLoc &dl,
2844                                         MVT ValTy, SelectionDAG &DAG) const {
2845   MVT VecTy = ty(VecV);
2846   unsigned VecLen = VecTy.getVectorNumElements();
2847 
2848   if (ValTy == MVT::i1) {
2849     SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2850     SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
2851     SDValue Width = DAG.getConstant(8 / VecLen, dl, MVT::i32);
2852     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2853     SDValue Ins =
2854         DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
2855     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
2856   }
2857 
2858   assert(ValTy.getVectorElementType() == MVT::i1);
2859   SDValue ValR = ValTy.isVector()
2860                      ? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
2861                      : DAG.getSExtOrTrunc(ValV, dl, MVT::i64);
2862 
2863   unsigned Scale = VecLen / ValTy.getVectorNumElements();
2864   assert(Scale > 1);
2865 
2866   for (unsigned R = Scale; R > 1; R /= 2) {
2867     ValR = contractPredicate(ValR, dl, DAG);
2868     ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
2869   }
2870 
2871   SDValue Width = DAG.getConstant(64 / Scale, dl, MVT::i32);
2872   SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2873   SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2874   SDValue Ins =
2875       DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
2876   return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2877 }
2878 
2879 SDValue
expandPredicate(SDValue Vec32,const SDLoc & dl,SelectionDAG & DAG) const2880 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2881                                        SelectionDAG &DAG) const {
2882   assert(ty(Vec32).getSizeInBits() == 32);
2883   if (isUndef(Vec32))
2884     return DAG.getUNDEF(MVT::i64);
2885   SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
2886   SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
2887   return DAG.getBitcast(MVT::i64, X);
2888 }
2889 
2890 SDValue
contractPredicate(SDValue Vec64,const SDLoc & dl,SelectionDAG & DAG) const2891 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2892                                          SelectionDAG &DAG) const {
2893   assert(ty(Vec64).getSizeInBits() == 64);
2894   if (isUndef(Vec64))
2895     return DAG.getUNDEF(MVT::i32);
2896   // Collect even bytes:
2897   SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
2898   SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
2899                                    {0, 2, 4, 6, 1, 3, 5, 7});
2900   return extractVector(S, DAG.getConstant(0, dl, MVT::i32), dl, MVT::v4i8,
2901                        MVT::i32, DAG);
2902 }
2903 
2904 SDValue
getZero(const SDLoc & dl,MVT Ty,SelectionDAG & DAG) const2905 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2906       const {
2907   if (Ty.isVector()) {
2908     unsigned W = Ty.getSizeInBits();
2909     if (W <= 64)
2910       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2911     return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
2912   }
2913 
2914   if (Ty.isInteger())
2915     return DAG.getConstant(0, dl, Ty);
2916   if (Ty.isFloatingPoint())
2917     return DAG.getConstantFP(0.0, dl, Ty);
2918   llvm_unreachable("Invalid type for zero");
2919 }
2920 
2921 SDValue
appendUndef(SDValue Val,MVT ResTy,SelectionDAG & DAG) const2922 HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2923       const {
2924   MVT ValTy = ty(Val);
2925   assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2926 
2927   unsigned ValLen = ValTy.getVectorNumElements();
2928   unsigned ResLen = ResTy.getVectorNumElements();
2929   if (ValLen == ResLen)
2930     return Val;
2931 
2932   const SDLoc &dl(Val);
2933   assert(ValLen < ResLen);
2934   assert(ResLen % ValLen == 0);
2935 
2936   SmallVector<SDValue, 4> Concats = {Val};
2937   for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
2938     Concats.push_back(DAG.getUNDEF(ValTy));
2939 
2940   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
2941 }
2942 
2943 SDValue
getCombine(SDValue Hi,SDValue Lo,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const2944 HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
2945                                   MVT ResTy, SelectionDAG &DAG) const {
2946   MVT ElemTy = ty(Hi);
2947   assert(ElemTy == ty(Lo));
2948 
2949   if (!ElemTy.isVector()) {
2950     assert(ElemTy.isScalarInteger());
2951     MVT PairTy = MVT::getIntegerVT(2 * ElemTy.getSizeInBits());
2952     SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, PairTy, Lo, Hi);
2953     return DAG.getBitcast(ResTy, Pair);
2954   }
2955 
2956   unsigned Width = ElemTy.getSizeInBits();
2957   MVT IntTy = MVT::getIntegerVT(Width);
2958   MVT PairTy = MVT::getIntegerVT(2 * Width);
2959   SDValue Pair =
2960       DAG.getNode(ISD::BUILD_PAIR, dl, PairTy,
2961                   {DAG.getBitcast(IntTy, Lo), DAG.getBitcast(IntTy, Hi)});
2962   return DAG.getBitcast(ResTy, Pair);
2963 }
2964 
2965 SDValue
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const2966 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2967   MVT VecTy = ty(Op);
2968   unsigned BW = VecTy.getSizeInBits();
2969   const SDLoc &dl(Op);
2970   SmallVector<SDValue,8> Ops;
2971   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2972     Ops.push_back(Op.getOperand(i));
2973 
2974   if (BW == 32)
2975     return buildVector32(Ops, dl, VecTy, DAG);
2976   if (BW == 64)
2977     return buildVector64(Ops, dl, VecTy, DAG);
2978 
2979   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2980     // Check if this is a special case or all-0 or all-1.
2981     bool All0 = true, All1 = true;
2982     for (SDValue P : Ops) {
2983       auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
2984       if (CN == nullptr) {
2985         All0 = All1 = false;
2986         break;
2987       }
2988       uint32_t C = CN->getZExtValue();
2989       All0 &= (C == 0);
2990       All1 &= (C == 1);
2991     }
2992     if (All0)
2993       return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
2994     if (All1)
2995       return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
2996 
2997     // For each i1 element in the resulting predicate register, put 1
2998     // shifted by the index of the element into a general-purpose register,
2999     // then or them together and transfer it back into a predicate register.
3000     SDValue Rs[8];
3001     SDValue Z = getZero(dl, MVT::i32, DAG);
3002     // Always produce 8 bits, repeat inputs if necessary.
3003     unsigned Rep = 8 / VecTy.getVectorNumElements();
3004     for (unsigned i = 0; i != 8; ++i) {
3005       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
3006       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
3007     }
3008     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
3009       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
3010         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
3011     }
3012     // Move the value directly to a predicate register.
3013     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
3014   }
3015 
3016   return SDValue();
3017 }
3018 
3019 SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const3020 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
3021                                            SelectionDAG &DAG) const {
3022   MVT VecTy = ty(Op);
3023   const SDLoc &dl(Op);
3024   if (VecTy.getSizeInBits() == 64) {
3025     assert(Op.getNumOperands() == 2);
3026     return getCombine(Op.getOperand(1), Op.getOperand(0), dl, VecTy, DAG);
3027   }
3028 
3029   MVT ElemTy = VecTy.getVectorElementType();
3030   if (ElemTy == MVT::i1) {
3031     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
3032     MVT OpTy = ty(Op.getOperand(0));
3033     // Scale is how many times the operands need to be contracted to match
3034     // the representation in the target register.
3035     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
3036     assert(Scale == Op.getNumOperands() && Scale > 1);
3037 
3038     // First, convert all bool vectors to integers, then generate pairwise
3039     // inserts to form values of doubled length. Up until there are only
3040     // two values left to concatenate, all of these values will fit in a
3041     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
3042     SmallVector<SDValue,4> Words[2];
3043     unsigned IdxW = 0;
3044 
3045     for (SDValue P : Op.getNode()->op_values()) {
3046       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
3047       for (unsigned R = Scale; R > 1; R /= 2) {
3048         W = contractPredicate(W, dl, DAG);
3049         W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
3050       }
3051       W = LoHalf(W, DAG);
3052       Words[IdxW].push_back(W);
3053     }
3054 
3055     while (Scale > 2) {
3056       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
3057       Words[IdxW ^ 1].clear();
3058 
3059       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
3060         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
3061         // Insert W1 into W0 right next to the significant bits of W0.
3062         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
3063                                 {W0, W1, WidthV, WidthV});
3064         Words[IdxW ^ 1].push_back(T);
3065       }
3066       IdxW ^= 1;
3067       Scale /= 2;
3068     }
3069 
3070     // At this point there should only be two words left, and Scale should be 2.
3071     assert(Scale == 2 && Words[IdxW].size() == 2);
3072 
3073     SDValue WW = getCombine(Words[IdxW][1], Words[IdxW][0], dl, MVT::i64, DAG);
3074     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
3075   }
3076 
3077   return SDValue();
3078 }
3079 
3080 SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3081 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3082                                                SelectionDAG &DAG) const {
3083   SDValue Vec = Op.getOperand(0);
3084   MVT ElemTy = ty(Vec).getVectorElementType();
3085   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
3086 }
3087 
3088 SDValue
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const3089 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3090                                               SelectionDAG &DAG) const {
3091   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
3092                        ty(Op), ty(Op), DAG);
3093 }
3094 
3095 SDValue
LowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3096 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3097                                               SelectionDAG &DAG) const {
3098   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
3099                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
3100 }
3101 
3102 SDValue
LowerINSERT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const3103 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
3104                                              SelectionDAG &DAG) const {
3105   SDValue ValV = Op.getOperand(1);
3106   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
3107                       SDLoc(Op), ty(ValV), DAG);
3108 }
3109 
3110 bool
allowTruncateForTailCall(Type * Ty1,Type * Ty2) const3111 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
3112   // Assuming the caller does not have either a signext or zeroext modifier, and
3113   // only one value is accepted, any reasonable truncation is allowed.
3114   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
3115     return false;
3116 
3117   // FIXME: in principle up to 64-bit could be made safe, but it would be very
3118   // fragile at the moment: any support for multiple value returns would be
3119   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
3120   return Ty1->getPrimitiveSizeInBits() <= 32;
3121 }
3122 
3123 SDValue
LowerLoad(SDValue Op,SelectionDAG & DAG) const3124 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
3125   MVT Ty = ty(Op);
3126   const SDLoc &dl(Op);
3127   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3128   MVT MemTy = LN->getMemoryVT().getSimpleVT();
3129   ISD::LoadExtType ET = LN->getExtensionType();
3130 
3131   bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
3132   if (LoadPred) {
3133     SDValue NL = DAG.getLoad(
3134         LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
3135         LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
3136         /*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
3137         LN->getAAInfo(), LN->getRanges());
3138     LN = cast<LoadSDNode>(NL.getNode());
3139   }
3140 
3141   Align ClaimAlign = LN->getAlign();
3142   if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
3143     return replaceMemWithUndef(Op, DAG);
3144 
3145   // Call LowerUnalignedLoad for all loads, it recognizes loads that
3146   // don't need extra aligning.
3147   SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
3148   if (LoadPred) {
3149     SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
3150     if (ET == ISD::SEXTLOAD) {
3151       TP = DAG.getSExtOrTrunc(TP, dl, Ty);
3152     } else if (ET != ISD::NON_EXTLOAD) {
3153       TP = DAG.getZExtOrTrunc(TP, dl, Ty);
3154     }
3155     SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
3156     return DAG.getMergeValues({TP, Ch}, dl);
3157   }
3158   return LU;
3159 }
3160 
3161 SDValue
LowerStore(SDValue Op,SelectionDAG & DAG) const3162 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
3163   const SDLoc &dl(Op);
3164   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
3165   SDValue Val = SN->getValue();
3166   MVT Ty = ty(Val);
3167 
3168   if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
3169     // Store the exact predicate (all bits).
3170     SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
3171     SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
3172                                    MVT::i8, SN->getMemOperand());
3173     if (SN->isIndexed()) {
3174       NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
3175                                SN->getAddressingMode());
3176     }
3177     SN = cast<StoreSDNode>(NS.getNode());
3178   }
3179 
3180   Align ClaimAlign = SN->getAlign();
3181   if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
3182     return replaceMemWithUndef(Op, DAG);
3183 
3184   MVT StoreTy = SN->getMemoryVT().getSimpleVT();
3185   Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
3186   if (ClaimAlign < NeedAlign)
3187     return expandUnalignedStore(SN, DAG);
3188   return SDValue(SN, 0);
3189 }
3190 
3191 SDValue
LowerUnalignedLoad(SDValue Op,SelectionDAG & DAG) const3192 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
3193       const {
3194   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3195   MVT LoadTy = ty(Op);
3196   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
3197   unsigned HaveAlign = LN->getAlign().value();
3198   if (HaveAlign >= NeedAlign)
3199     return Op;
3200 
3201   const SDLoc &dl(Op);
3202   const DataLayout &DL = DAG.getDataLayout();
3203   LLVMContext &Ctx = *DAG.getContext();
3204 
3205   // If the load aligning is disabled or the load can be broken up into two
3206   // smaller legal loads, do the default (target-independent) expansion.
3207   bool DoDefault = false;
3208   // Handle it in the default way if this is an indexed load.
3209   if (!LN->isUnindexed())
3210     DoDefault = true;
3211 
3212   if (!AlignLoads) {
3213     if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
3214                                        *LN->getMemOperand()))
3215       return Op;
3216     DoDefault = true;
3217   }
3218   if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
3219     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3220     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
3221                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
3222     DoDefault =
3223         allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
3224   }
3225   if (DoDefault) {
3226     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
3227     return DAG.getMergeValues({P.first, P.second}, dl);
3228   }
3229 
3230   // The code below generates two loads, both aligned as NeedAlign, and
3231   // with the distance of NeedAlign between them. For that to cover the
3232   // bits that need to be loaded (and without overlapping), the size of
3233   // the loads should be equal to NeedAlign. This is true for all loadable
3234   // types, but add an assertion in case something changes in the future.
3235   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
3236 
3237   unsigned LoadLen = NeedAlign;
3238   SDValue Base = LN->getBasePtr();
3239   SDValue Chain = LN->getChain();
3240   auto BO = getBaseAndOffset(Base);
3241   unsigned BaseOpc = BO.first.getOpcode();
3242   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
3243     return Op;
3244 
3245   if (BO.second % LoadLen != 0) {
3246     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
3247                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
3248     BO.second -= BO.second % LoadLen;
3249   }
3250   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3251       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
3252                     DAG.getConstant(NeedAlign, dl, MVT::i32))
3253       : BO.first;
3254   SDValue Base0 =
3255       DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::getFixed(BO.second), dl);
3256   SDValue Base1 = DAG.getMemBasePlusOffset(
3257       BaseNoOff, TypeSize::getFixed(BO.second + LoadLen), dl);
3258 
3259   MachineMemOperand *WideMMO = nullptr;
3260   if (MachineMemOperand *MMO = LN->getMemOperand()) {
3261     MachineFunction &MF = DAG.getMachineFunction();
3262     WideMMO = MF.getMachineMemOperand(
3263         MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
3264         MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
3265         MMO->getSuccessOrdering(), MMO->getFailureOrdering());
3266   }
3267 
3268   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
3269   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
3270 
3271   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
3272                                 {Load1, Load0, BaseNoOff.getOperand(0)});
3273   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3274                                  Load0.getValue(1), Load1.getValue(1));
3275   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
3276   return M;
3277 }
3278 
3279 SDValue
LowerUAddSubO(SDValue Op,SelectionDAG & DAG) const3280 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3281   SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
3282   auto *CY = dyn_cast<ConstantSDNode>(Y);
3283   if (!CY)
3284     return SDValue();
3285 
3286   const SDLoc &dl(Op);
3287   SDVTList VTs = Op.getNode()->getVTList();
3288   assert(VTs.NumVTs == 2);
3289   assert(VTs.VTs[1] == MVT::i1);
3290   unsigned Opc = Op.getOpcode();
3291 
3292   if (CY) {
3293     uint64_t VY = CY->getZExtValue();
3294     assert(VY != 0 && "This should have been folded");
3295     // X +/- 1
3296     if (VY != 1)
3297       return SDValue();
3298 
3299     if (Opc == ISD::UADDO) {
3300       SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
3301       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
3302                                 ISD::SETEQ);
3303       return DAG.getMergeValues({Op, Ov}, dl);
3304     }
3305     if (Opc == ISD::USUBO) {
3306       SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
3307       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
3308                                 DAG.getAllOnesConstant(dl, ty(Op)), ISD::SETEQ);
3309       return DAG.getMergeValues({Op, Ov}, dl);
3310     }
3311   }
3312 
3313   return SDValue();
3314 }
3315 
LowerUAddSubOCarry(SDValue Op,SelectionDAG & DAG) const3316 SDValue HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
3317                                                   SelectionDAG &DAG) const {
3318   const SDLoc &dl(Op);
3319   unsigned Opc = Op.getOpcode();
3320   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
3321 
3322   if (Opc == ISD::UADDO_CARRY)
3323     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
3324                        { X, Y, C });
3325 
3326   EVT CarryTy = C.getValueType();
3327   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
3328                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
3329   SDValue Out[] = { SubC.getValue(0),
3330                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
3331   return DAG.getMergeValues(Out, dl);
3332 }
3333 
3334 SDValue
LowerEH_RETURN(SDValue Op,SelectionDAG & DAG) const3335 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3336   SDValue Chain     = Op.getOperand(0);
3337   SDValue Offset    = Op.getOperand(1);
3338   SDValue Handler   = Op.getOperand(2);
3339   SDLoc dl(Op);
3340   auto PtrVT = getPointerTy(DAG.getDataLayout());
3341 
3342   // Mark function as containing a call to EH_RETURN.
3343   HexagonMachineFunctionInfo *FuncInfo =
3344     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3345   FuncInfo->setHasEHReturn();
3346 
3347   unsigned OffsetReg = Hexagon::R28;
3348 
3349   SDValue StoreAddr =
3350       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
3351                   DAG.getIntPtrConstant(4, dl));
3352   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
3353   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
3354 
3355   // Not needed we already use it as explicit input to EH_RETURN.
3356   // MF.getRegInfo().addLiveOut(OffsetReg);
3357 
3358   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
3359 }
3360 
3361 SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const3362 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3363   unsigned Opc = Op.getOpcode();
3364 
3365   // Handle INLINEASM first.
3366   if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
3367     return LowerINLINEASM(Op, DAG);
3368 
3369   if (isHvxOperation(Op.getNode(), DAG)) {
3370     // If HVX lowering returns nothing, try the default lowering.
3371     if (SDValue V = LowerHvxOperation(Op, DAG))
3372       return V;
3373   }
3374 
3375   switch (Opc) {
3376     default:
3377 #ifndef NDEBUG
3378       Op.getNode()->dumpr(&DAG);
3379       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
3380         errs() << "Error: check for a non-legal type in this operation\n";
3381 #endif
3382       llvm_unreachable("Should not custom lower this!");
3383 
3384     case ISD::FDIV:
3385       return LowerFDIV(Op, DAG);
3386     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
3387     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
3388     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
3389     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
3390     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3391     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
3392     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
3393     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
3394     case ISD::LOAD:                 return LowerLoad(Op, DAG);
3395     case ISD::STORE:                return LowerStore(Op, DAG);
3396     case ISD::UADDO:
3397     case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
3398     case ISD::UADDO_CARRY:
3399     case ISD::USUBO_CARRY:          return LowerUAddSubOCarry(Op, DAG);
3400     case ISD::SRA:
3401     case ISD::SHL:
3402     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
3403     case ISD::ROTL:                 return LowerROTL(Op, DAG);
3404     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
3405     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
3406     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
3407     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
3408     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
3409     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
3410     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
3411     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
3412     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
3413     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3414     case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
3415     case ISD::VASTART:              return LowerVASTART(Op, DAG);
3416     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
3417     case ISD::SETCC:                return LowerSETCC(Op, DAG);
3418     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
3419     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3420     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
3421     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
3422     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
3423     case ISD::READSTEADYCOUNTER:    return LowerREADSTEADYCOUNTER(Op, DAG);
3424       break;
3425   }
3426 
3427   return SDValue();
3428 }
3429 
3430 void
LowerOperationWrapper(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3431 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3432                                              SmallVectorImpl<SDValue> &Results,
3433                                              SelectionDAG &DAG) const {
3434   if (isHvxOperation(N, DAG)) {
3435     LowerHvxOperationWrapper(N, Results, DAG);
3436     if (!Results.empty())
3437       return;
3438   }
3439 
3440   SDValue Op(N, 0);
3441   unsigned Opc = N->getOpcode();
3442 
3443   switch (Opc) {
3444     case HexagonISD::SSAT:
3445     case HexagonISD::USAT:
3446       Results.push_back(opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG));
3447       break;
3448     case ISD::STORE:
3449       // We are only custom-lowering stores to verify the alignment of the
3450       // address if it is a compile-time constant. Since a store can be
3451       // modified during type-legalization (the value being stored may need
3452       // legalization), return empty Results here to indicate that we don't
3453       // really make any changes in the custom lowering.
3454       return;
3455     default:
3456       TargetLowering::LowerOperationWrapper(N, Results, DAG);
3457       break;
3458   }
3459 }
3460 
3461 void
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3462 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3463                                           SmallVectorImpl<SDValue> &Results,
3464                                           SelectionDAG &DAG) const {
3465   if (isHvxOperation(N, DAG)) {
3466     ReplaceHvxNodeResults(N, Results, DAG);
3467     if (!Results.empty())
3468       return;
3469   }
3470 
3471   const SDLoc &dl(N);
3472   switch (N->getOpcode()) {
3473     case ISD::SRL:
3474     case ISD::SRA:
3475     case ISD::SHL:
3476       return;
3477     case ISD::BITCAST:
3478       // Handle a bitcast from v8i1 to i8.
3479       if (N->getValueType(0) == MVT::i8) {
3480         if (N->getOperand(0).getValueType() == MVT::v8i1) {
3481           SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
3482                                N->getOperand(0), DAG);
3483           SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
3484           Results.push_back(T);
3485         }
3486       }
3487       break;
3488   }
3489 }
3490 
3491 SDValue
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const3492 HexagonTargetLowering::PerformDAGCombine(SDNode *N,
3493                                          DAGCombinerInfo &DCI) const {
3494   if (isHvxOperation(N, DCI.DAG)) {
3495     if (SDValue V = PerformHvxDAGCombine(N, DCI))
3496       return V;
3497     return SDValue();
3498   }
3499 
3500   SDValue Op(N, 0);
3501   const SDLoc &dl(Op);
3502   unsigned Opc = Op.getOpcode();
3503 
3504   if (Opc == ISD::TRUNCATE) {
3505     SDValue Op0 = Op.getOperand(0);
3506     // fold (truncate (build pair x, y)) -> (truncate x) or x
3507     if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3508       EVT TruncTy = Op.getValueType();
3509       SDValue Elem0 = Op0.getOperand(0);
3510       // if we match the low element of the pair, just return it.
3511       if (Elem0.getValueType() == TruncTy)
3512         return Elem0;
3513       // otherwise, if the low part is still too large, apply the truncate.
3514       if (Elem0.getValueType().bitsGT(TruncTy))
3515         return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3516     }
3517   }
3518 
3519   if (DCI.isBeforeLegalizeOps())
3520     return SDValue();
3521 
3522   if (Opc == HexagonISD::P2D) {
3523     SDValue P = Op.getOperand(0);
3524     switch (P.getOpcode()) {
3525     case HexagonISD::PTRUE:
3526       return DCI.DAG.getAllOnesConstant(dl, ty(Op));
3527     case HexagonISD::PFALSE:
3528       return getZero(dl, ty(Op), DCI.DAG);
3529     default:
3530       break;
3531     }
3532   } else if (Opc == ISD::VSELECT) {
3533     // This is pretty much duplicated in HexagonISelLoweringHVX...
3534     //
3535     // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3536     SDValue Cond = Op.getOperand(0);
3537     if (Cond->getOpcode() == ISD::XOR) {
3538       SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3539       if (C1->getOpcode() == HexagonISD::PTRUE) {
3540         SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
3541                                        Op.getOperand(2), Op.getOperand(1));
3542         return VSel;
3543       }
3544     }
3545   } else if (Opc == ISD::TRUNCATE) {
3546     SDValue Op0 = Op.getOperand(0);
3547     // fold (truncate (build pair x, y)) -> (truncate x) or x
3548     if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3549       MVT TruncTy = ty(Op);
3550       SDValue Elem0 = Op0.getOperand(0);
3551       // if we match the low element of the pair, just return it.
3552       if (ty(Elem0) == TruncTy)
3553         return Elem0;
3554       // otherwise, if the low part is still too large, apply the truncate.
3555       if (ty(Elem0).bitsGT(TruncTy))
3556         return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3557     }
3558   } else if (Opc == ISD::OR) {
3559     // fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
3560     // if s >= 32
3561     auto fold0 = [&, this](SDValue Op) {
3562       if (ty(Op) != MVT::i64)
3563         return SDValue();
3564       SDValue Shl = Op.getOperand(0);
3565       SDValue Zxt = Op.getOperand(1);
3566       if (Shl.getOpcode() != ISD::SHL)
3567         std::swap(Shl, Zxt);
3568 
3569       if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
3570         return SDValue();
3571 
3572       SDValue Z = Zxt.getOperand(0);
3573       auto *Amt = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
3574       if (Amt && Amt->getZExtValue() >= 32 && ty(Z).getSizeInBits() <= 32) {
3575         unsigned A = Amt->getZExtValue();
3576         SDValue S = Shl.getOperand(0);
3577         SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
3578                                      DCI.DAG.getConstant(A - 32, dl, MVT::i32));
3579         SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
3580         SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
3581         return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
3582       }
3583       return SDValue();
3584     };
3585 
3586     if (SDValue R = fold0(Op))
3587       return R;
3588   }
3589 
3590   return SDValue();
3591 }
3592 
3593 /// Returns relocation base for the given PIC jumptable.
3594 SDValue
getPICJumpTableRelocBase(SDValue Table,SelectionDAG & DAG) const3595 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3596                                                 SelectionDAG &DAG) const {
3597   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
3598   EVT VT = Table.getValueType();
3599   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
3600   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
3601 }
3602 
3603 //===----------------------------------------------------------------------===//
3604 // Inline Assembly Support
3605 //===----------------------------------------------------------------------===//
3606 
3607 TargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const3608 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3609   if (Constraint.size() == 1) {
3610     switch (Constraint[0]) {
3611       case 'q':
3612       case 'v':
3613         if (Subtarget.useHVXOps())
3614           return C_RegisterClass;
3615         break;
3616       case 'a':
3617         return C_RegisterClass;
3618       default:
3619         break;
3620     }
3621   }
3622   return TargetLowering::getConstraintType(Constraint);
3623 }
3624 
3625 std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const3626 HexagonTargetLowering::getRegForInlineAsmConstraint(
3627     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3628 
3629   if (Constraint.size() == 1) {
3630     switch (Constraint[0]) {
3631     case 'r':   // R0-R31
3632       switch (VT.SimpleTy) {
3633       default:
3634         return {0u, nullptr};
3635       case MVT::i1:
3636       case MVT::i8:
3637       case MVT::i16:
3638       case MVT::i32:
3639       case MVT::f32:
3640         return {0u, &Hexagon::IntRegsRegClass};
3641       case MVT::i64:
3642       case MVT::f64:
3643         return {0u, &Hexagon::DoubleRegsRegClass};
3644       }
3645       break;
3646     case 'a': // M0-M1
3647       if (VT != MVT::i32)
3648         return {0u, nullptr};
3649       return {0u, &Hexagon::ModRegsRegClass};
3650     case 'q': // q0-q3
3651       switch (VT.getSizeInBits()) {
3652       default:
3653         return {0u, nullptr};
3654       case 64:
3655       case 128:
3656         return {0u, &Hexagon::HvxQRRegClass};
3657       }
3658       break;
3659     case 'v': // V0-V31
3660       switch (VT.getSizeInBits()) {
3661       default:
3662         return {0u, nullptr};
3663       case 512:
3664         return {0u, &Hexagon::HvxVRRegClass};
3665       case 1024:
3666         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3667           return {0u, &Hexagon::HvxVRRegClass};
3668         return {0u, &Hexagon::HvxWRRegClass};
3669       case 2048:
3670         return {0u, &Hexagon::HvxWRRegClass};
3671       }
3672       break;
3673     default:
3674       return {0u, nullptr};
3675     }
3676   }
3677 
3678   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3679 }
3680 
3681 /// isFPImmLegal - Returns true if the target can instruction select the
3682 /// specified FP immediate natively. If false, the legalizer will
3683 /// materialize the FP immediate as a load from a constant pool.
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const3684 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3685                                          bool ForCodeSize) const {
3686   return true;
3687 }
3688 
3689 /// Returns true if it is beneficial to convert a load of a constant
3690 /// to just the constant itself.
shouldConvertConstantLoadToIntImm(const APInt & Imm,Type * Ty) const3691 bool HexagonTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
3692                                                               Type *Ty) const {
3693   if (!ConstantLoadsToImm)
3694     return false;
3695 
3696   assert(Ty->isIntegerTy());
3697   unsigned BitSize = Ty->getPrimitiveSizeInBits();
3698   return (BitSize > 0 && BitSize <= 64);
3699 }
3700 
3701 /// isLegalAddressingMode - Return true if the addressing mode represented by
3702 /// AM is legal for this target, for a load/store of the specified type.
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const3703 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3704                                                   const AddrMode &AM, Type *Ty,
3705                                                   unsigned AS, Instruction *I) const {
3706   if (Ty->isSized()) {
3707     // When LSR detects uses of the same base address to access different
3708     // types (e.g. unions), it will assume a conservative type for these
3709     // uses:
3710     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
3711     // The type Ty passed here would then be "void". Skip the alignment
3712     // checks, but do not return false right away, since that confuses
3713     // LSR into crashing.
3714     Align A = DL.getABITypeAlign(Ty);
3715     // The base offset must be a multiple of the alignment.
3716     if (!isAligned(A, AM.BaseOffs))
3717       return false;
3718     // The shifted offset must fit in 11 bits.
3719     if (!isInt<11>(AM.BaseOffs >> Log2(A)))
3720       return false;
3721   }
3722 
3723   // No global is ever allowed as a base.
3724   if (AM.BaseGV)
3725     return false;
3726 
3727   int Scale = AM.Scale;
3728   if (Scale < 0)
3729     Scale = -Scale;
3730   switch (Scale) {
3731   case 0:  // No scale reg, "r+i", "r", or just "i".
3732     break;
3733   default: // No scaled addressing mode.
3734     return false;
3735   }
3736   return true;
3737 }
3738 
3739 /// Return true if folding a constant offset with the given GlobalAddress is
3740 /// legal.  It is frequently not legal in PIC relocation models.
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const3741 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3742       const {
3743   return HTM.getRelocationModel() == Reloc::Static;
3744 }
3745 
3746 /// isLegalICmpImmediate - Return true if the specified immediate is legal
3747 /// icmp immediate, that is the target has icmp instructions which can compare
3748 /// a register against the immediate without having to materialize the
3749 /// immediate into a register.
isLegalICmpImmediate(int64_t Imm) const3750 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3751   return Imm >= -512 && Imm <= 511;
3752 }
3753 
3754 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3755 /// for tail call optimization. Targets which want to do tail call
3756 /// optimization should implement this function.
IsEligibleForTailCallOptimization(SDValue Callee,CallingConv::ID CalleeCC,bool IsVarArg,bool IsCalleeStructRet,bool IsCallerStructRet,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SmallVectorImpl<ISD::InputArg> & Ins,SelectionDAG & DAG) const3757 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3758                                  SDValue Callee,
3759                                  CallingConv::ID CalleeCC,
3760                                  bool IsVarArg,
3761                                  bool IsCalleeStructRet,
3762                                  bool IsCallerStructRet,
3763                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3764                                  const SmallVectorImpl<SDValue> &OutVals,
3765                                  const SmallVectorImpl<ISD::InputArg> &Ins,
3766                                  SelectionDAG& DAG) const {
3767   const Function &CallerF = DAG.getMachineFunction().getFunction();
3768   CallingConv::ID CallerCC = CallerF.getCallingConv();
3769   bool CCMatch = CallerCC == CalleeCC;
3770 
3771   // ***************************************************************************
3772   //  Look for obvious safe cases to perform tail call optimization that do not
3773   //  require ABI changes.
3774   // ***************************************************************************
3775 
3776   // If this is a tail call via a function pointer, then don't do it!
3777   if (!isa<GlobalAddressSDNode>(Callee) &&
3778       !isa<ExternalSymbolSDNode>(Callee)) {
3779     return false;
3780   }
3781 
3782   // Do not optimize if the calling conventions do not match and the conventions
3783   // used are not C or Fast.
3784   if (!CCMatch) {
3785     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3786     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3787     // If R & E, then ok.
3788     if (!R || !E)
3789       return false;
3790   }
3791 
3792   // Do not tail call optimize vararg calls.
3793   if (IsVarArg)
3794     return false;
3795 
3796   // Also avoid tail call optimization if either caller or callee uses struct
3797   // return semantics.
3798   if (IsCalleeStructRet || IsCallerStructRet)
3799     return false;
3800 
3801   // In addition to the cases above, we also disable Tail Call Optimization if
3802   // the calling convention code that at least one outgoing argument needs to
3803   // go on the stack. We cannot check that here because at this point that
3804   // information is not available.
3805   return true;
3806 }
3807 
3808 /// Returns the target specific optimal type for load and store operations as
3809 /// a result of memset, memcpy, and memmove lowering.
3810 ///
3811 /// If DstAlign is zero that means it's safe to destination alignment can
3812 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3813 /// a need to check it against alignment requirement, probably because the
3814 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3815 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3816 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3817 /// does not need to be loaded.  It returns EVT::Other if the type should be
3818 /// determined using generic target-independent logic.
getOptimalMemOpType(LLVMContext & Context,const MemOp & Op,const AttributeList & FuncAttributes) const3819 EVT HexagonTargetLowering::getOptimalMemOpType(
3820     LLVMContext &Context, const MemOp &Op,
3821     const AttributeList &FuncAttributes) const {
3822   if (Op.size() >= 8 && Op.isAligned(Align(8)))
3823     return MVT::i64;
3824   if (Op.size() >= 4 && Op.isAligned(Align(4)))
3825     return MVT::i32;
3826   if (Op.size() >= 2 && Op.isAligned(Align(2)))
3827     return MVT::i16;
3828   return MVT::Other;
3829 }
3830 
allowsMemoryAccess(LLVMContext & Context,const DataLayout & DL,EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const3831 bool HexagonTargetLowering::allowsMemoryAccess(
3832     LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3833     Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
3834   if (!VT.isSimple())
3835     return false;
3836   MVT SVT = VT.getSimpleVT();
3837   if (Subtarget.isHVXVectorType(SVT, true))
3838     return allowsHvxMemoryAccess(SVT, Flags, Fast);
3839   return TargetLoweringBase::allowsMemoryAccess(
3840               Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3841 }
3842 
allowsMisalignedMemoryAccesses(EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const3843 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3844     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3845     unsigned *Fast) const {
3846   if (!VT.isSimple())
3847     return false;
3848   MVT SVT = VT.getSimpleVT();
3849   if (Subtarget.isHVXVectorType(SVT, true))
3850     return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
3851   if (Fast)
3852     *Fast = 0;
3853   return false;
3854 }
3855 
3856 std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(const TargetRegisterInfo * TRI,MVT VT) const3857 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3858       MVT VT) const {
3859   if (Subtarget.isHVXVectorType(VT, true)) {
3860     unsigned BitWidth = VT.getSizeInBits();
3861     unsigned VecWidth = Subtarget.getVectorLength() * 8;
3862 
3863     if (VT.getVectorElementType() == MVT::i1)
3864       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3865     if (BitWidth == VecWidth)
3866       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3867     assert(BitWidth == 2 * VecWidth);
3868     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3869   }
3870 
3871   return TargetLowering::findRepresentativeClass(TRI, VT);
3872 }
3873 
shouldReduceLoadWidth(SDNode * Load,ISD::LoadExtType ExtTy,EVT NewVT,std::optional<unsigned> ByteOffset) const3874 bool HexagonTargetLowering::shouldReduceLoadWidth(
3875     SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
3876     std::optional<unsigned> ByteOffset) const {
3877   // TODO: This may be worth removing. Check regression tests for diffs.
3878   if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
3879                                                  ByteOffset))
3880     return false;
3881 
3882   auto *L = cast<LoadSDNode>(Load);
3883   std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
3884   // Small-data object, do not shrink.
3885   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3886     return false;
3887   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
3888     auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
3889     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
3890     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3891   }
3892   return true;
3893 }
3894 
AdjustInstrPostInstrSelection(MachineInstr & MI,SDNode * Node) const3895 void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
3896       SDNode *Node) const {
3897   AdjustHvxInstrPostInstrSelection(MI, Node);
3898 }
3899 
emitLoadLinked(IRBuilderBase & Builder,Type * ValueTy,Value * Addr,AtomicOrdering Ord) const3900 Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3901                                              Type *ValueTy, Value *Addr,
3902                                              AtomicOrdering Ord) const {
3903   unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3904   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3905   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3906                                    : Intrinsic::hexagon_L4_loadd_locked;
3907 
3908   Value *Call =
3909       Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
3910 
3911   return Builder.CreateBitCast(Call, ValueTy);
3912 }
3913 
3914 /// Perform a store-conditional operation to Addr. Return the status of the
3915 /// store. This should be 0 if the store succeeded, non-zero otherwise.
emitStoreConditional(IRBuilderBase & Builder,Value * Val,Value * Addr,AtomicOrdering Ord) const3916 Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3917                                                    Value *Val, Value *Addr,
3918                                                    AtomicOrdering Ord) const {
3919   BasicBlock *BB = Builder.GetInsertBlock();
3920   Module *M = BB->getParent()->getParent();
3921   Type *Ty = Val->getType();
3922   unsigned SZ = Ty->getPrimitiveSizeInBits();
3923 
3924   Type *CastTy = Builder.getIntNTy(SZ);
3925   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3926   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3927                                    : Intrinsic::hexagon_S4_stored_locked;
3928 
3929   Val = Builder.CreateBitCast(Val, CastTy);
3930 
3931   Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
3932                                         /*FMFSource=*/nullptr, "stcx");
3933   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3934   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3935   return Ext;
3936 }
3937 
3938 TargetLowering::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst * LI) const3939 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3940   // Do not expand loads and stores that don't exceed 64 bits.
3941   return LI->getType()->getPrimitiveSizeInBits() > 64
3942              ? AtomicExpansionKind::LLOnly
3943              : AtomicExpansionKind::None;
3944 }
3945 
3946 TargetLowering::AtomicExpansionKind
shouldExpandAtomicStoreInIR(StoreInst * SI) const3947 HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3948   // Do not expand loads and stores that don't exceed 64 bits.
3949   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
3950              ? AtomicExpansionKind::Expand
3951              : AtomicExpansionKind::None;
3952 }
3953 
3954 TargetLowering::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst * AI) const3955 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3956     AtomicCmpXchgInst *AI) const {
3957   return AtomicExpansionKind::LLSC;
3958 }
3959