1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that Hexagon uses to lower LLVM code
10 // into a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "HexagonISelLowering.h"
15 #include "Hexagon.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonRegisterInfo.h"
18 #include "HexagonSubtarget.h"
19 #include "HexagonTargetMachine.h"
20 #include "HexagonTargetObjectFile.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/SelectionDAG.h"
31 #include "llvm/CodeGen/TargetCallingConv.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/DiagnosticInfo.h"
38 #include "llvm/IR/DiagnosticPrinter.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/IntrinsicsHexagon.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/IR/Type.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/CodeGen.h"
52 #include "llvm/Support/CommandLine.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MathExtras.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include "llvm/Target/TargetMachine.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <limits>
62 #include <utility>
63
64 using namespace llvm;
65
66 #define DEBUG_TYPE "hexagon-lowering"
67
68 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
69 cl::init(true), cl::Hidden,
70 cl::desc("Control jump table emission on Hexagon target"));
71
72 static cl::opt<bool>
73 EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
74 cl::desc("Enable Hexagon SDNode scheduling"));
75
76 static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
77 cl::init(5),
78 cl::desc("Set minimum jump tables"));
79
80 static cl::opt<int>
81 MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6),
82 cl::desc("Max #stores to inline memcpy"));
83
84 static cl::opt<int>
85 MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4),
86 cl::desc("Max #stores to inline memcpy"));
87
88 static cl::opt<int>
89 MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6),
90 cl::desc("Max #stores to inline memmove"));
91
92 static cl::opt<int>
93 MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
94 cl::init(4),
95 cl::desc("Max #stores to inline memmove"));
96
97 static cl::opt<int>
98 MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8),
99 cl::desc("Max #stores to inline memset"));
100
101 static cl::opt<int>
102 MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4),
103 cl::desc("Max #stores to inline memset"));
104
105 static cl::opt<bool>
106 ConstantLoadsToImm("constant-loads-to-imm", cl::Hidden, cl::init(true),
107 cl::desc("Convert constant loads to immediate values."));
108
109 static cl::opt<bool> AlignLoads("hexagon-align-loads",
110 cl::Hidden, cl::init(false),
111 cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
112
113 static cl::opt<bool>
114 DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
115 cl::init(false),
116 cl::desc("Disable minimum alignment of 1 for "
117 "arguments passed by value on stack"));
118
119 namespace {
120
121 class HexagonCCState : public CCState {
122 unsigned NumNamedVarArgParams = 0;
123
124 public:
HexagonCCState(CallingConv::ID CC,bool IsVarArg,MachineFunction & MF,SmallVectorImpl<CCValAssign> & locs,LLVMContext & C,unsigned NumNamedArgs)125 HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
126 SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
127 unsigned NumNamedArgs)
128 : CCState(CC, IsVarArg, MF, locs, C),
129 NumNamedVarArgParams(NumNamedArgs) {}
getNumNamedVarArgParams() const130 unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
131 };
132
133 } // end anonymous namespace
134
135
136 // Implement calling convention for Hexagon.
137
CC_SkipOdd(unsigned & ValNo,MVT & ValVT,MVT & LocVT,CCValAssign::LocInfo & LocInfo,ISD::ArgFlagsTy & ArgFlags,CCState & State)138 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
139 CCValAssign::LocInfo &LocInfo,
140 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
141 static const MCPhysReg ArgRegs[] = {
142 Hexagon::R0, Hexagon::R1, Hexagon::R2,
143 Hexagon::R3, Hexagon::R4, Hexagon::R5
144 };
145 const unsigned NumArgRegs = std::size(ArgRegs);
146 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
147
148 // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
149 if (RegNum != NumArgRegs && RegNum % 2 == 1)
150 State.AllocateReg(ArgRegs[RegNum]);
151
152 // Always return false here, as this function only makes sure that the first
153 // unallocated register has an even register number and does not actually
154 // allocate a register for the current argument.
155 return false;
156 }
157
158 #include "HexagonGenCallingConv.inc"
159
getVectorTypeBreakdownForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT,EVT & IntermediateVT,unsigned & NumIntermediates,MVT & RegisterVT) const160 unsigned HexagonTargetLowering::getVectorTypeBreakdownForCallingConv(
161 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
162 unsigned &NumIntermediates, MVT &RegisterVT) const {
163
164 bool isBoolVector = VT.getVectorElementType() == MVT::i1;
165 bool isPowerOf2 = VT.isPow2VectorType();
166 unsigned NumElts = VT.getVectorNumElements();
167
168 // Split vectors of type vXi1 into (X/8) vectors of type v8i1,
169 // where X is divisible by 8.
170 if (isBoolVector && !Subtarget.useHVXOps() && isPowerOf2 && NumElts >= 8) {
171 RegisterVT = MVT::v8i8;
172 IntermediateVT = MVT::v8i1;
173 NumIntermediates = NumElts / 8;
174 return NumIntermediates;
175 }
176
177 // In HVX 64-byte mode, vectors of type vXi1 are split into (X / 64) vectors
178 // of type v64i1, provided that X is divisible by 64.
179 if (isBoolVector && Subtarget.useHVX64BOps() && isPowerOf2 && NumElts >= 64) {
180 RegisterVT = MVT::v64i8;
181 IntermediateVT = MVT::v64i1;
182 NumIntermediates = NumElts / 64;
183 return NumIntermediates;
184 }
185
186 // In HVX 128-byte mode, vectors of type vXi1 are split into (X / 128) vectors
187 // of type v128i1, provided that X is divisible by 128.
188 if (isBoolVector && Subtarget.useHVX128BOps() && isPowerOf2 &&
189 NumElts >= 128) {
190 RegisterVT = MVT::v128i8;
191 IntermediateVT = MVT::v128i1;
192 NumIntermediates = NumElts / 128;
193 return NumIntermediates;
194 }
195
196 return TargetLowering::getVectorTypeBreakdownForCallingConv(
197 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
198 }
199
200 std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(const HexagonSubtarget & Subtarget,EVT VT) const201 HexagonTargetLowering::handleMaskRegisterForCallingConv(
202 const HexagonSubtarget &Subtarget, EVT VT) const {
203 assert(VT.getVectorElementType() == MVT::i1);
204
205 const unsigned NumElems = VT.getVectorNumElements();
206
207 if (!VT.isPow2VectorType())
208 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
209
210 if (!Subtarget.useHVXOps() && NumElems >= 8)
211 return {MVT::v8i8, NumElems / 8};
212
213 if (Subtarget.useHVX64BOps() && NumElems >= 64)
214 return {MVT::v64i8, NumElems / 64};
215
216 if (Subtarget.useHVX128BOps() && NumElems >= 128)
217 return {MVT::v128i8, NumElems / 128};
218
219 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
220 }
221
getRegisterTypeForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT) const222 MVT HexagonTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
223 CallingConv::ID CC,
224 EVT VT) const {
225
226 if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
227 auto [RegisterVT, NumRegisters] =
228 handleMaskRegisterForCallingConv(Subtarget, VT);
229 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
230 return RegisterVT;
231 }
232
233 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
234 }
235
236 SDValue
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const237 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
238 const {
239 return SDValue();
240 }
241
242 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
243 /// by "Src" to address "Dst" of size "Size". Alignment information is
244 /// specified by the specific parameter attribute. The copy will be passed as
245 /// a byval function parameter. Sometimes what we are copying is the end of a
246 /// larger object, the part that does not fit in registers.
CreateCopyOfByValArgument(SDValue Src,SDValue Dst,SDValue Chain,ISD::ArgFlagsTy Flags,SelectionDAG & DAG,const SDLoc & dl)247 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
248 SDValue Chain, ISD::ArgFlagsTy Flags,
249 SelectionDAG &DAG, const SDLoc &dl) {
250 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
251 return DAG.getMemcpy(
252 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
253 /*isVolatile=*/false, /*AlwaysInline=*/false,
254 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
255 }
256
257 bool
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const258 HexagonTargetLowering::CanLowerReturn(
259 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
260 const SmallVectorImpl<ISD::OutputArg> &Outs,
261 LLVMContext &Context, const Type *RetTy) const {
262 SmallVector<CCValAssign, 16> RVLocs;
263 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
264
265 if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
266 return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
267 return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
268 }
269
270 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
271 // passed by value, the function prototype is modified to return void and
272 // the value is stored in memory pointed by a pointer passed by caller.
273 SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const274 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
275 bool IsVarArg,
276 const SmallVectorImpl<ISD::OutputArg> &Outs,
277 const SmallVectorImpl<SDValue> &OutVals,
278 const SDLoc &dl, SelectionDAG &DAG) const {
279 // CCValAssign - represent the assignment of the return value to locations.
280 SmallVector<CCValAssign, 16> RVLocs;
281
282 // CCState - Info about the registers and stack slot.
283 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
284 *DAG.getContext());
285
286 // Analyze return values of ISD::RET
287 if (Subtarget.useHVXOps())
288 CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
289 else
290 CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
291
292 SDValue Glue;
293 SmallVector<SDValue, 4> RetOps(1, Chain);
294
295 // Copy the result values into the output registers.
296 for (unsigned i = 0; i != RVLocs.size(); ++i) {
297 CCValAssign &VA = RVLocs[i];
298 SDValue Val = OutVals[i];
299
300 switch (VA.getLocInfo()) {
301 default:
302 // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
303 llvm_unreachable("Unknown loc info!");
304 case CCValAssign::Full:
305 break;
306 case CCValAssign::BCvt:
307 Val = DAG.getBitcast(VA.getLocVT(), Val);
308 break;
309 case CCValAssign::SExt:
310 Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
311 break;
312 case CCValAssign::ZExt:
313 Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
314 break;
315 case CCValAssign::AExt:
316 Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
317 break;
318 }
319
320 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Glue);
321
322 // Guarantee that all emitted copies are stuck together with flags.
323 Glue = Chain.getValue(1);
324 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
325 }
326
327 RetOps[0] = Chain; // Update chain.
328
329 // Add the glue if we have it.
330 if (Glue.getNode())
331 RetOps.push_back(Glue);
332
333 return DAG.getNode(HexagonISD::RET_GLUE, dl, MVT::Other, RetOps);
334 }
335
mayBeEmittedAsTailCall(const CallInst * CI) const336 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
337 // If either no tail call or told not to tail call at all, don't.
338 return CI->isTailCall();
339 }
340
getRegisterByName(const char * RegName,LLT VT,const MachineFunction &) const341 Register HexagonTargetLowering::getRegisterByName(
342 const char* RegName, LLT VT, const MachineFunction &) const {
343 // Just support r19, the linux kernel uses it.
344 Register Reg = StringSwitch<Register>(RegName)
345 .Case("r0", Hexagon::R0)
346 .Case("r1", Hexagon::R1)
347 .Case("r2", Hexagon::R2)
348 .Case("r3", Hexagon::R3)
349 .Case("r4", Hexagon::R4)
350 .Case("r5", Hexagon::R5)
351 .Case("r6", Hexagon::R6)
352 .Case("r7", Hexagon::R7)
353 .Case("r8", Hexagon::R8)
354 .Case("r9", Hexagon::R9)
355 .Case("r10", Hexagon::R10)
356 .Case("r11", Hexagon::R11)
357 .Case("r12", Hexagon::R12)
358 .Case("r13", Hexagon::R13)
359 .Case("r14", Hexagon::R14)
360 .Case("r15", Hexagon::R15)
361 .Case("r16", Hexagon::R16)
362 .Case("r17", Hexagon::R17)
363 .Case("r18", Hexagon::R18)
364 .Case("r19", Hexagon::R19)
365 .Case("r20", Hexagon::R20)
366 .Case("r21", Hexagon::R21)
367 .Case("r22", Hexagon::R22)
368 .Case("r23", Hexagon::R23)
369 .Case("r24", Hexagon::R24)
370 .Case("r25", Hexagon::R25)
371 .Case("r26", Hexagon::R26)
372 .Case("r27", Hexagon::R27)
373 .Case("r28", Hexagon::R28)
374 .Case("r29", Hexagon::R29)
375 .Case("r30", Hexagon::R30)
376 .Case("r31", Hexagon::R31)
377 .Case("r1:0", Hexagon::D0)
378 .Case("r3:2", Hexagon::D1)
379 .Case("r5:4", Hexagon::D2)
380 .Case("r7:6", Hexagon::D3)
381 .Case("r9:8", Hexagon::D4)
382 .Case("r11:10", Hexagon::D5)
383 .Case("r13:12", Hexagon::D6)
384 .Case("r15:14", Hexagon::D7)
385 .Case("r17:16", Hexagon::D8)
386 .Case("r19:18", Hexagon::D9)
387 .Case("r21:20", Hexagon::D10)
388 .Case("r23:22", Hexagon::D11)
389 .Case("r25:24", Hexagon::D12)
390 .Case("r27:26", Hexagon::D13)
391 .Case("r29:28", Hexagon::D14)
392 .Case("r31:30", Hexagon::D15)
393 .Case("sp", Hexagon::R29)
394 .Case("fp", Hexagon::R30)
395 .Case("lr", Hexagon::R31)
396 .Case("p0", Hexagon::P0)
397 .Case("p1", Hexagon::P1)
398 .Case("p2", Hexagon::P2)
399 .Case("p3", Hexagon::P3)
400 .Case("sa0", Hexagon::SA0)
401 .Case("lc0", Hexagon::LC0)
402 .Case("sa1", Hexagon::SA1)
403 .Case("lc1", Hexagon::LC1)
404 .Case("m0", Hexagon::M0)
405 .Case("m1", Hexagon::M1)
406 .Case("usr", Hexagon::USR)
407 .Case("ugp", Hexagon::UGP)
408 .Case("cs0", Hexagon::CS0)
409 .Case("cs1", Hexagon::CS1)
410 .Default(Register());
411 return Reg;
412 }
413
414 /// LowerCallResult - Lower the result values of an ISD::CALL into the
415 /// appropriate copies out of appropriate physical registers. This assumes that
416 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
417 /// being lowered. Returns a SDNode with the same number of values as the
418 /// ISD::CALL.
LowerCallResult(SDValue Chain,SDValue Glue,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals,const SmallVectorImpl<SDValue> & OutVals,SDValue Callee) const419 SDValue HexagonTargetLowering::LowerCallResult(
420 SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
421 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
422 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
423 const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
424 // Assign locations to each value returned by this call.
425 SmallVector<CCValAssign, 16> RVLocs;
426
427 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
428 *DAG.getContext());
429
430 if (Subtarget.useHVXOps())
431 CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
432 else
433 CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
434
435 // Copy all of the result registers out of their specified physreg.
436 for (unsigned i = 0; i != RVLocs.size(); ++i) {
437 SDValue RetVal;
438 if (RVLocs[i].getValVT() == MVT::i1) {
439 // Return values of type MVT::i1 require special handling. The reason
440 // is that MVT::i1 is associated with the PredRegs register class, but
441 // values of that type are still returned in R0. Generate an explicit
442 // copy into a predicate register from R0, and treat the value of the
443 // predicate register as the call result.
444 auto &MRI = DAG.getMachineFunction().getRegInfo();
445 SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
446 MVT::i32, Glue);
447 // FR0 = (Value, Chain, Glue)
448 Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
449 SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
450 FR0.getValue(0), FR0.getValue(2));
451 // TPR = (Chain, Glue)
452 // Don't glue this CopyFromReg, because it copies from a virtual
453 // register. If it is glued to the call, InstrEmitter will add it
454 // as an implicit def to the call (EmitMachineNode).
455 RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
456 Glue = TPR.getValue(1);
457 Chain = TPR.getValue(0);
458 } else {
459 RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
460 RVLocs[i].getValVT(), Glue);
461 Glue = RetVal.getValue(2);
462 Chain = RetVal.getValue(1);
463 }
464 InVals.push_back(RetVal.getValue(0));
465 }
466
467 return Chain;
468 }
469
470 /// LowerCall - Functions arguments are copied from virtual regs to
471 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
472 SDValue
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const473 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
474 SmallVectorImpl<SDValue> &InVals) const {
475 SelectionDAG &DAG = CLI.DAG;
476 SDLoc &dl = CLI.DL;
477 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
478 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
479 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
480 SDValue Chain = CLI.Chain;
481 SDValue Callee = CLI.Callee;
482 CallingConv::ID CallConv = CLI.CallConv;
483 bool IsVarArg = CLI.IsVarArg;
484 bool DoesNotReturn = CLI.DoesNotReturn;
485
486 bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
487 MachineFunction &MF = DAG.getMachineFunction();
488 MachineFrameInfo &MFI = MF.getFrameInfo();
489 auto PtrVT = getPointerTy(MF.getDataLayout());
490
491 unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
492 if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
493 Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
494
495 // Linux ABI treats var-arg calls the same way as regular ones.
496 bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
497
498 // Analyze operands of the call, assigning locations to each operand.
499 SmallVector<CCValAssign, 16> ArgLocs;
500 HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
501 NumParams);
502
503 if (Subtarget.useHVXOps())
504 CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
505 else if (DisableArgsMinAlignment)
506 CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
507 else
508 CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
509
510 if (CLI.IsTailCall) {
511 bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
512 CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
513 IsVarArg, IsStructRet, StructAttrFlag, Outs,
514 OutVals, Ins, DAG);
515 for (const CCValAssign &VA : ArgLocs) {
516 if (VA.isMemLoc()) {
517 CLI.IsTailCall = false;
518 break;
519 }
520 }
521 LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
522 : "Argument must be passed on stack. "
523 "Not eligible for Tail Call\n"));
524 }
525 // Get a count of how many bytes are to be pushed on the stack.
526 unsigned NumBytes = CCInfo.getStackSize();
527 SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
528 SmallVector<SDValue, 8> MemOpChains;
529
530 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
531 SDValue StackPtr =
532 DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
533
534 bool NeedsArgAlign = false;
535 Align LargestAlignSeen;
536 // Walk the register/memloc assignments, inserting copies/loads.
537 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
538 CCValAssign &VA = ArgLocs[i];
539 SDValue Arg = OutVals[i];
540 ISD::ArgFlagsTy Flags = Outs[i].Flags;
541 // Record if we need > 8 byte alignment on an argument.
542 bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
543 NeedsArgAlign |= ArgAlign;
544
545 // Promote the value if needed.
546 switch (VA.getLocInfo()) {
547 default:
548 // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
549 llvm_unreachable("Unknown loc info!");
550 case CCValAssign::Full:
551 break;
552 case CCValAssign::BCvt:
553 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
554 break;
555 case CCValAssign::SExt:
556 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
557 break;
558 case CCValAssign::ZExt:
559 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
560 break;
561 case CCValAssign::AExt:
562 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
563 break;
564 }
565
566 if (VA.isMemLoc()) {
567 unsigned LocMemOffset = VA.getLocMemOffset();
568 SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
569 StackPtr.getValueType());
570 MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
571 if (ArgAlign)
572 LargestAlignSeen = std::max(
573 LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
574 if (Flags.isByVal()) {
575 // The argument is a struct passed by value. According to LLVM, "Arg"
576 // is a pointer.
577 MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
578 Flags, DAG, dl));
579 } else {
580 MachinePointerInfo LocPI = MachinePointerInfo::getStack(
581 DAG.getMachineFunction(), LocMemOffset);
582 SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
583 MemOpChains.push_back(S);
584 }
585 continue;
586 }
587
588 // Arguments that can be passed on register must be kept at RegsToPass
589 // vector.
590 if (VA.isRegLoc())
591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
592 }
593
594 if (NeedsArgAlign && Subtarget.hasV60Ops()) {
595 LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
596 Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
597 LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
598 MFI.ensureMaxAlignment(LargestAlignSeen);
599 }
600 // Transform all store nodes into one single node because all store
601 // nodes are independent of each other.
602 if (!MemOpChains.empty())
603 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
604
605 SDValue Glue;
606 if (!CLI.IsTailCall) {
607 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
608 Glue = Chain.getValue(1);
609 }
610
611 // Build a sequence of copy-to-reg nodes chained together with token
612 // chain and flag operands which copy the outgoing args into registers.
613 // The Glue is necessary since all emitted instructions must be
614 // stuck together.
615 if (!CLI.IsTailCall) {
616 for (const auto &R : RegsToPass) {
617 Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
618 Glue = Chain.getValue(1);
619 }
620 } else {
621 // For tail calls lower the arguments to the 'real' stack slot.
622 //
623 // Force all the incoming stack arguments to be loaded from the stack
624 // before any new outgoing arguments are stored to the stack, because the
625 // outgoing stack slots may alias the incoming argument stack slots, and
626 // the alias isn't otherwise explicit. This is slightly more conservative
627 // than necessary, because it means that each store effectively depends
628 // on every argument instead of just those arguments it would clobber.
629 //
630 // Do not flag preceding copytoreg stuff together with the following stuff.
631 Glue = SDValue();
632 for (const auto &R : RegsToPass) {
633 Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
634 Glue = Chain.getValue(1);
635 }
636 Glue = SDValue();
637 }
638
639 bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
640 unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
641
642 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
643 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
644 // node so that legalize doesn't hack it.
645 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
646 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
647 } else if (ExternalSymbolSDNode *S =
648 dyn_cast<ExternalSymbolSDNode>(Callee)) {
649 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
650 }
651
652 // Returns a chain & a flag for retval copy to use.
653 SmallVector<SDValue, 8> Ops;
654 Ops.push_back(Chain);
655 Ops.push_back(Callee);
656
657 // Add argument registers to the end of the list so that they are
658 // known live into the call.
659 for (const auto &R : RegsToPass)
660 Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));
661
662 const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
663 assert(Mask && "Missing call preserved mask for calling convention");
664 Ops.push_back(DAG.getRegisterMask(Mask));
665
666 if (Glue.getNode())
667 Ops.push_back(Glue);
668
669 if (CLI.IsTailCall) {
670 MFI.setHasTailCall();
671 return DAG.getNode(HexagonISD::TC_RETURN, dl, MVT::Other, Ops);
672 }
673
674 // Set this here because we need to know this for "hasFP" in frame lowering.
675 // The target-independent code calls getFrameRegister before setting it, and
676 // getFrameRegister uses hasFP to determine whether the function has FP.
677 MFI.setHasCalls(true);
678
679 unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
680 Chain = DAG.getNode(OpCode, dl, {MVT::Other, MVT::Glue}, Ops);
681 Glue = Chain.getValue(1);
682
683 // Create the CALLSEQ_END node.
684 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, dl);
685 Glue = Chain.getValue(1);
686
687 // Handle result values, copying them out of physregs into vregs that we
688 // return.
689 return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
690 InVals, OutVals, Callee);
691 }
692
693 /// Returns true by value, base pointer and offset pointer and addressing
694 /// mode by reference if this node can be combined with a load / store to
695 /// form a post-indexed load / store.
getPostIndexedAddressParts(SDNode * N,SDNode * Op,SDValue & Base,SDValue & Offset,ISD::MemIndexedMode & AM,SelectionDAG & DAG) const696 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
697 SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
698 SelectionDAG &DAG) const {
699 LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
700 if (!LSN)
701 return false;
702 EVT VT = LSN->getMemoryVT();
703 if (!VT.isSimple())
704 return false;
705 bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
706 VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
707 VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
708 VT == MVT::v4i16 || VT == MVT::v8i8 ||
709 Subtarget.isHVXVectorType(VT.getSimpleVT());
710 if (!IsLegalType)
711 return false;
712
713 if (Op->getOpcode() != ISD::ADD)
714 return false;
715 Base = Op->getOperand(0);
716 Offset = Op->getOperand(1);
717 if (!isa<ConstantSDNode>(Offset.getNode()))
718 return false;
719 AM = ISD::POST_INC;
720
721 int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
722 return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
723 }
724
LowerFDIV(SDValue Op,SelectionDAG & DAG) const725 SDValue HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
726 if (DAG.getMachineFunction().getFunction().hasOptSize())
727 return SDValue();
728 else
729 return Op;
730 }
731
732 SDValue
LowerINLINEASM(SDValue Op,SelectionDAG & DAG) const733 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
734 MachineFunction &MF = DAG.getMachineFunction();
735 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
736 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
737 unsigned LR = HRI.getRARegister();
738
739 if ((Op.getOpcode() != ISD::INLINEASM &&
740 Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
741 return Op;
742
743 unsigned NumOps = Op.getNumOperands();
744 if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
745 --NumOps; // Ignore the flag operand.
746
747 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
748 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
749 unsigned NumVals = Flags.getNumOperandRegisters();
750 ++i; // Skip the ID value.
751
752 switch (Flags.getKind()) {
753 default:
754 llvm_unreachable("Bad flags!");
755 case InlineAsm::Kind::RegUse:
756 case InlineAsm::Kind::Imm:
757 case InlineAsm::Kind::Mem:
758 i += NumVals;
759 break;
760 case InlineAsm::Kind::Clobber:
761 case InlineAsm::Kind::RegDef:
762 case InlineAsm::Kind::RegDefEarlyClobber: {
763 for (; NumVals; --NumVals, ++i) {
764 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
765 if (Reg != LR)
766 continue;
767 HMFI.setHasClobberLR(true);
768 return Op;
769 }
770 break;
771 }
772 }
773 }
774
775 return Op;
776 }
777
778 // Need to transform ISD::PREFETCH into something that doesn't inherit
779 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
780 // SDNPMayStore.
LowerPREFETCH(SDValue Op,SelectionDAG & DAG) const781 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
782 SelectionDAG &DAG) const {
783 SDValue Chain = Op.getOperand(0);
784 SDValue Addr = Op.getOperand(1);
785 // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
786 // if the "reg" is fed by an "add".
787 SDLoc DL(Op);
788 SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
789 return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
790 }
791
792 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
793 // is marked as having side-effects, while the register read on Hexagon does
794 // not have any. TableGen refuses to accept the direct pattern from that node
795 // to the A4_tfrcpp.
LowerREADCYCLECOUNTER(SDValue Op,SelectionDAG & DAG) const796 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
797 SelectionDAG &DAG) const {
798 SDValue Chain = Op.getOperand(0);
799 SDLoc dl(Op);
800 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
801 return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
802 }
803
804 // Custom-handle ISD::READSTEADYCOUNTER because the target-independent SDNode
805 // is marked as having side-effects, while the register read on Hexagon does
806 // not have any. TableGen refuses to accept the direct pattern from that node
807 // to the A4_tfrcpp.
LowerREADSTEADYCOUNTER(SDValue Op,SelectionDAG & DAG) const808 SDValue HexagonTargetLowering::LowerREADSTEADYCOUNTER(SDValue Op,
809 SelectionDAG &DAG) const {
810 SDValue Chain = Op.getOperand(0);
811 SDLoc dl(Op);
812 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
813 return DAG.getNode(HexagonISD::READTIMER, dl, VTs, Chain);
814 }
815
LowerINTRINSIC_VOID(SDValue Op,SelectionDAG & DAG) const816 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
817 SelectionDAG &DAG) const {
818 SDValue Chain = Op.getOperand(0);
819 unsigned IntNo = Op.getConstantOperandVal(1);
820 // Lower the hexagon_prefetch builtin to DCFETCH, as above.
821 if (IntNo == Intrinsic::hexagon_prefetch) {
822 SDValue Addr = Op.getOperand(2);
823 SDLoc DL(Op);
824 SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
825 return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
826 }
827 return SDValue();
828 }
829
830 SDValue
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const831 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
832 SelectionDAG &DAG) const {
833 SDValue Chain = Op.getOperand(0);
834 SDValue Size = Op.getOperand(1);
835 SDValue Align = Op.getOperand(2);
836 SDLoc dl(Op);
837
838 ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
839 assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
840
841 unsigned A = AlignConst->getSExtValue();
842 auto &HFI = *Subtarget.getFrameLowering();
843 // "Zero" means natural stack alignment.
844 if (A == 0)
845 A = HFI.getStackAlign().value();
846
847 LLVM_DEBUG({
848 dbgs () << __func__ << " Align: " << A << " Size: ";
849 Size.getNode()->dump(&DAG);
850 dbgs() << "\n";
851 });
852
853 SDValue AC = DAG.getConstant(A, dl, MVT::i32);
854 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
855 SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
856
857 DAG.ReplaceAllUsesOfValueWith(Op, AA);
858 return AA;
859 }
860
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const861 SDValue HexagonTargetLowering::LowerFormalArguments(
862 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
863 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
864 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
865 MachineFunction &MF = DAG.getMachineFunction();
866 MachineFrameInfo &MFI = MF.getFrameInfo();
867 MachineRegisterInfo &MRI = MF.getRegInfo();
868
869 // Linux ABI treats var-arg calls the same way as regular ones.
870 bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
871
872 // Assign locations to all of the incoming arguments.
873 SmallVector<CCValAssign, 16> ArgLocs;
874 HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
875 *DAG.getContext(),
876 MF.getFunction().getFunctionType()->getNumParams());
877
878 if (Subtarget.useHVXOps())
879 CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
880 else if (DisableArgsMinAlignment)
881 CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
882 else
883 CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
884
885 // For LLVM, in the case when returning a struct by value (>8byte),
886 // the first argument is a pointer that points to the location on caller's
887 // stack where the return value will be stored. For Hexagon, the location on
888 // caller's stack is passed only when the struct size is smaller than (and
889 // equal to) 8 bytes. If not, no address will be passed into callee and
890 // callee return the result directly through R0/R1.
891 auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
892 switch (RC.getID()) {
893 case Hexagon::IntRegsRegClassID:
894 return Reg - Hexagon::R0 + 1;
895 case Hexagon::DoubleRegsRegClassID:
896 return (Reg - Hexagon::D0 + 1) * 2;
897 case Hexagon::HvxVRRegClassID:
898 return Reg - Hexagon::V0 + 1;
899 case Hexagon::HvxWRRegClassID:
900 return (Reg - Hexagon::W0 + 1) * 2;
901 }
902 llvm_unreachable("Unexpected register class");
903 };
904
905 auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
906 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
907 HFL.FirstVarArgSavedReg = 0;
908 HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
909
910 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
911 CCValAssign &VA = ArgLocs[i];
912 ISD::ArgFlagsTy Flags = Ins[i].Flags;
913 bool ByVal = Flags.isByVal();
914
915 // Arguments passed in registers:
916 // 1. 32- and 64-bit values and HVX vectors are passed directly,
917 // 2. Large structs are passed via an address, and the address is
918 // passed in a register.
919 if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
920 llvm_unreachable("ByValSize must be bigger than 8 bytes");
921
922 bool InReg = VA.isRegLoc() &&
923 (!ByVal || (ByVal && Flags.getByValSize() > 8));
924
925 if (InReg) {
926 MVT RegVT = VA.getLocVT();
927 if (VA.getLocInfo() == CCValAssign::BCvt)
928 RegVT = VA.getValVT();
929
930 const TargetRegisterClass *RC = getRegClassFor(RegVT);
931 Register VReg = MRI.createVirtualRegister(RC);
932 SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
933
934 // Treat values of type MVT::i1 specially: they are passed in
935 // registers of type i32, but they need to remain as values of
936 // type i1 for consistency of the argument lowering.
937 if (VA.getValVT() == MVT::i1) {
938 assert(RegVT.getSizeInBits() <= 32);
939 SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
940 Copy, DAG.getConstant(1, dl, RegVT));
941 Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
942 ISD::SETNE);
943 } else {
944 #ifndef NDEBUG
945 unsigned RegSize = RegVT.getSizeInBits();
946 assert(RegSize == 32 || RegSize == 64 ||
947 Subtarget.isHVXVectorType(RegVT));
948 #endif
949 }
950 InVals.push_back(Copy);
951 MRI.addLiveIn(VA.getLocReg(), VReg);
952 HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
953 } else {
954 assert(VA.isMemLoc() && "Argument should be passed in memory");
955
956 // If it's a byval parameter, then we need to compute the
957 // "real" size, not the size of the pointer.
958 unsigned ObjSize = Flags.isByVal()
959 ? Flags.getByValSize()
960 : VA.getLocVT().getStoreSizeInBits() / 8;
961
962 // Create the frame index object for this incoming parameter.
963 int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
964 int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
965 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
966
967 if (Flags.isByVal()) {
968 // If it's a pass-by-value aggregate, then do not dereference the stack
969 // location. Instead, we should generate a reference to the stack
970 // location.
971 InVals.push_back(FIN);
972 } else {
973 SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
974 MachinePointerInfo::getFixedStack(MF, FI, 0));
975 InVals.push_back(L);
976 }
977 }
978 }
979
980 if (IsVarArg && Subtarget.isEnvironmentMusl()) {
981 for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
982 MRI.addLiveIn(Hexagon::R0+i);
983 }
984
985 if (IsVarArg && Subtarget.isEnvironmentMusl()) {
986 HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
987 HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
988
989 // Create Frame index for the start of register saved area.
990 int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
991 bool RequiresPadding = (NumVarArgRegs & 1);
992 int RegSaveAreaSizePlusPadding = RequiresPadding
993 ? (NumVarArgRegs + 1) * 4
994 : NumVarArgRegs * 4;
995
996 if (RegSaveAreaSizePlusPadding > 0) {
997 // The offset to saved register area should be 8 byte aligned.
998 int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
999 if (!(RegAreaStart % 8))
1000 RegAreaStart = (RegAreaStart + 7) & -8;
1001
1002 int RegSaveAreaFrameIndex =
1003 MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
1004 HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
1005
1006 // This will point to the next argument passed via stack.
1007 int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
1008 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
1009 HMFI.setVarArgsFrameIndex(FI);
1010 } else {
1011 // This will point to the next argument passed via stack, when
1012 // there is no saved register area.
1013 int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
1014 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
1015 HMFI.setRegSavedAreaStartFrameIndex(FI);
1016 HMFI.setVarArgsFrameIndex(FI);
1017 }
1018 }
1019
1020
1021 if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
1022 // This will point to the next argument passed via stack.
1023 int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
1024 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
1025 HMFI.setVarArgsFrameIndex(FI);
1026 }
1027
1028 return Chain;
1029 }
1030
1031 SDValue
LowerVASTART(SDValue Op,SelectionDAG & DAG) const1032 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1033 // VASTART stores the address of the VarArgsFrameIndex slot into the
1034 // memory location argument.
1035 MachineFunction &MF = DAG.getMachineFunction();
1036 HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
1037 SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
1038 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1039
1040 if (!Subtarget.isEnvironmentMusl()) {
1041 return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
1042 MachinePointerInfo(SV));
1043 }
1044 auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
1045 auto &HFL = *Subtarget.getFrameLowering();
1046 SDLoc DL(Op);
1047 SmallVector<SDValue, 8> MemOps;
1048
1049 // Get frame index of va_list.
1050 SDValue FIN = Op.getOperand(1);
1051
1052 // If first Vararg register is odd, add 4 bytes to start of
1053 // saved register area to point to the first register location.
1054 // This is because the saved register area has to be 8 byte aligned.
1055 // In case of an odd start register, there will be 4 bytes of padding in
1056 // the beginning of saved register area. If all registers area used up,
1057 // the following condition will handle it correctly.
1058 SDValue SavedRegAreaStartFrameIndex =
1059 DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
1060
1061 auto PtrVT = getPointerTy(DAG.getDataLayout());
1062
1063 if (HFL.FirstVarArgSavedReg & 1)
1064 SavedRegAreaStartFrameIndex =
1065 DAG.getNode(ISD::ADD, DL, PtrVT,
1066 DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
1067 MVT::i32),
1068 DAG.getIntPtrConstant(4, DL));
1069
1070 // Store the saved register area start pointer.
1071 SDValue Store =
1072 DAG.getStore(Op.getOperand(0), DL,
1073 SavedRegAreaStartFrameIndex,
1074 FIN, MachinePointerInfo(SV));
1075 MemOps.push_back(Store);
1076
1077 // Store saved register area end pointer.
1078 FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
1079 FIN, DAG.getIntPtrConstant(4, DL));
1080 Store = DAG.getStore(Op.getOperand(0), DL,
1081 DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1082 PtrVT),
1083 FIN, MachinePointerInfo(SV, 4));
1084 MemOps.push_back(Store);
1085
1086 // Store overflow area pointer.
1087 FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
1088 FIN, DAG.getIntPtrConstant(4, DL));
1089 Store = DAG.getStore(Op.getOperand(0), DL,
1090 DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1091 PtrVT),
1092 FIN, MachinePointerInfo(SV, 8));
1093 MemOps.push_back(Store);
1094
1095 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1096 }
1097
1098 SDValue
LowerVACOPY(SDValue Op,SelectionDAG & DAG) const1099 HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
1100 // Assert that the linux ABI is enabled for the current compilation.
1101 assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
1102 SDValue Chain = Op.getOperand(0);
1103 SDValue DestPtr = Op.getOperand(1);
1104 SDValue SrcPtr = Op.getOperand(2);
1105 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
1106 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
1107 SDLoc DL(Op);
1108 // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
1109 // we need to memcopy 12 bytes from va_list to another similar list.
1110 return DAG.getMemcpy(
1111 Chain, DL, DestPtr, SrcPtr, DAG.getIntPtrConstant(12, DL), Align(4),
1112 /*isVolatile*/ false, false, /*CI=*/nullptr, std::nullopt,
1113 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
1114 }
1115
LowerSETCC(SDValue Op,SelectionDAG & DAG) const1116 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1117 const SDLoc &dl(Op);
1118 SDValue LHS = Op.getOperand(0);
1119 SDValue RHS = Op.getOperand(1);
1120 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1121 MVT ResTy = ty(Op);
1122 MVT OpTy = ty(LHS);
1123
1124 if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1125 MVT ElemTy = OpTy.getVectorElementType();
1126 assert(ElemTy.isScalarInteger());
1127 MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1128 OpTy.getVectorNumElements());
1129 return DAG.getSetCC(dl, ResTy,
1130 DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
1131 DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
1132 }
1133
1134 // Treat all other vector types as legal.
1135 if (ResTy.isVector())
1136 return Op;
1137
1138 // Comparisons of short integers should use sign-extend, not zero-extend,
1139 // since we can represent small negative values in the compare instructions.
1140 // The LLVM default is to use zero-extend arbitrarily in these cases.
1141 auto isSExtFree = [this](SDValue N) {
1142 switch (N.getOpcode()) {
1143 case ISD::TRUNCATE: {
1144 // A sign-extend of a truncate of a sign-extend is free.
1145 SDValue Op = N.getOperand(0);
1146 if (Op.getOpcode() != ISD::AssertSext)
1147 return false;
1148 EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
1149 unsigned ThisBW = ty(N).getSizeInBits();
1150 unsigned OrigBW = OrigTy.getSizeInBits();
1151 // The type that was sign-extended to get the AssertSext must be
1152 // narrower than the type of N (so that N has still the same value
1153 // as the original).
1154 return ThisBW >= OrigBW;
1155 }
1156 case ISD::LOAD:
1157 // We have sign-extended loads.
1158 return true;
1159 }
1160 return false;
1161 };
1162
1163 if (OpTy == MVT::i8 || OpTy == MVT::i16) {
1164 ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
1165 bool IsNegative = C && C->getAPIntValue().isNegative();
1166 if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
1167 return DAG.getSetCC(dl, ResTy,
1168 DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
1169 DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
1170 }
1171
1172 return SDValue();
1173 }
1174
1175 SDValue
LowerVSELECT(SDValue Op,SelectionDAG & DAG) const1176 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1177 SDValue PredOp = Op.getOperand(0);
1178 SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
1179 MVT OpTy = ty(Op1);
1180 const SDLoc &dl(Op);
1181
1182 if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1183 MVT ElemTy = OpTy.getVectorElementType();
1184 assert(ElemTy.isScalarInteger());
1185 MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1186 OpTy.getVectorNumElements());
1187 // Generate (trunc (select (_, sext, sext))).
1188 return DAG.getSExtOrTrunc(
1189 DAG.getSelect(dl, WideTy, PredOp,
1190 DAG.getSExtOrTrunc(Op1, dl, WideTy),
1191 DAG.getSExtOrTrunc(Op2, dl, WideTy)),
1192 dl, OpTy);
1193 }
1194
1195 return SDValue();
1196 }
1197
1198 SDValue
LowerConstantPool(SDValue Op,SelectionDAG & DAG) const1199 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1200 EVT ValTy = Op.getValueType();
1201 ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
1202 Constant *CVal = nullptr;
1203 bool isVTi1Type = false;
1204 if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
1205 if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
1206 IRBuilder<> IRB(CV->getContext());
1207 SmallVector<Constant*, 128> NewConst;
1208 unsigned VecLen = CV->getNumOperands();
1209 assert(isPowerOf2_32(VecLen) &&
1210 "conversion only supported for pow2 VectorSize");
1211 for (unsigned i = 0; i < VecLen; ++i)
1212 NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
1213
1214 CVal = ConstantVector::get(NewConst);
1215 isVTi1Type = true;
1216 }
1217 }
1218 Align Alignment = CPN->getAlign();
1219 bool IsPositionIndependent = isPositionIndependent();
1220 unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
1221
1222 unsigned Offset = 0;
1223 SDValue T;
1224 if (CPN->isMachineConstantPoolEntry())
1225 T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
1226 Offset, TF);
1227 else if (isVTi1Type)
1228 T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
1229 else
1230 T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
1231 TF);
1232
1233 assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
1234 "Inconsistent target flag encountered");
1235
1236 if (IsPositionIndependent)
1237 return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
1238 return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
1239 }
1240
1241 SDValue
LowerJumpTable(SDValue Op,SelectionDAG & DAG) const1242 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1243 EVT VT = Op.getValueType();
1244 int Idx = cast<JumpTableSDNode>(Op)->getIndex();
1245 if (isPositionIndependent()) {
1246 SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
1247 return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
1248 }
1249
1250 SDValue T = DAG.getTargetJumpTable(Idx, VT);
1251 return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
1252 }
1253
1254 SDValue
LowerRETURNADDR(SDValue Op,SelectionDAG & DAG) const1255 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1256 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1257 MachineFunction &MF = DAG.getMachineFunction();
1258 MachineFrameInfo &MFI = MF.getFrameInfo();
1259 MFI.setReturnAddressIsTaken(true);
1260
1261 EVT VT = Op.getValueType();
1262 SDLoc dl(Op);
1263 unsigned Depth = Op.getConstantOperandVal(0);
1264 if (Depth) {
1265 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1266 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
1267 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
1268 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
1269 MachinePointerInfo());
1270 }
1271
1272 // Return LR, which contains the return address. Mark it an implicit live-in.
1273 Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1274 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
1275 }
1276
1277 SDValue
LowerFRAMEADDR(SDValue Op,SelectionDAG & DAG) const1278 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1279 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1280 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1281 MFI.setFrameAddressIsTaken(true);
1282
1283 EVT VT = Op.getValueType();
1284 SDLoc dl(Op);
1285 unsigned Depth = Op.getConstantOperandVal(0);
1286 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
1287 HRI.getFrameRegister(), VT);
1288 while (Depth--)
1289 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
1290 MachinePointerInfo());
1291 return FrameAddr;
1292 }
1293
1294 SDValue
LowerATOMIC_FENCE(SDValue Op,SelectionDAG & DAG) const1295 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1296 SDLoc dl(Op);
1297 return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1298 }
1299
1300 SDValue
LowerGLOBALADDRESS(SDValue Op,SelectionDAG & DAG) const1301 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1302 SDLoc dl(Op);
1303 auto *GAN = cast<GlobalAddressSDNode>(Op);
1304 auto PtrVT = getPointerTy(DAG.getDataLayout());
1305 auto *GV = GAN->getGlobal();
1306 int64_t Offset = GAN->getOffset();
1307
1308 auto &HLOF = *HTM.getObjFileLowering();
1309 Reloc::Model RM = HTM.getRelocationModel();
1310
1311 if (RM == Reloc::Static) {
1312 SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1313 const GlobalObject *GO = GV->getAliaseeObject();
1314 if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1315 return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1316 return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1317 }
1318
1319 bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(GV);
1320 if (UsePCRel) {
1321 SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1322 HexagonII::MO_PCREL);
1323 return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1324 }
1325
1326 // Use GOT index.
1327 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1328 SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1329 SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1330 return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1331 }
1332
1333 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1334 SDValue
LowerBlockAddress(SDValue Op,SelectionDAG & DAG) const1335 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1336 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1337 SDLoc dl(Op);
1338 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1339
1340 Reloc::Model RM = HTM.getRelocationModel();
1341 if (RM == Reloc::Static) {
1342 SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1343 return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1344 }
1345
1346 SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1347 return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1348 }
1349
1350 SDValue
LowerGLOBAL_OFFSET_TABLE(SDValue Op,SelectionDAG & DAG) const1351 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1352 const {
1353 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1354 SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1355 HexagonII::MO_PCREL);
1356 return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1357 }
1358
1359 SDValue
GetDynamicTLSAddr(SelectionDAG & DAG,SDValue Chain,GlobalAddressSDNode * GA,SDValue Glue,EVT PtrVT,unsigned ReturnReg,unsigned char OperandFlags) const1360 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1361 GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1362 unsigned char OperandFlags) const {
1363 MachineFunction &MF = DAG.getMachineFunction();
1364 MachineFrameInfo &MFI = MF.getFrameInfo();
1365 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1366 SDLoc dl(GA);
1367 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1368 GA->getValueType(0),
1369 GA->getOffset(),
1370 OperandFlags);
1371 // Create Operands for the call.The Operands should have the following:
1372 // 1. Chain SDValue
1373 // 2. Callee which in this case is the Global address value.
1374 // 3. Registers live into the call.In this case its R0, as we
1375 // have just one argument to be passed.
1376 // 4. Glue.
1377 // Note: The order is important.
1378
1379 const auto &HRI = *Subtarget.getRegisterInfo();
1380 const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1381 assert(Mask && "Missing call preserved mask for calling convention");
1382 SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1383 DAG.getRegisterMask(Mask), Glue };
1384 Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1385
1386 // Inform MFI that function has calls.
1387 MFI.setAdjustsStack(true);
1388
1389 Glue = Chain.getValue(1);
1390 return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
1391 }
1392
1393 //
1394 // Lower using the initial executable model for TLS addresses
1395 //
1396 SDValue
LowerToTLSInitialExecModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1397 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1398 SelectionDAG &DAG) const {
1399 SDLoc dl(GA);
1400 int64_t Offset = GA->getOffset();
1401 auto PtrVT = getPointerTy(DAG.getDataLayout());
1402
1403 // Get the thread pointer.
1404 SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1405
1406 bool IsPositionIndependent = isPositionIndependent();
1407 unsigned char TF =
1408 IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1409
1410 // First generate the TLS symbol address
1411 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1412 Offset, TF);
1413
1414 SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1415
1416 if (IsPositionIndependent) {
1417 // Generate the GOT pointer in case of position independent code
1418 SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1419
1420 // Add the TLS Symbol address to GOT pointer.This gives
1421 // GOT relative relocation for the symbol.
1422 Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1423 }
1424
1425 // Load the offset value for TLS symbol.This offset is relative to
1426 // thread pointer.
1427 SDValue LoadOffset =
1428 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
1429
1430 // Address of the thread local variable is the add of thread
1431 // pointer and the offset of the variable.
1432 return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1433 }
1434
1435 //
1436 // Lower using the local executable model for TLS addresses
1437 //
1438 SDValue
LowerToTLSLocalExecModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1439 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1440 SelectionDAG &DAG) const {
1441 SDLoc dl(GA);
1442 int64_t Offset = GA->getOffset();
1443 auto PtrVT = getPointerTy(DAG.getDataLayout());
1444
1445 // Get the thread pointer.
1446 SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1447 // Generate the TLS symbol address
1448 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1449 HexagonII::MO_TPREL);
1450 SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1451
1452 // Address of the thread local variable is the add of thread
1453 // pointer and the offset of the variable.
1454 return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1455 }
1456
1457 //
1458 // Lower using the general dynamic model for TLS addresses
1459 //
1460 SDValue
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode * GA,SelectionDAG & DAG) const1461 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1462 SelectionDAG &DAG) const {
1463 SDLoc dl(GA);
1464 int64_t Offset = GA->getOffset();
1465 auto PtrVT = getPointerTy(DAG.getDataLayout());
1466
1467 // First generate the TLS symbol address
1468 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1469 HexagonII::MO_GDGOT);
1470
1471 // Then, generate the GOT pointer
1472 SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1473
1474 // Add the TLS symbol and the GOT pointer
1475 SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1476 SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1477
1478 // Copy over the argument to R0
1479 SDValue InGlue;
1480 Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InGlue);
1481 InGlue = Chain.getValue(1);
1482
1483 unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
1484 ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1485 : HexagonII::MO_GDPLT;
1486
1487 return GetDynamicTLSAddr(DAG, Chain, GA, InGlue, PtrVT,
1488 Hexagon::R0, Flags);
1489 }
1490
1491 //
1492 // Lower TLS addresses.
1493 //
1494 // For now for dynamic models, we only support the general dynamic model.
1495 //
1496 SDValue
LowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const1497 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1498 SelectionDAG &DAG) const {
1499 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1500
1501 switch (HTM.getTLSModel(GA->getGlobal())) {
1502 case TLSModel::GeneralDynamic:
1503 case TLSModel::LocalDynamic:
1504 return LowerToTLSGeneralDynamicModel(GA, DAG);
1505 case TLSModel::InitialExec:
1506 return LowerToTLSInitialExecModel(GA, DAG);
1507 case TLSModel::LocalExec:
1508 return LowerToTLSLocalExecModel(GA, DAG);
1509 }
1510 llvm_unreachable("Bogus TLS model");
1511 }
1512
1513 //===----------------------------------------------------------------------===//
1514 // TargetLowering Implementation
1515 //===----------------------------------------------------------------------===//
1516
HexagonTargetLowering(const TargetMachine & TM,const HexagonSubtarget & ST)1517 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1518 const HexagonSubtarget &ST)
1519 : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1520 Subtarget(ST) {
1521 auto &HRI = *Subtarget.getRegisterInfo();
1522
1523 setPrefLoopAlignment(Align(16));
1524 setMinFunctionAlignment(Align(4));
1525 setPrefFunctionAlignment(Align(16));
1526 setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1527 setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1528 setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1529
1530 setMaxAtomicSizeInBitsSupported(64);
1531 setMinCmpXchgSizeInBits(32);
1532
1533 if (EnableHexSDNodeSched)
1534 setSchedulingPreference(Sched::VLIW);
1535 else
1536 setSchedulingPreference(Sched::Source);
1537
1538 // Limits for inline expansion of memcpy/memmove
1539 MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1540 MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1541 MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1542 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1543 MaxStoresPerMemset = MaxStoresPerMemsetCL;
1544 MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1545
1546 //
1547 // Set up register classes.
1548 //
1549
1550 addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
1551 addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
1552 addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
1553 addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
1554 addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
1555 addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1556 addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
1557 addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
1558 addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
1559 addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1560 addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1561
1562 addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1563 addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1564
1565 //
1566 // Handling of scalar operations.
1567 //
1568 // All operations default to "legal", except:
1569 // - indexed loads and stores (pre-/post-incremented),
1570 // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1571 // ConstantFP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1572 // FLOG, FLOG2, FLOG10, FMAXIMUMNUM, FMINIMUMNUM, FNEARBYINT, FRINT, FROUND,
1573 // TRAP, FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG,
1574 // ZERO_EXTEND_VECTOR_INREG,
1575 // which default to "expand" for at least one type.
1576
1577 // Misc operations.
1578 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
1579 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
1580 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1581 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1582 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1583 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
1584 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
1585 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1586 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
1587 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
1588 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1589 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1590 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Custom);
1591 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1592 setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
1593 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
1594 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1595 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1596
1597 // Custom legalize GlobalAddress nodes into CONST32.
1598 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1599 setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
1600 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1601
1602 // Hexagon needs to optimize cases with negative constants.
1603 setOperationAction(ISD::SETCC, MVT::i8, Custom);
1604 setOperationAction(ISD::SETCC, MVT::i16, Custom);
1605 setOperationAction(ISD::SETCC, MVT::v4i8, Custom);
1606 setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1607
1608 // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1609 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1610 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1611 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1612 if (Subtarget.isEnvironmentMusl())
1613 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
1614 else
1615 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1616
1617 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1618 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1619 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1620
1621 if (EmitJumpTables)
1622 setMinimumJumpTableEntries(MinimumJumpTables);
1623 else
1624 setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
1625 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1626
1627 for (unsigned LegalIntOp :
1628 {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
1629 setOperationAction(LegalIntOp, MVT::i32, Legal);
1630 setOperationAction(LegalIntOp, MVT::i64, Legal);
1631 }
1632
1633 // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1634 // but they only operate on i64.
1635 for (MVT VT : MVT::integer_valuetypes()) {
1636 setOperationAction(ISD::UADDO, VT, Custom);
1637 setOperationAction(ISD::USUBO, VT, Custom);
1638 setOperationAction(ISD::SADDO, VT, Expand);
1639 setOperationAction(ISD::SSUBO, VT, Expand);
1640 setOperationAction(ISD::UADDO_CARRY, VT, Expand);
1641 setOperationAction(ISD::USUBO_CARRY, VT, Expand);
1642 }
1643 setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
1644 setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);
1645
1646 setOperationAction(ISD::CTLZ, MVT::i8, Promote);
1647 setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1648 setOperationAction(ISD::CTTZ, MVT::i8, Promote);
1649 setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1650
1651 // Popcount can count # of 1s in i64 but returns i32.
1652 setOperationAction(ISD::CTPOP, MVT::i8, Promote);
1653 setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1654 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1655 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
1656
1657 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1658 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
1659 setOperationAction(ISD::BSWAP, MVT::i32, Legal);
1660 setOperationAction(ISD::BSWAP, MVT::i64, Legal);
1661
1662 setOperationAction(ISD::FSHL, MVT::i32, Legal);
1663 setOperationAction(ISD::FSHL, MVT::i64, Legal);
1664 setOperationAction(ISD::FSHR, MVT::i32, Legal);
1665 setOperationAction(ISD::FSHR, MVT::i64, Legal);
1666
1667 for (unsigned IntExpOp :
1668 {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
1669 ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
1670 ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1671 ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1672 for (MVT VT : MVT::integer_valuetypes())
1673 setOperationAction(IntExpOp, VT, Expand);
1674 }
1675
1676 for (unsigned FPExpOp :
1677 {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1678 ISD::FPOW, ISD::FCOPYSIGN}) {
1679 for (MVT VT : MVT::fp_valuetypes())
1680 setOperationAction(FPExpOp, VT, Expand);
1681 }
1682
1683 // No extending loads from i32.
1684 for (MVT VT : MVT::integer_valuetypes()) {
1685 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1686 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
1688 }
1689 // Turn FP truncstore into trunc + store.
1690 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1691 // Turn FP extload into load/fpextend.
1692 for (MVT VT : MVT::fp_valuetypes())
1693 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1694
1695 // Expand BR_CC and SELECT_CC for all integer and fp types.
1696 for (MVT VT : MVT::integer_valuetypes()) {
1697 setOperationAction(ISD::BR_CC, VT, Expand);
1698 setOperationAction(ISD::SELECT_CC, VT, Expand);
1699 }
1700 for (MVT VT : MVT::fp_valuetypes()) {
1701 setOperationAction(ISD::BR_CC, VT, Expand);
1702 setOperationAction(ISD::SELECT_CC, VT, Expand);
1703 }
1704 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1705
1706 //
1707 // Handling of vector operations.
1708 //
1709
1710 // Set the action for vector operations to "expand", then override it with
1711 // either "custom" or "legal" for specific cases.
1712 // clang-format off
1713 static const unsigned VectExpOps[] = {
1714 // Integer arithmetic:
1715 ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
1716 ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
1717 ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1718 // Logical/bit:
1719 ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
1720 ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
1721 // Floating point arithmetic/math functions:
1722 ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
1723 ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
1724 ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
1725 ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
1726 ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
1727 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
1728 ISD::FSINCOS, ISD::FLDEXP,
1729 // Misc:
1730 ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
1731 // Vector:
1732 ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
1733 ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
1734 ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
1735 ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
1736 ISD::SPLAT_VECTOR,
1737 };
1738 // clang-format on
1739
1740 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1741 for (unsigned VectExpOp : VectExpOps)
1742 setOperationAction(VectExpOp, VT, Expand);
1743
1744 // Expand all extending loads and truncating stores:
1745 for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
1746 if (TargetVT == VT)
1747 continue;
1748 setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1749 setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
1750 setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
1751 setTruncStoreAction(VT, TargetVT, Expand);
1752 }
1753
1754 // Normalize all inputs to SELECT to be vectors of i32.
1755 if (VT.getVectorElementType() != MVT::i32) {
1756 MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
1757 setOperationAction(ISD::SELECT, VT, Promote);
1758 AddPromotedToType(ISD::SELECT, VT, VT32);
1759 }
1760 setOperationAction(ISD::SRA, VT, Custom);
1761 setOperationAction(ISD::SHL, VT, Custom);
1762 setOperationAction(ISD::SRL, VT, Custom);
1763 }
1764
1765 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1766 setOperationAction(ISD::SADDSAT, MVT::i64, Legal);
1767
1768 // Extending loads from (native) vectors of i8 into (native) vectors of i16
1769 // are legal.
1770 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1771 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1772 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1773 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1774 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1775 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1776
1777 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1778 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1779 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1780
1781 // Types natively supported:
1782 for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1783 MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1784 setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
1785 setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1786 setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
1787 setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
1788 setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
1789 setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
1790
1791 setOperationAction(ISD::ADD, NativeVT, Legal);
1792 setOperationAction(ISD::SUB, NativeVT, Legal);
1793 setOperationAction(ISD::MUL, NativeVT, Legal);
1794 setOperationAction(ISD::AND, NativeVT, Legal);
1795 setOperationAction(ISD::OR, NativeVT, Legal);
1796 setOperationAction(ISD::XOR, NativeVT, Legal);
1797
1798 if (NativeVT.getVectorElementType() != MVT::i1) {
1799 setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
1800 setOperationAction(ISD::BSWAP, NativeVT, Legal);
1801 setOperationAction(ISD::BITREVERSE, NativeVT, Legal);
1802 }
1803 }
1804
1805 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
1806 setOperationAction(ISD::SMIN, VT, Legal);
1807 setOperationAction(ISD::SMAX, VT, Legal);
1808 setOperationAction(ISD::UMIN, VT, Legal);
1809 setOperationAction(ISD::UMAX, VT, Legal);
1810 }
1811
1812 // Custom lower unaligned loads.
1813 // Also, for both loads and stores, verify the alignment of the address
1814 // in case it is a compile-time constant. This is a usability feature to
1815 // provide a meaningful error message to users.
1816 for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1817 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1818 setOperationAction(ISD::LOAD, VT, Custom);
1819 setOperationAction(ISD::STORE, VT, Custom);
1820 }
1821
1822 // Custom-lower load/stores of boolean vectors.
1823 for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1824 setOperationAction(ISD::LOAD, VT, Custom);
1825 setOperationAction(ISD::STORE, VT, Custom);
1826 }
1827
1828 // Normalize integer compares to EQ/GT/UGT
1829 for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
1830 MVT::v2i32}) {
1831 setCondCodeAction(ISD::SETNE, VT, Expand);
1832 setCondCodeAction(ISD::SETLE, VT, Expand);
1833 setCondCodeAction(ISD::SETGE, VT, Expand);
1834 setCondCodeAction(ISD::SETLT, VT, Expand);
1835 setCondCodeAction(ISD::SETULE, VT, Expand);
1836 setCondCodeAction(ISD::SETUGE, VT, Expand);
1837 setCondCodeAction(ISD::SETULT, VT, Expand);
1838 }
1839
1840 // Normalize boolean compares to [U]LE/[U]LT
1841 for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1842 setCondCodeAction(ISD::SETGE, VT, Expand);
1843 setCondCodeAction(ISD::SETGT, VT, Expand);
1844 setCondCodeAction(ISD::SETUGE, VT, Expand);
1845 setCondCodeAction(ISD::SETUGT, VT, Expand);
1846 }
1847
1848 // Custom-lower bitcasts from i8 to v8i1.
1849 setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1850 setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1851 setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
1852 setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
1853 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
1854 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1855 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
1856
1857 // V5+.
1858 setOperationAction(ISD::FMA, MVT::f64, Expand);
1859 setOperationAction(ISD::FADD, MVT::f64, Expand);
1860 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1861 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1862 setOperationAction(ISD::FDIV, MVT::f32, Custom);
1863
1864 setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Legal);
1865 setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Legal);
1866
1867 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
1868 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
1869 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
1870 setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
1871 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
1872 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
1873 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
1874 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
1875 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
1876 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
1877 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
1878 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
1879
1880 // Special handling for half-precision floating point conversions.
1881 // Lower half float conversions into library calls.
1882 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1883 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1884 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1885 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1886
1887 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
1888 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
1889 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1890 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1891
1892 // Handling of indexed loads/stores: default is "expand".
1893 //
1894 for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1895 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1896 setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1897 setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1898 }
1899
1900 // Subtarget-specific operation actions.
1901 //
1902 if (Subtarget.hasV60Ops()) {
1903 setOperationAction(ISD::ROTL, MVT::i32, Legal);
1904 setOperationAction(ISD::ROTL, MVT::i64, Legal);
1905 setOperationAction(ISD::ROTR, MVT::i32, Legal);
1906 setOperationAction(ISD::ROTR, MVT::i64, Legal);
1907 }
1908 if (Subtarget.hasV66Ops()) {
1909 setOperationAction(ISD::FADD, MVT::f64, Legal);
1910 setOperationAction(ISD::FSUB, MVT::f64, Legal);
1911 }
1912 if (Subtarget.hasV67Ops()) {
1913 setOperationAction(ISD::FMINIMUMNUM, MVT::f64, Legal);
1914 setOperationAction(ISD::FMAXIMUMNUM, MVT::f64, Legal);
1915 setOperationAction(ISD::FMUL, MVT::f64, Legal);
1916 }
1917
1918 setTargetDAGCombine(ISD::OR);
1919 setTargetDAGCombine(ISD::TRUNCATE);
1920 setTargetDAGCombine(ISD::VSELECT);
1921
1922 if (Subtarget.useHVXOps())
1923 initializeHVXLowering();
1924
1925 computeRegisterProperties(&HRI);
1926 }
1927
getTargetNodeName(unsigned Opcode) const1928 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1929 switch ((HexagonISD::NodeType)Opcode) {
1930 case HexagonISD::ADDC: return "HexagonISD::ADDC";
1931 case HexagonISD::SUBC: return "HexagonISD::SUBC";
1932 case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
1933 case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
1934 case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
1935 case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
1936 case HexagonISD::CALL: return "HexagonISD::CALL";
1937 case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
1938 case HexagonISD::CALLR: return "HexagonISD::CALLR";
1939 case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
1940 case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
1941 case HexagonISD::CONST32: return "HexagonISD::CONST32";
1942 case HexagonISD::CP: return "HexagonISD::CP";
1943 case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
1944 case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
1945 case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
1946 case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
1947 case HexagonISD::INSERT: return "HexagonISD::INSERT";
1948 case HexagonISD::JT: return "HexagonISD::JT";
1949 case HexagonISD::RET_GLUE: return "HexagonISD::RET_GLUE";
1950 case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
1951 case HexagonISD::VASL: return "HexagonISD::VASL";
1952 case HexagonISD::VASR: return "HexagonISD::VASR";
1953 case HexagonISD::VLSR: return "HexagonISD::VLSR";
1954 case HexagonISD::MFSHL: return "HexagonISD::MFSHL";
1955 case HexagonISD::MFSHR: return "HexagonISD::MFSHR";
1956 case HexagonISD::SSAT: return "HexagonISD::SSAT";
1957 case HexagonISD::USAT: return "HexagonISD::USAT";
1958 case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI";
1959 case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI";
1960 case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI";
1961 case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
1962 case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
1963 case HexagonISD::VROR: return "HexagonISD::VROR";
1964 case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
1965 case HexagonISD::READTIMER: return "HexagonISD::READTIMER";
1966 case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
1967 case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
1968 case HexagonISD::D2P: return "HexagonISD::D2P";
1969 case HexagonISD::P2D: return "HexagonISD::P2D";
1970 case HexagonISD::V2Q: return "HexagonISD::V2Q";
1971 case HexagonISD::Q2V: return "HexagonISD::Q2V";
1972 case HexagonISD::QCAT: return "HexagonISD::QCAT";
1973 case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
1974 case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
1975 case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND";
1976 case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE";
1977 case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
1978 case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
1979 case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
1980 case HexagonISD::ISEL: return "HexagonISD::ISEL";
1981 case HexagonISD::OP_END: break;
1982 }
1983 return nullptr;
1984 }
1985
1986 bool
validateConstPtrAlignment(SDValue Ptr,Align NeedAlign,const SDLoc & dl,SelectionDAG & DAG) const1987 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1988 const SDLoc &dl, SelectionDAG &DAG) const {
1989 auto *CA = dyn_cast<ConstantSDNode>(Ptr);
1990 if (!CA)
1991 return true;
1992 unsigned Addr = CA->getZExtValue();
1993 Align HaveAlign =
1994 Addr != 0 ? Align(1ull << llvm::countr_zero(Addr)) : NeedAlign;
1995 if (HaveAlign >= NeedAlign)
1996 return true;
1997
1998 static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1999
2000 struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
2001 DiagnosticInfoMisalignedTrap(StringRef M)
2002 : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
2003 void print(DiagnosticPrinter &DP) const override {
2004 DP << Msg;
2005 }
2006 static bool classof(const DiagnosticInfo *DI) {
2007 return DI->getKind() == DK_MisalignedTrap;
2008 }
2009 StringRef Msg;
2010 };
2011
2012 std::string ErrMsg;
2013 raw_string_ostream O(ErrMsg);
2014 O << "Misaligned constant address: " << format_hex(Addr, 10)
2015 << " has alignment " << HaveAlign.value()
2016 << ", but the memory access requires " << NeedAlign.value();
2017 if (DebugLoc DL = dl.getDebugLoc())
2018 DL.print(O << ", at ");
2019 O << ". The instruction has been replaced with a trap.";
2020
2021 DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
2022 return false;
2023 }
2024
2025 SDValue
replaceMemWithUndef(SDValue Op,SelectionDAG & DAG) const2026 HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
2027 const {
2028 const SDLoc &dl(Op);
2029 auto *LS = cast<LSBaseSDNode>(Op.getNode());
2030 assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
2031
2032 SDValue Chain = LS->getChain();
2033 SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
2034 if (LS->getOpcode() == ISD::LOAD)
2035 return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
2036 return Trap;
2037 }
2038
2039 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
2040 // intrinsic.
isBrevLdIntrinsic(const Value * Inst)2041 static bool isBrevLdIntrinsic(const Value *Inst) {
2042 unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
2043 return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
2044 ID == Intrinsic::hexagon_L2_loadri_pbr ||
2045 ID == Intrinsic::hexagon_L2_loadrh_pbr ||
2046 ID == Intrinsic::hexagon_L2_loadruh_pbr ||
2047 ID == Intrinsic::hexagon_L2_loadrb_pbr ||
2048 ID == Intrinsic::hexagon_L2_loadrub_pbr);
2049 }
2050
2051 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
2052 // instruction. So far we only handle bitcast, extract value and bit reverse
2053 // load intrinsic instructions. Should we handle CGEP ?
getBrevLdObject(Value * V)2054 static Value *getBrevLdObject(Value *V) {
2055 if (Operator::getOpcode(V) == Instruction::ExtractValue ||
2056 Operator::getOpcode(V) == Instruction::BitCast)
2057 V = cast<Operator>(V)->getOperand(0);
2058 else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
2059 V = cast<Instruction>(V)->getOperand(0);
2060 return V;
2061 }
2062
2063 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
2064 // a back edge. If the back edge comes from the intrinsic itself, the incoming
2065 // edge is returned.
returnEdge(const PHINode * PN,Value * IntrBaseVal)2066 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
2067 const BasicBlock *Parent = PN->getParent();
2068 int Idx = -1;
2069 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
2070 BasicBlock *Blk = PN->getIncomingBlock(i);
2071 // Determine if the back edge is originated from intrinsic.
2072 if (Blk == Parent) {
2073 Value *BackEdgeVal = PN->getIncomingValue(i);
2074 Value *BaseVal;
2075 // Loop over till we return the same Value or we hit the IntrBaseVal.
2076 do {
2077 BaseVal = BackEdgeVal;
2078 BackEdgeVal = getBrevLdObject(BackEdgeVal);
2079 } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
2080 // If the getBrevLdObject returns IntrBaseVal, we should return the
2081 // incoming edge.
2082 if (IntrBaseVal == BackEdgeVal)
2083 continue;
2084 Idx = i;
2085 break;
2086 } else // Set the node to incoming edge.
2087 Idx = i;
2088 }
2089 assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
2090 return PN->getIncomingValue(Idx);
2091 }
2092
2093 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
2094 // pointer points to, for the bit-reverse load intrinsic. Setting this to
2095 // memoperand might help alias analysis to figure out the dependencies.
getUnderLyingObjectForBrevLdIntr(Value * V)2096 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
2097 Value *IntrBaseVal = V;
2098 Value *BaseVal;
2099 // Loop over till we return the same Value, implies we either figure out
2100 // the object or we hit a PHI
2101 do {
2102 BaseVal = V;
2103 V = getBrevLdObject(V);
2104 } while (BaseVal != V);
2105
2106 // Identify the object from PHINode.
2107 if (const PHINode *PN = dyn_cast<PHINode>(V))
2108 return returnEdge(PN, IntrBaseVal);
2109 // For non PHI nodes, the object is the last value returned by getBrevLdObject
2110 else
2111 return V;
2112 }
2113
2114 /// Given an intrinsic, checks if on the target the intrinsic will need to map
2115 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
2116 /// true and store the intrinsic information into the IntrinsicInfo that was
2117 /// passed to the function.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const2118 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2119 const CallInst &I,
2120 MachineFunction &MF,
2121 unsigned Intrinsic) const {
2122 switch (Intrinsic) {
2123 case Intrinsic::hexagon_L2_loadrd_pbr:
2124 case Intrinsic::hexagon_L2_loadri_pbr:
2125 case Intrinsic::hexagon_L2_loadrh_pbr:
2126 case Intrinsic::hexagon_L2_loadruh_pbr:
2127 case Intrinsic::hexagon_L2_loadrb_pbr:
2128 case Intrinsic::hexagon_L2_loadrub_pbr: {
2129 Info.opc = ISD::INTRINSIC_W_CHAIN;
2130 auto &DL = I.getDataLayout();
2131 auto &Cont = I.getCalledFunction()->getParent()->getContext();
2132 // The intrinsic function call is of the form { ElTy, i8* }
2133 // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
2134 // should be derived from ElTy.
2135 Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
2136 Info.memVT = MVT::getVT(ElTy);
2137 llvm::Value *BasePtrVal = I.getOperand(0);
2138 Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
2139 // The offset value comes through Modifier register. For now, assume the
2140 // offset is 0.
2141 Info.offset = 0;
2142 Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
2143 Info.flags = MachineMemOperand::MOLoad;
2144 return true;
2145 }
2146 case Intrinsic::hexagon_V6_vgathermw:
2147 case Intrinsic::hexagon_V6_vgathermw_128B:
2148 case Intrinsic::hexagon_V6_vgathermh:
2149 case Intrinsic::hexagon_V6_vgathermh_128B:
2150 case Intrinsic::hexagon_V6_vgathermhw:
2151 case Intrinsic::hexagon_V6_vgathermhw_128B:
2152 case Intrinsic::hexagon_V6_vgathermwq:
2153 case Intrinsic::hexagon_V6_vgathermwq_128B:
2154 case Intrinsic::hexagon_V6_vgathermhq:
2155 case Intrinsic::hexagon_V6_vgathermhq_128B:
2156 case Intrinsic::hexagon_V6_vgathermhwq:
2157 case Intrinsic::hexagon_V6_vgathermhwq_128B: {
2158 const Module &M = *I.getParent()->getParent()->getParent();
2159 Info.opc = ISD::INTRINSIC_W_CHAIN;
2160 Type *VecTy = I.getArgOperand(1)->getType();
2161 Info.memVT = MVT::getVT(VecTy);
2162 Info.ptrVal = I.getArgOperand(0);
2163 Info.offset = 0;
2164 Info.align =
2165 MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
2166 Info.flags = MachineMemOperand::MOLoad |
2167 MachineMemOperand::MOStore |
2168 MachineMemOperand::MOVolatile;
2169 return true;
2170 }
2171 default:
2172 break;
2173 }
2174 return false;
2175 }
2176
hasBitTest(SDValue X,SDValue Y) const2177 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2178 return X.getValueType().isScalarInteger(); // 'tstbit'
2179 }
2180
isTruncateFree(Type * Ty1,Type * Ty2) const2181 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2182 return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
2183 }
2184
isTruncateFree(EVT VT1,EVT VT2) const2185 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2186 if (!VT1.isSimple() || !VT2.isSimple())
2187 return false;
2188 return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2189 }
2190
isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT VT) const2191 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2192 const MachineFunction &MF, EVT VT) const {
2193 return isOperationLegalOrCustom(ISD::FMA, VT);
2194 }
2195
2196 // Should we expand the build vector with shuffles?
shouldExpandBuildVectorWithShuffles(EVT VT,unsigned DefinedValues) const2197 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2198 unsigned DefinedValues) const {
2199 return false;
2200 }
2201
isExtractSubvectorCheap(EVT ResVT,EVT SrcVT,unsigned Index) const2202 bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2203 unsigned Index) const {
2204 assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
2205 if (!ResVT.isSimple() || !SrcVT.isSimple())
2206 return false;
2207
2208 MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
2209 if (ResTy.getVectorElementType() != MVT::i1)
2210 return true;
2211
2212 // Non-HVX bool vectors are relatively cheap.
2213 return SrcTy.getVectorNumElements() <= 8;
2214 }
2215
isTargetCanonicalConstantNode(SDValue Op) const2216 bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
2217 return Op.getOpcode() == ISD::CONCAT_VECTORS ||
2218 TargetLowering::isTargetCanonicalConstantNode(Op);
2219 }
2220
isShuffleMaskLegal(ArrayRef<int> Mask,EVT VT) const2221 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2222 EVT VT) const {
2223 return true;
2224 }
2225
2226 TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const2227 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2228 unsigned VecLen = VT.getVectorMinNumElements();
2229 MVT ElemTy = VT.getVectorElementType();
2230
2231 if (VecLen == 1 || VT.isScalableVector())
2232 return TargetLoweringBase::TypeScalarizeVector;
2233
2234 if (Subtarget.useHVXOps()) {
2235 unsigned Action = getPreferredHvxVectorAction(VT);
2236 if (Action != ~0u)
2237 return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2238 }
2239
2240 // Always widen (remaining) vectors of i1.
2241 if (ElemTy == MVT::i1)
2242 return TargetLoweringBase::TypeWidenVector;
2243 // Widen non-power-of-2 vectors. Such types cannot be split right now,
2244 // and computeRegisterProperties will override "split" with "widen",
2245 // which can cause other issues.
2246 if (!isPowerOf2_32(VecLen))
2247 return TargetLoweringBase::TypeWidenVector;
2248
2249 return TargetLoweringBase::TypeSplitVector;
2250 }
2251
2252 TargetLoweringBase::LegalizeAction
getCustomOperationAction(SDNode & Op) const2253 HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
2254 if (Subtarget.useHVXOps()) {
2255 unsigned Action = getCustomHvxOperationAction(Op);
2256 if (Action != ~0u)
2257 return static_cast<TargetLoweringBase::LegalizeAction>(Action);
2258 }
2259 return TargetLoweringBase::Legal;
2260 }
2261
2262 std::pair<SDValue, int>
getBaseAndOffset(SDValue Addr) const2263 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2264 if (Addr.getOpcode() == ISD::ADD) {
2265 SDValue Op1 = Addr.getOperand(1);
2266 if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
2267 return { Addr.getOperand(0), CN->getSExtValue() };
2268 }
2269 return { Addr, 0 };
2270 }
2271
2272 // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
2273 // to select data from, V3 is the permutation.
2274 SDValue
LowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const2275 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2276 const {
2277 const auto *SVN = cast<ShuffleVectorSDNode>(Op);
2278 ArrayRef<int> AM = SVN->getMask();
2279 assert(AM.size() <= 8 && "Unexpected shuffle mask");
2280 unsigned VecLen = AM.size();
2281
2282 MVT VecTy = ty(Op);
2283 assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2284 "HVX shuffles should be legal");
2285 assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
2286
2287 SDValue Op0 = Op.getOperand(0);
2288 SDValue Op1 = Op.getOperand(1);
2289 const SDLoc &dl(Op);
2290
2291 // If the inputs are not the same as the output, bail. This is not an
2292 // error situation, but complicates the handling and the default expansion
2293 // (into BUILD_VECTOR) should be adequate.
2294 if (ty(Op0) != VecTy || ty(Op1) != VecTy)
2295 return SDValue();
2296
2297 // Normalize the mask so that the first non-negative index comes from
2298 // the first operand.
2299 SmallVector<int, 8> Mask(AM);
2300 unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
2301 if (F == AM.size())
2302 return DAG.getUNDEF(VecTy);
2303 if (AM[F] >= int(VecLen)) {
2304 ShuffleVectorSDNode::commuteMask(Mask);
2305 std::swap(Op0, Op1);
2306 }
2307
2308 // Express the shuffle mask in terms of bytes.
2309 SmallVector<int,8> ByteMask;
2310 unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2311 for (int M : Mask) {
2312 if (M < 0) {
2313 for (unsigned j = 0; j != ElemBytes; ++j)
2314 ByteMask.push_back(-1);
2315 } else {
2316 for (unsigned j = 0; j != ElemBytes; ++j)
2317 ByteMask.push_back(M*ElemBytes + j);
2318 }
2319 }
2320 assert(ByteMask.size() <= 8);
2321
2322 // All non-undef (non-negative) indexes are well within [0..127], so they
2323 // fit in a single byte. Build two 64-bit words:
2324 // - MaskIdx where each byte is the corresponding index (for non-negative
2325 // indexes), and 0xFF for negative indexes, and
2326 // - MaskUnd that has 0xFF for each negative index.
2327 uint64_t MaskIdx = 0;
2328 uint64_t MaskUnd = 0;
2329 for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2330 unsigned S = 8*i;
2331 uint64_t M = ByteMask[i] & 0xFF;
2332 if (M == 0xFF)
2333 MaskUnd |= M << S;
2334 MaskIdx |= M << S;
2335 }
2336
2337 if (ByteMask.size() == 4) {
2338 // Identity.
2339 if (MaskIdx == (0x03020100 | MaskUnd))
2340 return Op0;
2341 // Byte swap.
2342 if (MaskIdx == (0x00010203 | MaskUnd)) {
2343 SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2344 SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2345 return DAG.getBitcast(VecTy, T1);
2346 }
2347
2348 // Byte packs.
2349 SDValue Concat10 =
2350 getCombine(Op1, Op0, dl, typeJoin({ty(Op1), ty(Op0)}), DAG);
2351 if (MaskIdx == (0x06040200 | MaskUnd))
2352 return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2353 if (MaskIdx == (0x07050301 | MaskUnd))
2354 return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2355
2356 SDValue Concat01 =
2357 getCombine(Op0, Op1, dl, typeJoin({ty(Op0), ty(Op1)}), DAG);
2358 if (MaskIdx == (0x02000604 | MaskUnd))
2359 return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2360 if (MaskIdx == (0x03010705 | MaskUnd))
2361 return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2362 }
2363
2364 if (ByteMask.size() == 8) {
2365 // Identity.
2366 if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2367 return Op0;
2368 // Byte swap.
2369 if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2370 SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2371 SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2372 return DAG.getBitcast(VecTy, T1);
2373 }
2374
2375 // Halfword picks.
2376 if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2377 return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2378 if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2379 return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2380 if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2381 return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2382 if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2383 return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2384 if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2385 VectorPair P = opSplit(Op0, dl, DAG);
2386 return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2387 }
2388
2389 // Byte packs.
2390 if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2391 return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2392 if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2393 return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2394 }
2395
2396 return SDValue();
2397 }
2398
2399 SDValue
getSplatValue(SDValue Op,SelectionDAG & DAG) const2400 HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
2401 switch (Op.getOpcode()) {
2402 case ISD::BUILD_VECTOR:
2403 if (SDValue S = cast<BuildVectorSDNode>(Op)->getSplatValue())
2404 return S;
2405 break;
2406 case ISD::SPLAT_VECTOR:
2407 return Op.getOperand(0);
2408 }
2409 return SDValue();
2410 }
2411
2412 // Create a Hexagon-specific node for shifting a vector by an integer.
2413 SDValue
getVectorShiftByInt(SDValue Op,SelectionDAG & DAG) const2414 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2415 const {
2416 unsigned NewOpc;
2417 switch (Op.getOpcode()) {
2418 case ISD::SHL:
2419 NewOpc = HexagonISD::VASL;
2420 break;
2421 case ISD::SRA:
2422 NewOpc = HexagonISD::VASR;
2423 break;
2424 case ISD::SRL:
2425 NewOpc = HexagonISD::VLSR;
2426 break;
2427 default:
2428 llvm_unreachable("Unexpected shift opcode");
2429 }
2430
2431 if (SDValue Sp = getSplatValue(Op.getOperand(1), DAG))
2432 return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), Sp);
2433 return SDValue();
2434 }
2435
2436 SDValue
LowerVECTOR_SHIFT(SDValue Op,SelectionDAG & DAG) const2437 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2438 const SDLoc &dl(Op);
2439
2440 // First try to convert the shift (by vector) to a shift by a scalar.
2441 // If we first split the shift, the shift amount will become 'extract
2442 // subvector', and will no longer be recognized as scalar.
2443 SDValue Res = Op;
2444 if (SDValue S = getVectorShiftByInt(Op, DAG))
2445 Res = S;
2446
2447 unsigned Opc = Res.getOpcode();
2448 switch (Opc) {
2449 case HexagonISD::VASR:
2450 case HexagonISD::VLSR:
2451 case HexagonISD::VASL:
2452 break;
2453 default:
2454 // No instructions for shifts by non-scalars.
2455 return SDValue();
2456 }
2457
2458 MVT ResTy = ty(Res);
2459 if (ResTy.getVectorElementType() != MVT::i8)
2460 return Res;
2461
2462 // For shifts of i8, extend the inputs to i16, then truncate back to i8.
2463 assert(ResTy.getVectorElementType() == MVT::i8);
2464 SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
2465
2466 auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
2467 MVT Ty = ty(V);
2468 MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
2469 SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
2470 : DAG.getZExtOrTrunc(V, dl, ExtTy);
2471 SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
2472 return DAG.getZExtOrTrunc(ExtS, dl, Ty);
2473 };
2474
2475 if (ResTy.getSizeInBits() == 32)
2476 return ShiftPartI8(Opc, Val, Amt);
2477
2478 auto [LoV, HiV] = opSplit(Val, dl, DAG);
2479 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
2480 {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
2481 }
2482
2483 SDValue
LowerROTL(SDValue Op,SelectionDAG & DAG) const2484 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2485 if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2486 return Op;
2487 return SDValue();
2488 }
2489
2490 SDValue
LowerBITCAST(SDValue Op,SelectionDAG & DAG) const2491 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2492 MVT ResTy = ty(Op);
2493 SDValue InpV = Op.getOperand(0);
2494 MVT InpTy = ty(InpV);
2495 assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2496 const SDLoc &dl(Op);
2497
2498 // Handle conversion from i8 to v8i1.
2499 if (InpTy == MVT::i8) {
2500 if (ResTy == MVT::v8i1) {
2501 SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2502 SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2503 return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2504 }
2505 return SDValue();
2506 }
2507
2508 return Op;
2509 }
2510
2511 bool
getBuildVectorConstInts(ArrayRef<SDValue> Values,MVT VecTy,SelectionDAG & DAG,MutableArrayRef<ConstantInt * > Consts) const2512 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2513 MVT VecTy, SelectionDAG &DAG,
2514 MutableArrayRef<ConstantInt*> Consts) const {
2515 MVT ElemTy = VecTy.getVectorElementType();
2516 unsigned ElemWidth = ElemTy.getSizeInBits();
2517 IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2518 bool AllConst = true;
2519
2520 for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2521 SDValue V = Values[i];
2522 if (V.isUndef()) {
2523 Consts[i] = ConstantInt::get(IntTy, 0);
2524 continue;
2525 }
2526 // Make sure to always cast to IntTy.
2527 if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2528 const ConstantInt *CI = CN->getConstantIntValue();
2529 Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2530 } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2531 const ConstantFP *CF = CN->getConstantFPValue();
2532 APInt A = CF->getValueAPF().bitcastToAPInt();
2533 Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2534 } else {
2535 AllConst = false;
2536 }
2537 }
2538 return AllConst;
2539 }
2540
2541 SDValue
buildVector32(ArrayRef<SDValue> Elem,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const2542 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2543 MVT VecTy, SelectionDAG &DAG) const {
2544 MVT ElemTy = VecTy.getVectorElementType();
2545 assert(VecTy.getVectorNumElements() == Elem.size());
2546
2547 SmallVector<ConstantInt*,4> Consts(Elem.size());
2548 bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2549
2550 unsigned First, Num = Elem.size();
2551 for (First = 0; First != Num; ++First) {
2552 if (!isUndef(Elem[First]))
2553 break;
2554 }
2555 if (First == Num)
2556 return DAG.getUNDEF(VecTy);
2557
2558 if (AllConst &&
2559 llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2560 return getZero(dl, VecTy, DAG);
2561
2562 if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2563 assert(Elem.size() == 2);
2564 if (AllConst) {
2565 // The 'Consts' array will have all values as integers regardless
2566 // of the vector element type.
2567 uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2568 Consts[1]->getZExtValue() << 16;
2569 return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
2570 }
2571 SDValue E0, E1;
2572 if (ElemTy == MVT::f16) {
2573 E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
2574 E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
2575 } else {
2576 E0 = Elem[0];
2577 E1 = Elem[1];
2578 }
2579 SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
2580 return DAG.getBitcast(VecTy, N);
2581 }
2582
2583 if (ElemTy == MVT::i8) {
2584 // First try generating a constant.
2585 if (AllConst) {
2586 uint32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2587 (Consts[1]->getZExtValue() & 0xFF) << 8 |
2588 (Consts[2]->getZExtValue() & 0xFF) << 16 |
2589 Consts[3]->getZExtValue() << 24;
2590 return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2591 }
2592
2593 // Then try splat.
2594 bool IsSplat = true;
2595 for (unsigned i = First+1; i != Num; ++i) {
2596 if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2597 continue;
2598 IsSplat = false;
2599 break;
2600 }
2601 if (IsSplat) {
2602 // Legalize the operand of SPLAT_VECTOR.
2603 SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2604 return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2605 }
2606
2607 // Generate
2608 // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2609 // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2610 assert(Elem.size() == 4);
2611 SDValue Vs[4];
2612 for (unsigned i = 0; i != 4; ++i) {
2613 Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2614 Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2615 }
2616 SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2617 SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2618 SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2619 SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2620 SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2621
2622 SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2623 return DAG.getBitcast(MVT::v4i8, R);
2624 }
2625
2626 #ifndef NDEBUG
2627 dbgs() << "VecTy: " << VecTy << '\n';
2628 #endif
2629 llvm_unreachable("Unexpected vector element type");
2630 }
2631
2632 SDValue
buildVector64(ArrayRef<SDValue> Elem,const SDLoc & dl,MVT VecTy,SelectionDAG & DAG) const2633 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2634 MVT VecTy, SelectionDAG &DAG) const {
2635 MVT ElemTy = VecTy.getVectorElementType();
2636 assert(VecTy.getVectorNumElements() == Elem.size());
2637
2638 SmallVector<ConstantInt*,8> Consts(Elem.size());
2639 bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2640
2641 unsigned First, Num = Elem.size();
2642 for (First = 0; First != Num; ++First) {
2643 if (!isUndef(Elem[First]))
2644 break;
2645 }
2646 if (First == Num)
2647 return DAG.getUNDEF(VecTy);
2648
2649 if (AllConst &&
2650 llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2651 return getZero(dl, VecTy, DAG);
2652
2653 // First try splat if possible.
2654 if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2655 bool IsSplat = true;
2656 for (unsigned i = First+1; i != Num; ++i) {
2657 if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2658 continue;
2659 IsSplat = false;
2660 break;
2661 }
2662 if (IsSplat) {
2663 // Legalize the operand of SPLAT_VECTOR
2664 SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
2665 : Elem[First];
2666 SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
2667 return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2668 }
2669 }
2670
2671 // Then try constant.
2672 if (AllConst) {
2673 uint64_t Val = 0;
2674 unsigned W = ElemTy.getSizeInBits();
2675 uint64_t Mask = (1ull << W) - 1;
2676 for (unsigned i = 0; i != Num; ++i)
2677 Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2678 SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2679 return DAG.getBitcast(VecTy, V0);
2680 }
2681
2682 // Build two 32-bit vectors and concatenate.
2683 MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2684 SDValue L = (ElemTy == MVT::i32)
2685 ? Elem[0]
2686 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2687 SDValue H = (ElemTy == MVT::i32)
2688 ? Elem[1]
2689 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2690 return getCombine(H, L, dl, VecTy, DAG);
2691 }
2692
2693 SDValue
extractVector(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ValTy,MVT ResTy,SelectionDAG & DAG) const2694 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2695 const SDLoc &dl, MVT ValTy, MVT ResTy,
2696 SelectionDAG &DAG) const {
2697 MVT VecTy = ty(VecV);
2698 assert(!ValTy.isVector() ||
2699 VecTy.getVectorElementType() == ValTy.getVectorElementType());
2700 if (VecTy.getVectorElementType() == MVT::i1)
2701 return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
2702
2703 unsigned VecWidth = VecTy.getSizeInBits();
2704 unsigned ValWidth = ValTy.getSizeInBits();
2705 unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2706 assert((VecWidth % ElemWidth) == 0);
2707 assert(VecWidth == 32 || VecWidth == 64);
2708
2709 // Cast everything to scalar integer types.
2710 MVT ScalarTy = tyScalar(VecTy);
2711 VecV = DAG.getBitcast(ScalarTy, VecV);
2712
2713 SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2714 SDValue ExtV;
2715
2716 if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
2717 unsigned Off = IdxN->getZExtValue() * ElemWidth;
2718 if (VecWidth == 64 && ValWidth == 32) {
2719 assert(Off == 0 || Off == 32);
2720 ExtV = Off == 0 ? LoHalf(VecV, DAG) : HiHalf(VecV, DAG);
2721 } else if (Off == 0 && (ValWidth % 8) == 0) {
2722 ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2723 } else {
2724 SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2725 // The return type of EXTRACTU must be the same as the type of the
2726 // input vector.
2727 ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2728 {VecV, WidthV, OffV});
2729 }
2730 } else {
2731 if (ty(IdxV) != MVT::i32)
2732 IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2733 SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2734 DAG.getConstant(ElemWidth, dl, MVT::i32));
2735 ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2736 {VecV, WidthV, OffV});
2737 }
2738
2739 // Cast ExtV to the requested result type.
2740 ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2741 ExtV = DAG.getBitcast(ResTy, ExtV);
2742 return ExtV;
2743 }
2744
2745 SDValue
extractVectorPred(SDValue VecV,SDValue IdxV,const SDLoc & dl,MVT ValTy,MVT ResTy,SelectionDAG & DAG) const2746 HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
2747 const SDLoc &dl, MVT ValTy, MVT ResTy,
2748 SelectionDAG &DAG) const {
2749 // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2750 // without any coprocessors).
2751 MVT VecTy = ty(VecV);
2752 unsigned VecWidth = VecTy.getSizeInBits();
2753 unsigned ValWidth = ValTy.getSizeInBits();
2754 assert(VecWidth == VecTy.getVectorNumElements() &&
2755 "Vector elements should equal vector width size");
2756 assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2757
2758 // Check if this is an extract of the lowest bit.
2759 if (isNullConstant(IdxV) && ValTy.getSizeInBits() == 1) {
2760 // Extracting the lowest bit is a no-op, but it changes the type,
2761 // so it must be kept as an operation to avoid errors related to
2762 // type mismatches.
2763 return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2764 }
2765
2766 // If the value extracted is a single bit, use tstbit.
2767 if (ValWidth == 1) {
2768 SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2769 SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2770 SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2771 return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2772 }
2773
2774 // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2775 // a predicate register. The elements of the vector are repeated
2776 // in the register (if necessary) so that the total number is 8.
2777 // The extracted subvector will need to be expanded in such a way.
2778 unsigned Scale = VecWidth / ValWidth;
2779
2780 // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2781 // position 0.
2782 assert(ty(IdxV) == MVT::i32);
2783 unsigned VecRep = 8 / VecWidth;
2784 SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2785 DAG.getConstant(8*VecRep, dl, MVT::i32));
2786 SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2787 SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2788 while (Scale > 1) {
2789 // The longest possible subvector is at most 32 bits, so it is always
2790 // contained in the low subregister.
2791 T1 = LoHalf(T1, DAG);
2792 T1 = expandPredicate(T1, dl, DAG);
2793 Scale /= 2;
2794 }
2795
2796 return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2797 }
2798
2799 SDValue
insertVector(SDValue VecV,SDValue ValV,SDValue IdxV,const SDLoc & dl,MVT ValTy,SelectionDAG & DAG) const2800 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2801 const SDLoc &dl, MVT ValTy,
2802 SelectionDAG &DAG) const {
2803 MVT VecTy = ty(VecV);
2804 if (VecTy.getVectorElementType() == MVT::i1)
2805 return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
2806
2807 unsigned VecWidth = VecTy.getSizeInBits();
2808 unsigned ValWidth = ValTy.getSizeInBits();
2809 assert(VecWidth == 32 || VecWidth == 64);
2810 assert((VecWidth % ValWidth) == 0);
2811
2812 // Cast everything to scalar integer types.
2813 MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2814 // The actual type of ValV may be different than ValTy (which is related
2815 // to the vector type).
2816 unsigned VW = ty(ValV).getSizeInBits();
2817 ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2818 VecV = DAG.getBitcast(ScalarTy, VecV);
2819 if (VW != VecWidth)
2820 ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2821
2822 SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2823 SDValue InsV;
2824
2825 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2826 unsigned W = C->getZExtValue() * ValWidth;
2827 SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2828 InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2829 {VecV, ValV, WidthV, OffV});
2830 } else {
2831 if (ty(IdxV) != MVT::i32)
2832 IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2833 SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2834 InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2835 {VecV, ValV, WidthV, OffV});
2836 }
2837
2838 return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2839 }
2840
2841 SDValue
insertVectorPred(SDValue VecV,SDValue ValV,SDValue IdxV,const SDLoc & dl,MVT ValTy,SelectionDAG & DAG) const2842 HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
2843 SDValue IdxV, const SDLoc &dl,
2844 MVT ValTy, SelectionDAG &DAG) const {
2845 MVT VecTy = ty(VecV);
2846 unsigned VecLen = VecTy.getVectorNumElements();
2847
2848 if (ValTy == MVT::i1) {
2849 SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2850 SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
2851 SDValue Width = DAG.getConstant(8 / VecLen, dl, MVT::i32);
2852 SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2853 SDValue Ins =
2854 DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
2855 return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
2856 }
2857
2858 assert(ValTy.getVectorElementType() == MVT::i1);
2859 SDValue ValR = ValTy.isVector()
2860 ? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
2861 : DAG.getSExtOrTrunc(ValV, dl, MVT::i64);
2862
2863 unsigned Scale = VecLen / ValTy.getVectorNumElements();
2864 assert(Scale > 1);
2865
2866 for (unsigned R = Scale; R > 1; R /= 2) {
2867 ValR = contractPredicate(ValR, dl, DAG);
2868 ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
2869 }
2870
2871 SDValue Width = DAG.getConstant(64 / Scale, dl, MVT::i32);
2872 SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2873 SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2874 SDValue Ins =
2875 DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
2876 return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2877 }
2878
2879 SDValue
expandPredicate(SDValue Vec32,const SDLoc & dl,SelectionDAG & DAG) const2880 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2881 SelectionDAG &DAG) const {
2882 assert(ty(Vec32).getSizeInBits() == 32);
2883 if (isUndef(Vec32))
2884 return DAG.getUNDEF(MVT::i64);
2885 SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
2886 SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
2887 return DAG.getBitcast(MVT::i64, X);
2888 }
2889
2890 SDValue
contractPredicate(SDValue Vec64,const SDLoc & dl,SelectionDAG & DAG) const2891 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2892 SelectionDAG &DAG) const {
2893 assert(ty(Vec64).getSizeInBits() == 64);
2894 if (isUndef(Vec64))
2895 return DAG.getUNDEF(MVT::i32);
2896 // Collect even bytes:
2897 SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
2898 SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
2899 {0, 2, 4, 6, 1, 3, 5, 7});
2900 return extractVector(S, DAG.getConstant(0, dl, MVT::i32), dl, MVT::v4i8,
2901 MVT::i32, DAG);
2902 }
2903
2904 SDValue
getZero(const SDLoc & dl,MVT Ty,SelectionDAG & DAG) const2905 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2906 const {
2907 if (Ty.isVector()) {
2908 unsigned W = Ty.getSizeInBits();
2909 if (W <= 64)
2910 return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2911 return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
2912 }
2913
2914 if (Ty.isInteger())
2915 return DAG.getConstant(0, dl, Ty);
2916 if (Ty.isFloatingPoint())
2917 return DAG.getConstantFP(0.0, dl, Ty);
2918 llvm_unreachable("Invalid type for zero");
2919 }
2920
2921 SDValue
appendUndef(SDValue Val,MVT ResTy,SelectionDAG & DAG) const2922 HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2923 const {
2924 MVT ValTy = ty(Val);
2925 assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2926
2927 unsigned ValLen = ValTy.getVectorNumElements();
2928 unsigned ResLen = ResTy.getVectorNumElements();
2929 if (ValLen == ResLen)
2930 return Val;
2931
2932 const SDLoc &dl(Val);
2933 assert(ValLen < ResLen);
2934 assert(ResLen % ValLen == 0);
2935
2936 SmallVector<SDValue, 4> Concats = {Val};
2937 for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
2938 Concats.push_back(DAG.getUNDEF(ValTy));
2939
2940 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
2941 }
2942
2943 SDValue
getCombine(SDValue Hi,SDValue Lo,const SDLoc & dl,MVT ResTy,SelectionDAG & DAG) const2944 HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
2945 MVT ResTy, SelectionDAG &DAG) const {
2946 MVT ElemTy = ty(Hi);
2947 assert(ElemTy == ty(Lo));
2948
2949 if (!ElemTy.isVector()) {
2950 assert(ElemTy.isScalarInteger());
2951 MVT PairTy = MVT::getIntegerVT(2 * ElemTy.getSizeInBits());
2952 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, PairTy, Lo, Hi);
2953 return DAG.getBitcast(ResTy, Pair);
2954 }
2955
2956 unsigned Width = ElemTy.getSizeInBits();
2957 MVT IntTy = MVT::getIntegerVT(Width);
2958 MVT PairTy = MVT::getIntegerVT(2 * Width);
2959 SDValue Pair =
2960 DAG.getNode(ISD::BUILD_PAIR, dl, PairTy,
2961 {DAG.getBitcast(IntTy, Lo), DAG.getBitcast(IntTy, Hi)});
2962 return DAG.getBitcast(ResTy, Pair);
2963 }
2964
2965 SDValue
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const2966 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2967 MVT VecTy = ty(Op);
2968 unsigned BW = VecTy.getSizeInBits();
2969 const SDLoc &dl(Op);
2970 SmallVector<SDValue,8> Ops;
2971 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2972 Ops.push_back(Op.getOperand(i));
2973
2974 if (BW == 32)
2975 return buildVector32(Ops, dl, VecTy, DAG);
2976 if (BW == 64)
2977 return buildVector64(Ops, dl, VecTy, DAG);
2978
2979 if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2980 // Check if this is a special case or all-0 or all-1.
2981 bool All0 = true, All1 = true;
2982 for (SDValue P : Ops) {
2983 auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
2984 if (CN == nullptr) {
2985 All0 = All1 = false;
2986 break;
2987 }
2988 uint32_t C = CN->getZExtValue();
2989 All0 &= (C == 0);
2990 All1 &= (C == 1);
2991 }
2992 if (All0)
2993 return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
2994 if (All1)
2995 return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
2996
2997 // For each i1 element in the resulting predicate register, put 1
2998 // shifted by the index of the element into a general-purpose register,
2999 // then or them together and transfer it back into a predicate register.
3000 SDValue Rs[8];
3001 SDValue Z = getZero(dl, MVT::i32, DAG);
3002 // Always produce 8 bits, repeat inputs if necessary.
3003 unsigned Rep = 8 / VecTy.getVectorNumElements();
3004 for (unsigned i = 0; i != 8; ++i) {
3005 SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
3006 Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
3007 }
3008 for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
3009 for (unsigned i = 0, e = A.size()/2; i != e; ++i)
3010 Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
3011 }
3012 // Move the value directly to a predicate register.
3013 return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
3014 }
3015
3016 return SDValue();
3017 }
3018
3019 SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const3020 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
3021 SelectionDAG &DAG) const {
3022 MVT VecTy = ty(Op);
3023 const SDLoc &dl(Op);
3024 if (VecTy.getSizeInBits() == 64) {
3025 assert(Op.getNumOperands() == 2);
3026 return getCombine(Op.getOperand(1), Op.getOperand(0), dl, VecTy, DAG);
3027 }
3028
3029 MVT ElemTy = VecTy.getVectorElementType();
3030 if (ElemTy == MVT::i1) {
3031 assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
3032 MVT OpTy = ty(Op.getOperand(0));
3033 // Scale is how many times the operands need to be contracted to match
3034 // the representation in the target register.
3035 unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
3036 assert(Scale == Op.getNumOperands() && Scale > 1);
3037
3038 // First, convert all bool vectors to integers, then generate pairwise
3039 // inserts to form values of doubled length. Up until there are only
3040 // two values left to concatenate, all of these values will fit in a
3041 // 32-bit integer, so keep them as i32 to use 32-bit inserts.
3042 SmallVector<SDValue,4> Words[2];
3043 unsigned IdxW = 0;
3044
3045 for (SDValue P : Op.getNode()->op_values()) {
3046 SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
3047 for (unsigned R = Scale; R > 1; R /= 2) {
3048 W = contractPredicate(W, dl, DAG);
3049 W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
3050 }
3051 W = LoHalf(W, DAG);
3052 Words[IdxW].push_back(W);
3053 }
3054
3055 while (Scale > 2) {
3056 SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
3057 Words[IdxW ^ 1].clear();
3058
3059 for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
3060 SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
3061 // Insert W1 into W0 right next to the significant bits of W0.
3062 SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
3063 {W0, W1, WidthV, WidthV});
3064 Words[IdxW ^ 1].push_back(T);
3065 }
3066 IdxW ^= 1;
3067 Scale /= 2;
3068 }
3069
3070 // At this point there should only be two words left, and Scale should be 2.
3071 assert(Scale == 2 && Words[IdxW].size() == 2);
3072
3073 SDValue WW = getCombine(Words[IdxW][1], Words[IdxW][0], dl, MVT::i64, DAG);
3074 return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
3075 }
3076
3077 return SDValue();
3078 }
3079
3080 SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3081 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3082 SelectionDAG &DAG) const {
3083 SDValue Vec = Op.getOperand(0);
3084 MVT ElemTy = ty(Vec).getVectorElementType();
3085 return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
3086 }
3087
3088 SDValue
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const3089 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3090 SelectionDAG &DAG) const {
3091 return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
3092 ty(Op), ty(Op), DAG);
3093 }
3094
3095 SDValue
LowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3096 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3097 SelectionDAG &DAG) const {
3098 return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
3099 SDLoc(Op), ty(Op).getVectorElementType(), DAG);
3100 }
3101
3102 SDValue
LowerINSERT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const3103 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
3104 SelectionDAG &DAG) const {
3105 SDValue ValV = Op.getOperand(1);
3106 return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
3107 SDLoc(Op), ty(ValV), DAG);
3108 }
3109
3110 bool
allowTruncateForTailCall(Type * Ty1,Type * Ty2) const3111 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
3112 // Assuming the caller does not have either a signext or zeroext modifier, and
3113 // only one value is accepted, any reasonable truncation is allowed.
3114 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
3115 return false;
3116
3117 // FIXME: in principle up to 64-bit could be made safe, but it would be very
3118 // fragile at the moment: any support for multiple value returns would be
3119 // liable to disallow tail calls involving i64 -> iN truncation in many cases.
3120 return Ty1->getPrimitiveSizeInBits() <= 32;
3121 }
3122
3123 SDValue
LowerLoad(SDValue Op,SelectionDAG & DAG) const3124 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
3125 MVT Ty = ty(Op);
3126 const SDLoc &dl(Op);
3127 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3128 MVT MemTy = LN->getMemoryVT().getSimpleVT();
3129 ISD::LoadExtType ET = LN->getExtensionType();
3130
3131 bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
3132 if (LoadPred) {
3133 SDValue NL = DAG.getLoad(
3134 LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
3135 LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
3136 /*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
3137 LN->getAAInfo(), LN->getRanges());
3138 LN = cast<LoadSDNode>(NL.getNode());
3139 }
3140
3141 Align ClaimAlign = LN->getAlign();
3142 if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
3143 return replaceMemWithUndef(Op, DAG);
3144
3145 // Call LowerUnalignedLoad for all loads, it recognizes loads that
3146 // don't need extra aligning.
3147 SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
3148 if (LoadPred) {
3149 SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
3150 if (ET == ISD::SEXTLOAD) {
3151 TP = DAG.getSExtOrTrunc(TP, dl, Ty);
3152 } else if (ET != ISD::NON_EXTLOAD) {
3153 TP = DAG.getZExtOrTrunc(TP, dl, Ty);
3154 }
3155 SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
3156 return DAG.getMergeValues({TP, Ch}, dl);
3157 }
3158 return LU;
3159 }
3160
3161 SDValue
LowerStore(SDValue Op,SelectionDAG & DAG) const3162 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
3163 const SDLoc &dl(Op);
3164 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
3165 SDValue Val = SN->getValue();
3166 MVT Ty = ty(Val);
3167
3168 if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
3169 // Store the exact predicate (all bits).
3170 SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
3171 SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
3172 MVT::i8, SN->getMemOperand());
3173 if (SN->isIndexed()) {
3174 NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
3175 SN->getAddressingMode());
3176 }
3177 SN = cast<StoreSDNode>(NS.getNode());
3178 }
3179
3180 Align ClaimAlign = SN->getAlign();
3181 if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
3182 return replaceMemWithUndef(Op, DAG);
3183
3184 MVT StoreTy = SN->getMemoryVT().getSimpleVT();
3185 Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
3186 if (ClaimAlign < NeedAlign)
3187 return expandUnalignedStore(SN, DAG);
3188 return SDValue(SN, 0);
3189 }
3190
3191 SDValue
LowerUnalignedLoad(SDValue Op,SelectionDAG & DAG) const3192 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
3193 const {
3194 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3195 MVT LoadTy = ty(Op);
3196 unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
3197 unsigned HaveAlign = LN->getAlign().value();
3198 if (HaveAlign >= NeedAlign)
3199 return Op;
3200
3201 const SDLoc &dl(Op);
3202 const DataLayout &DL = DAG.getDataLayout();
3203 LLVMContext &Ctx = *DAG.getContext();
3204
3205 // If the load aligning is disabled or the load can be broken up into two
3206 // smaller legal loads, do the default (target-independent) expansion.
3207 bool DoDefault = false;
3208 // Handle it in the default way if this is an indexed load.
3209 if (!LN->isUnindexed())
3210 DoDefault = true;
3211
3212 if (!AlignLoads) {
3213 if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
3214 *LN->getMemOperand()))
3215 return Op;
3216 DoDefault = true;
3217 }
3218 if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
3219 // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3220 MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
3221 : MVT::getVectorVT(MVT::i8, HaveAlign);
3222 DoDefault =
3223 allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
3224 }
3225 if (DoDefault) {
3226 std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
3227 return DAG.getMergeValues({P.first, P.second}, dl);
3228 }
3229
3230 // The code below generates two loads, both aligned as NeedAlign, and
3231 // with the distance of NeedAlign between them. For that to cover the
3232 // bits that need to be loaded (and without overlapping), the size of
3233 // the loads should be equal to NeedAlign. This is true for all loadable
3234 // types, but add an assertion in case something changes in the future.
3235 assert(LoadTy.getSizeInBits() == 8*NeedAlign);
3236
3237 unsigned LoadLen = NeedAlign;
3238 SDValue Base = LN->getBasePtr();
3239 SDValue Chain = LN->getChain();
3240 auto BO = getBaseAndOffset(Base);
3241 unsigned BaseOpc = BO.first.getOpcode();
3242 if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
3243 return Op;
3244
3245 if (BO.second % LoadLen != 0) {
3246 BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
3247 DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
3248 BO.second -= BO.second % LoadLen;
3249 }
3250 SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3251 ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
3252 DAG.getConstant(NeedAlign, dl, MVT::i32))
3253 : BO.first;
3254 SDValue Base0 =
3255 DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::getFixed(BO.second), dl);
3256 SDValue Base1 = DAG.getMemBasePlusOffset(
3257 BaseNoOff, TypeSize::getFixed(BO.second + LoadLen), dl);
3258
3259 MachineMemOperand *WideMMO = nullptr;
3260 if (MachineMemOperand *MMO = LN->getMemOperand()) {
3261 MachineFunction &MF = DAG.getMachineFunction();
3262 WideMMO = MF.getMachineMemOperand(
3263 MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
3264 MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
3265 MMO->getSuccessOrdering(), MMO->getFailureOrdering());
3266 }
3267
3268 SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
3269 SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
3270
3271 SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
3272 {Load1, Load0, BaseNoOff.getOperand(0)});
3273 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3274 Load0.getValue(1), Load1.getValue(1));
3275 SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
3276 return M;
3277 }
3278
3279 SDValue
LowerUAddSubO(SDValue Op,SelectionDAG & DAG) const3280 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3281 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
3282 auto *CY = dyn_cast<ConstantSDNode>(Y);
3283 if (!CY)
3284 return SDValue();
3285
3286 const SDLoc &dl(Op);
3287 SDVTList VTs = Op.getNode()->getVTList();
3288 assert(VTs.NumVTs == 2);
3289 assert(VTs.VTs[1] == MVT::i1);
3290 unsigned Opc = Op.getOpcode();
3291
3292 if (CY) {
3293 uint64_t VY = CY->getZExtValue();
3294 assert(VY != 0 && "This should have been folded");
3295 // X +/- 1
3296 if (VY != 1)
3297 return SDValue();
3298
3299 if (Opc == ISD::UADDO) {
3300 SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
3301 SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
3302 ISD::SETEQ);
3303 return DAG.getMergeValues({Op, Ov}, dl);
3304 }
3305 if (Opc == ISD::USUBO) {
3306 SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
3307 SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
3308 DAG.getAllOnesConstant(dl, ty(Op)), ISD::SETEQ);
3309 return DAG.getMergeValues({Op, Ov}, dl);
3310 }
3311 }
3312
3313 return SDValue();
3314 }
3315
LowerUAddSubOCarry(SDValue Op,SelectionDAG & DAG) const3316 SDValue HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
3317 SelectionDAG &DAG) const {
3318 const SDLoc &dl(Op);
3319 unsigned Opc = Op.getOpcode();
3320 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
3321
3322 if (Opc == ISD::UADDO_CARRY)
3323 return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
3324 { X, Y, C });
3325
3326 EVT CarryTy = C.getValueType();
3327 SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
3328 { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
3329 SDValue Out[] = { SubC.getValue(0),
3330 DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
3331 return DAG.getMergeValues(Out, dl);
3332 }
3333
3334 SDValue
LowerEH_RETURN(SDValue Op,SelectionDAG & DAG) const3335 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3336 SDValue Chain = Op.getOperand(0);
3337 SDValue Offset = Op.getOperand(1);
3338 SDValue Handler = Op.getOperand(2);
3339 SDLoc dl(Op);
3340 auto PtrVT = getPointerTy(DAG.getDataLayout());
3341
3342 // Mark function as containing a call to EH_RETURN.
3343 HexagonMachineFunctionInfo *FuncInfo =
3344 DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3345 FuncInfo->setHasEHReturn();
3346
3347 unsigned OffsetReg = Hexagon::R28;
3348
3349 SDValue StoreAddr =
3350 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
3351 DAG.getIntPtrConstant(4, dl));
3352 Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
3353 Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
3354
3355 // Not needed we already use it as explicit input to EH_RETURN.
3356 // MF.getRegInfo().addLiveOut(OffsetReg);
3357
3358 return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
3359 }
3360
3361 SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const3362 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3363 unsigned Opc = Op.getOpcode();
3364
3365 // Handle INLINEASM first.
3366 if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
3367 return LowerINLINEASM(Op, DAG);
3368
3369 if (isHvxOperation(Op.getNode(), DAG)) {
3370 // If HVX lowering returns nothing, try the default lowering.
3371 if (SDValue V = LowerHvxOperation(Op, DAG))
3372 return V;
3373 }
3374
3375 switch (Opc) {
3376 default:
3377 #ifndef NDEBUG
3378 Op.getNode()->dumpr(&DAG);
3379 if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
3380 errs() << "Error: check for a non-legal type in this operation\n";
3381 #endif
3382 llvm_unreachable("Should not custom lower this!");
3383
3384 case ISD::FDIV:
3385 return LowerFDIV(Op, DAG);
3386 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3387 case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
3388 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
3389 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
3390 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3391 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3392 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3393 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
3394 case ISD::LOAD: return LowerLoad(Op, DAG);
3395 case ISD::STORE: return LowerStore(Op, DAG);
3396 case ISD::UADDO:
3397 case ISD::USUBO: return LowerUAddSubO(Op, DAG);
3398 case ISD::UADDO_CARRY:
3399 case ISD::USUBO_CARRY: return LowerUAddSubOCarry(Op, DAG);
3400 case ISD::SRA:
3401 case ISD::SHL:
3402 case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
3403 case ISD::ROTL: return LowerROTL(Op, DAG);
3404 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
3405 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
3406 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
3407 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
3408 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
3409 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
3410 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
3411 case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
3412 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
3413 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3414 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
3415 case ISD::VASTART: return LowerVASTART(Op, DAG);
3416 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3417 case ISD::SETCC: return LowerSETCC(Op, DAG);
3418 case ISD::VSELECT: return LowerVSELECT(Op, DAG);
3419 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3420 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
3421 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
3422 case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
3423 case ISD::READSTEADYCOUNTER: return LowerREADSTEADYCOUNTER(Op, DAG);
3424 break;
3425 }
3426
3427 return SDValue();
3428 }
3429
3430 void
LowerOperationWrapper(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3431 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3432 SmallVectorImpl<SDValue> &Results,
3433 SelectionDAG &DAG) const {
3434 if (isHvxOperation(N, DAG)) {
3435 LowerHvxOperationWrapper(N, Results, DAG);
3436 if (!Results.empty())
3437 return;
3438 }
3439
3440 SDValue Op(N, 0);
3441 unsigned Opc = N->getOpcode();
3442
3443 switch (Opc) {
3444 case HexagonISD::SSAT:
3445 case HexagonISD::USAT:
3446 Results.push_back(opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG));
3447 break;
3448 case ISD::STORE:
3449 // We are only custom-lowering stores to verify the alignment of the
3450 // address if it is a compile-time constant. Since a store can be
3451 // modified during type-legalization (the value being stored may need
3452 // legalization), return empty Results here to indicate that we don't
3453 // really make any changes in the custom lowering.
3454 return;
3455 default:
3456 TargetLowering::LowerOperationWrapper(N, Results, DAG);
3457 break;
3458 }
3459 }
3460
3461 void
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const3462 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3463 SmallVectorImpl<SDValue> &Results,
3464 SelectionDAG &DAG) const {
3465 if (isHvxOperation(N, DAG)) {
3466 ReplaceHvxNodeResults(N, Results, DAG);
3467 if (!Results.empty())
3468 return;
3469 }
3470
3471 const SDLoc &dl(N);
3472 switch (N->getOpcode()) {
3473 case ISD::SRL:
3474 case ISD::SRA:
3475 case ISD::SHL:
3476 return;
3477 case ISD::BITCAST:
3478 // Handle a bitcast from v8i1 to i8.
3479 if (N->getValueType(0) == MVT::i8) {
3480 if (N->getOperand(0).getValueType() == MVT::v8i1) {
3481 SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
3482 N->getOperand(0), DAG);
3483 SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
3484 Results.push_back(T);
3485 }
3486 }
3487 break;
3488 }
3489 }
3490
3491 SDValue
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const3492 HexagonTargetLowering::PerformDAGCombine(SDNode *N,
3493 DAGCombinerInfo &DCI) const {
3494 if (isHvxOperation(N, DCI.DAG)) {
3495 if (SDValue V = PerformHvxDAGCombine(N, DCI))
3496 return V;
3497 return SDValue();
3498 }
3499
3500 SDValue Op(N, 0);
3501 const SDLoc &dl(Op);
3502 unsigned Opc = Op.getOpcode();
3503
3504 if (Opc == ISD::TRUNCATE) {
3505 SDValue Op0 = Op.getOperand(0);
3506 // fold (truncate (build pair x, y)) -> (truncate x) or x
3507 if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3508 EVT TruncTy = Op.getValueType();
3509 SDValue Elem0 = Op0.getOperand(0);
3510 // if we match the low element of the pair, just return it.
3511 if (Elem0.getValueType() == TruncTy)
3512 return Elem0;
3513 // otherwise, if the low part is still too large, apply the truncate.
3514 if (Elem0.getValueType().bitsGT(TruncTy))
3515 return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3516 }
3517 }
3518
3519 if (DCI.isBeforeLegalizeOps())
3520 return SDValue();
3521
3522 if (Opc == HexagonISD::P2D) {
3523 SDValue P = Op.getOperand(0);
3524 switch (P.getOpcode()) {
3525 case HexagonISD::PTRUE:
3526 return DCI.DAG.getAllOnesConstant(dl, ty(Op));
3527 case HexagonISD::PFALSE:
3528 return getZero(dl, ty(Op), DCI.DAG);
3529 default:
3530 break;
3531 }
3532 } else if (Opc == ISD::VSELECT) {
3533 // This is pretty much duplicated in HexagonISelLoweringHVX...
3534 //
3535 // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3536 SDValue Cond = Op.getOperand(0);
3537 if (Cond->getOpcode() == ISD::XOR) {
3538 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3539 if (C1->getOpcode() == HexagonISD::PTRUE) {
3540 SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
3541 Op.getOperand(2), Op.getOperand(1));
3542 return VSel;
3543 }
3544 }
3545 } else if (Opc == ISD::TRUNCATE) {
3546 SDValue Op0 = Op.getOperand(0);
3547 // fold (truncate (build pair x, y)) -> (truncate x) or x
3548 if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3549 MVT TruncTy = ty(Op);
3550 SDValue Elem0 = Op0.getOperand(0);
3551 // if we match the low element of the pair, just return it.
3552 if (ty(Elem0) == TruncTy)
3553 return Elem0;
3554 // otherwise, if the low part is still too large, apply the truncate.
3555 if (ty(Elem0).bitsGT(TruncTy))
3556 return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3557 }
3558 } else if (Opc == ISD::OR) {
3559 // fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
3560 // if s >= 32
3561 auto fold0 = [&, this](SDValue Op) {
3562 if (ty(Op) != MVT::i64)
3563 return SDValue();
3564 SDValue Shl = Op.getOperand(0);
3565 SDValue Zxt = Op.getOperand(1);
3566 if (Shl.getOpcode() != ISD::SHL)
3567 std::swap(Shl, Zxt);
3568
3569 if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
3570 return SDValue();
3571
3572 SDValue Z = Zxt.getOperand(0);
3573 auto *Amt = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
3574 if (Amt && Amt->getZExtValue() >= 32 && ty(Z).getSizeInBits() <= 32) {
3575 unsigned A = Amt->getZExtValue();
3576 SDValue S = Shl.getOperand(0);
3577 SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
3578 DCI.DAG.getConstant(A - 32, dl, MVT::i32));
3579 SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
3580 SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
3581 return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
3582 }
3583 return SDValue();
3584 };
3585
3586 if (SDValue R = fold0(Op))
3587 return R;
3588 }
3589
3590 return SDValue();
3591 }
3592
3593 /// Returns relocation base for the given PIC jumptable.
3594 SDValue
getPICJumpTableRelocBase(SDValue Table,SelectionDAG & DAG) const3595 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3596 SelectionDAG &DAG) const {
3597 int Idx = cast<JumpTableSDNode>(Table)->getIndex();
3598 EVT VT = Table.getValueType();
3599 SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
3600 return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
3601 }
3602
3603 //===----------------------------------------------------------------------===//
3604 // Inline Assembly Support
3605 //===----------------------------------------------------------------------===//
3606
3607 TargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const3608 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3609 if (Constraint.size() == 1) {
3610 switch (Constraint[0]) {
3611 case 'q':
3612 case 'v':
3613 if (Subtarget.useHVXOps())
3614 return C_RegisterClass;
3615 break;
3616 case 'a':
3617 return C_RegisterClass;
3618 default:
3619 break;
3620 }
3621 }
3622 return TargetLowering::getConstraintType(Constraint);
3623 }
3624
3625 std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const3626 HexagonTargetLowering::getRegForInlineAsmConstraint(
3627 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3628
3629 if (Constraint.size() == 1) {
3630 switch (Constraint[0]) {
3631 case 'r': // R0-R31
3632 switch (VT.SimpleTy) {
3633 default:
3634 return {0u, nullptr};
3635 case MVT::i1:
3636 case MVT::i8:
3637 case MVT::i16:
3638 case MVT::i32:
3639 case MVT::f32:
3640 return {0u, &Hexagon::IntRegsRegClass};
3641 case MVT::i64:
3642 case MVT::f64:
3643 return {0u, &Hexagon::DoubleRegsRegClass};
3644 }
3645 break;
3646 case 'a': // M0-M1
3647 if (VT != MVT::i32)
3648 return {0u, nullptr};
3649 return {0u, &Hexagon::ModRegsRegClass};
3650 case 'q': // q0-q3
3651 switch (VT.getSizeInBits()) {
3652 default:
3653 return {0u, nullptr};
3654 case 64:
3655 case 128:
3656 return {0u, &Hexagon::HvxQRRegClass};
3657 }
3658 break;
3659 case 'v': // V0-V31
3660 switch (VT.getSizeInBits()) {
3661 default:
3662 return {0u, nullptr};
3663 case 512:
3664 return {0u, &Hexagon::HvxVRRegClass};
3665 case 1024:
3666 if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3667 return {0u, &Hexagon::HvxVRRegClass};
3668 return {0u, &Hexagon::HvxWRRegClass};
3669 case 2048:
3670 return {0u, &Hexagon::HvxWRRegClass};
3671 }
3672 break;
3673 default:
3674 return {0u, nullptr};
3675 }
3676 }
3677
3678 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3679 }
3680
3681 /// isFPImmLegal - Returns true if the target can instruction select the
3682 /// specified FP immediate natively. If false, the legalizer will
3683 /// materialize the FP immediate as a load from a constant pool.
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const3684 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3685 bool ForCodeSize) const {
3686 return true;
3687 }
3688
3689 /// Returns true if it is beneficial to convert a load of a constant
3690 /// to just the constant itself.
shouldConvertConstantLoadToIntImm(const APInt & Imm,Type * Ty) const3691 bool HexagonTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
3692 Type *Ty) const {
3693 if (!ConstantLoadsToImm)
3694 return false;
3695
3696 assert(Ty->isIntegerTy());
3697 unsigned BitSize = Ty->getPrimitiveSizeInBits();
3698 return (BitSize > 0 && BitSize <= 64);
3699 }
3700
3701 /// isLegalAddressingMode - Return true if the addressing mode represented by
3702 /// AM is legal for this target, for a load/store of the specified type.
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const3703 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3704 const AddrMode &AM, Type *Ty,
3705 unsigned AS, Instruction *I) const {
3706 if (Ty->isSized()) {
3707 // When LSR detects uses of the same base address to access different
3708 // types (e.g. unions), it will assume a conservative type for these
3709 // uses:
3710 // LSR Use: Kind=Address of void in addrspace(4294967295), ...
3711 // The type Ty passed here would then be "void". Skip the alignment
3712 // checks, but do not return false right away, since that confuses
3713 // LSR into crashing.
3714 Align A = DL.getABITypeAlign(Ty);
3715 // The base offset must be a multiple of the alignment.
3716 if (!isAligned(A, AM.BaseOffs))
3717 return false;
3718 // The shifted offset must fit in 11 bits.
3719 if (!isInt<11>(AM.BaseOffs >> Log2(A)))
3720 return false;
3721 }
3722
3723 // No global is ever allowed as a base.
3724 if (AM.BaseGV)
3725 return false;
3726
3727 int Scale = AM.Scale;
3728 if (Scale < 0)
3729 Scale = -Scale;
3730 switch (Scale) {
3731 case 0: // No scale reg, "r+i", "r", or just "i".
3732 break;
3733 default: // No scaled addressing mode.
3734 return false;
3735 }
3736 return true;
3737 }
3738
3739 /// Return true if folding a constant offset with the given GlobalAddress is
3740 /// legal. It is frequently not legal in PIC relocation models.
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const3741 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3742 const {
3743 return HTM.getRelocationModel() == Reloc::Static;
3744 }
3745
3746 /// isLegalICmpImmediate - Return true if the specified immediate is legal
3747 /// icmp immediate, that is the target has icmp instructions which can compare
3748 /// a register against the immediate without having to materialize the
3749 /// immediate into a register.
isLegalICmpImmediate(int64_t Imm) const3750 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3751 return Imm >= -512 && Imm <= 511;
3752 }
3753
3754 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3755 /// for tail call optimization. Targets which want to do tail call
3756 /// optimization should implement this function.
IsEligibleForTailCallOptimization(SDValue Callee,CallingConv::ID CalleeCC,bool IsVarArg,bool IsCalleeStructRet,bool IsCallerStructRet,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SmallVectorImpl<ISD::InputArg> & Ins,SelectionDAG & DAG) const3757 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3758 SDValue Callee,
3759 CallingConv::ID CalleeCC,
3760 bool IsVarArg,
3761 bool IsCalleeStructRet,
3762 bool IsCallerStructRet,
3763 const SmallVectorImpl<ISD::OutputArg> &Outs,
3764 const SmallVectorImpl<SDValue> &OutVals,
3765 const SmallVectorImpl<ISD::InputArg> &Ins,
3766 SelectionDAG& DAG) const {
3767 const Function &CallerF = DAG.getMachineFunction().getFunction();
3768 CallingConv::ID CallerCC = CallerF.getCallingConv();
3769 bool CCMatch = CallerCC == CalleeCC;
3770
3771 // ***************************************************************************
3772 // Look for obvious safe cases to perform tail call optimization that do not
3773 // require ABI changes.
3774 // ***************************************************************************
3775
3776 // If this is a tail call via a function pointer, then don't do it!
3777 if (!isa<GlobalAddressSDNode>(Callee) &&
3778 !isa<ExternalSymbolSDNode>(Callee)) {
3779 return false;
3780 }
3781
3782 // Do not optimize if the calling conventions do not match and the conventions
3783 // used are not C or Fast.
3784 if (!CCMatch) {
3785 bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3786 bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3787 // If R & E, then ok.
3788 if (!R || !E)
3789 return false;
3790 }
3791
3792 // Do not tail call optimize vararg calls.
3793 if (IsVarArg)
3794 return false;
3795
3796 // Also avoid tail call optimization if either caller or callee uses struct
3797 // return semantics.
3798 if (IsCalleeStructRet || IsCallerStructRet)
3799 return false;
3800
3801 // In addition to the cases above, we also disable Tail Call Optimization if
3802 // the calling convention code that at least one outgoing argument needs to
3803 // go on the stack. We cannot check that here because at this point that
3804 // information is not available.
3805 return true;
3806 }
3807
3808 /// Returns the target specific optimal type for load and store operations as
3809 /// a result of memset, memcpy, and memmove lowering.
3810 ///
3811 /// If DstAlign is zero that means it's safe to destination alignment can
3812 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3813 /// a need to check it against alignment requirement, probably because the
3814 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3815 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3816 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3817 /// does not need to be loaded. It returns EVT::Other if the type should be
3818 /// determined using generic target-independent logic.
getOptimalMemOpType(LLVMContext & Context,const MemOp & Op,const AttributeList & FuncAttributes) const3819 EVT HexagonTargetLowering::getOptimalMemOpType(
3820 LLVMContext &Context, const MemOp &Op,
3821 const AttributeList &FuncAttributes) const {
3822 if (Op.size() >= 8 && Op.isAligned(Align(8)))
3823 return MVT::i64;
3824 if (Op.size() >= 4 && Op.isAligned(Align(4)))
3825 return MVT::i32;
3826 if (Op.size() >= 2 && Op.isAligned(Align(2)))
3827 return MVT::i16;
3828 return MVT::Other;
3829 }
3830
allowsMemoryAccess(LLVMContext & Context,const DataLayout & DL,EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const3831 bool HexagonTargetLowering::allowsMemoryAccess(
3832 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3833 Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
3834 if (!VT.isSimple())
3835 return false;
3836 MVT SVT = VT.getSimpleVT();
3837 if (Subtarget.isHVXVectorType(SVT, true))
3838 return allowsHvxMemoryAccess(SVT, Flags, Fast);
3839 return TargetLoweringBase::allowsMemoryAccess(
3840 Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3841 }
3842
allowsMisalignedMemoryAccesses(EVT VT,unsigned AddrSpace,Align Alignment,MachineMemOperand::Flags Flags,unsigned * Fast) const3843 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3844 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3845 unsigned *Fast) const {
3846 if (!VT.isSimple())
3847 return false;
3848 MVT SVT = VT.getSimpleVT();
3849 if (Subtarget.isHVXVectorType(SVT, true))
3850 return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
3851 if (Fast)
3852 *Fast = 0;
3853 return false;
3854 }
3855
3856 std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(const TargetRegisterInfo * TRI,MVT VT) const3857 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3858 MVT VT) const {
3859 if (Subtarget.isHVXVectorType(VT, true)) {
3860 unsigned BitWidth = VT.getSizeInBits();
3861 unsigned VecWidth = Subtarget.getVectorLength() * 8;
3862
3863 if (VT.getVectorElementType() == MVT::i1)
3864 return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3865 if (BitWidth == VecWidth)
3866 return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3867 assert(BitWidth == 2 * VecWidth);
3868 return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3869 }
3870
3871 return TargetLowering::findRepresentativeClass(TRI, VT);
3872 }
3873
shouldReduceLoadWidth(SDNode * Load,ISD::LoadExtType ExtTy,EVT NewVT,std::optional<unsigned> ByteOffset) const3874 bool HexagonTargetLowering::shouldReduceLoadWidth(
3875 SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
3876 std::optional<unsigned> ByteOffset) const {
3877 // TODO: This may be worth removing. Check regression tests for diffs.
3878 if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
3879 ByteOffset))
3880 return false;
3881
3882 auto *L = cast<LoadSDNode>(Load);
3883 std::pair<SDValue, int> BO = getBaseAndOffset(L->getBasePtr());
3884 // Small-data object, do not shrink.
3885 if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3886 return false;
3887 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
3888 auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
3889 const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
3890 return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3891 }
3892 return true;
3893 }
3894
AdjustInstrPostInstrSelection(MachineInstr & MI,SDNode * Node) const3895 void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
3896 SDNode *Node) const {
3897 AdjustHvxInstrPostInstrSelection(MI, Node);
3898 }
3899
emitLoadLinked(IRBuilderBase & Builder,Type * ValueTy,Value * Addr,AtomicOrdering Ord) const3900 Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3901 Type *ValueTy, Value *Addr,
3902 AtomicOrdering Ord) const {
3903 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3904 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3905 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3906 : Intrinsic::hexagon_L4_loadd_locked;
3907
3908 Value *Call =
3909 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
3910
3911 return Builder.CreateBitCast(Call, ValueTy);
3912 }
3913
3914 /// Perform a store-conditional operation to Addr. Return the status of the
3915 /// store. This should be 0 if the store succeeded, non-zero otherwise.
emitStoreConditional(IRBuilderBase & Builder,Value * Val,Value * Addr,AtomicOrdering Ord) const3916 Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3917 Value *Val, Value *Addr,
3918 AtomicOrdering Ord) const {
3919 BasicBlock *BB = Builder.GetInsertBlock();
3920 Module *M = BB->getParent()->getParent();
3921 Type *Ty = Val->getType();
3922 unsigned SZ = Ty->getPrimitiveSizeInBits();
3923
3924 Type *CastTy = Builder.getIntNTy(SZ);
3925 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3926 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3927 : Intrinsic::hexagon_S4_stored_locked;
3928
3929 Val = Builder.CreateBitCast(Val, CastTy);
3930
3931 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
3932 /*FMFSource=*/nullptr, "stcx");
3933 Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3934 Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3935 return Ext;
3936 }
3937
3938 TargetLowering::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst * LI) const3939 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3940 // Do not expand loads and stores that don't exceed 64 bits.
3941 return LI->getType()->getPrimitiveSizeInBits() > 64
3942 ? AtomicExpansionKind::LLOnly
3943 : AtomicExpansionKind::None;
3944 }
3945
3946 TargetLowering::AtomicExpansionKind
shouldExpandAtomicStoreInIR(StoreInst * SI) const3947 HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3948 // Do not expand loads and stores that don't exceed 64 bits.
3949 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
3950 ? AtomicExpansionKind::Expand
3951 : AtomicExpansionKind::None;
3952 }
3953
3954 TargetLowering::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst * AI) const3955 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3956 AtomicCmpXchgInst *AI) const {
3957 return AtomicExpansionKind::LLSC;
3958 }
3959