xref: /freebsd/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCAsmInfo.h"
16 #include "VECustomDAG.h"
17 #include "VEInstrBuilder.h"
18 #include "VEMachineFunctionInfo.h"
19 #include "VERegisterInfo.h"
20 #include "VETargetMachine.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/SelectionDAG.h"
30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/Support/ErrorHandling.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "ve-lower"
39 
40 //===----------------------------------------------------------------------===//
41 // Calling Convention Implementation
42 //===----------------------------------------------------------------------===//
43 
44 #include "VEGenCallingConv.inc"
45 
getReturnCC(CallingConv::ID CallConv)46 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
47   switch (CallConv) {
48   default:
49     return RetCC_VE_C;
50   case CallingConv::Fast:
51     return RetCC_VE_Fast;
52   }
53 }
54 
getParamCC(CallingConv::ID CallConv,bool IsVarArg)55 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
56   if (IsVarArg)
57     return CC_VE2;
58   switch (CallConv) {
59   default:
60     return CC_VE_C;
61   case CallingConv::Fast:
62     return CC_VE_Fast;
63   }
64 }
65 
CanLowerReturn(CallingConv::ID CallConv,MachineFunction & MF,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,LLVMContext & Context,const Type * RetTy) const66 bool VETargetLowering::CanLowerReturn(
67     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
68     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
69     const Type *RetTy) const {
70   CCAssignFn *RetCC = getReturnCC(CallConv);
71   SmallVector<CCValAssign, 16> RVLocs;
72   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73   return CCInfo.CheckReturn(Outs, RetCC);
74 }
75 
76 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77                                    MVT::v256f32, MVT::v512f32, MVT::v256f64};
78 
79 static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80 
81 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82 
initRegisterClasses()83 void VETargetLowering::initRegisterClasses() {
84   // Set up the register classes.
85   addRegisterClass(MVT::i32, &VE::I32RegClass);
86   addRegisterClass(MVT::i64, &VE::I64RegClass);
87   addRegisterClass(MVT::f32, &VE::F32RegClass);
88   addRegisterClass(MVT::f64, &VE::I64RegClass);
89   addRegisterClass(MVT::f128, &VE::F128RegClass);
90 
91   if (Subtarget->enableVPU()) {
92     for (MVT VecVT : AllVectorVTs)
93       addRegisterClass(VecVT, &VE::V64RegClass);
94     addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95     addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96   }
97 }
98 
initSPUActions()99 void VETargetLowering::initSPUActions() {
100   const auto &TM = getTargetMachine();
101   /// Load & Store {
102 
103   // VE doesn't have i1 sign extending load.
104   for (MVT VT : MVT::integer_valuetypes()) {
105     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108     setTruncStoreAction(VT, MVT::i1, Expand);
109   }
110 
111   // VE doesn't have floating point extload/truncstore, so expand them.
112   for (MVT FPVT : MVT::fp_valuetypes()) {
113     for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114       setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115       setTruncStoreAction(FPVT, OtherFPVT, Expand);
116     }
117   }
118 
119   // VE doesn't have fp128 load/store, so expand them in custom lower.
120   setOperationAction(ISD::LOAD, MVT::f128, Custom);
121   setOperationAction(ISD::STORE, MVT::f128, Custom);
122 
123   /// } Load & Store
124 
125   // Custom legalize address nodes into LO/HI parts.
126   MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
127   setOperationAction(ISD::BlockAddress, PtrVT, Custom);
128   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
129   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
130   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
131   setOperationAction(ISD::JumpTable, PtrVT, Custom);
132 
133   /// VAARG handling {
134   setOperationAction(ISD::VASTART, MVT::Other, Custom);
135   // VAARG needs to be lowered to access with 8 bytes alignment.
136   setOperationAction(ISD::VAARG, MVT::Other, Custom);
137   // Use the default implementation.
138   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139   setOperationAction(ISD::VAEND, MVT::Other, Expand);
140   /// } VAARG handling
141 
142   /// Stack {
143   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145 
146   // Use the default implementation.
147   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149   /// } Stack
150 
151   /// Branch {
152 
153   // VE doesn't have BRCOND
154   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155 
156   // BR_JT is not implemented yet.
157   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 
159   /// } Branch
160 
161   /// Int Ops {
162   for (MVT IntVT : {MVT::i32, MVT::i64}) {
163     // VE has no REM or DIVREM operations.
164     setOperationAction(ISD::UREM, IntVT, Expand);
165     setOperationAction(ISD::SREM, IntVT, Expand);
166     setOperationAction(ISD::SDIVREM, IntVT, Expand);
167     setOperationAction(ISD::UDIVREM, IntVT, Expand);
168 
169     // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170     setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171     setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172     setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173 
174     // VE has no MULHU/S or U/SMUL_LOHI operations.
175     // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176     setOperationAction(ISD::MULHU, IntVT, Expand);
177     setOperationAction(ISD::MULHS, IntVT, Expand);
178     setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179     setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180 
181     // VE has no CTTZ, ROTL, ROTR operations.
182     setOperationAction(ISD::CTTZ, IntVT, Expand);
183     setOperationAction(ISD::ROTL, IntVT, Expand);
184     setOperationAction(ISD::ROTR, IntVT, Expand);
185 
186     // VE has 64 bits instruction which works as i64 BSWAP operation.  This
187     // instruction works fine as i32 BSWAP operation with an additional
188     // parameter.  Use isel patterns to lower BSWAP.
189     setOperationAction(ISD::BSWAP, IntVT, Legal);
190 
191     // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192     // operations.  Use isel patterns for i64, promote for i32.
193     LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194     setOperationAction(ISD::BITREVERSE, IntVT, Act);
195     setOperationAction(ISD::CTLZ, IntVT, Act);
196     setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197     setOperationAction(ISD::CTPOP, IntVT, Act);
198 
199     // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
200     // Use isel patterns for i64, promote for i32.
201     setOperationAction(ISD::AND, IntVT, Act);
202     setOperationAction(ISD::OR, IntVT, Act);
203     setOperationAction(ISD::XOR, IntVT, Act);
204 
205     // Legal smax and smin
206     setOperationAction(ISD::SMAX, IntVT, Legal);
207     setOperationAction(ISD::SMIN, IntVT, Legal);
208   }
209   /// } Int Ops
210 
211   /// Conversion {
212   // VE doesn't have instructions for fp<->uint, so expand them by llvm
213   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
214   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
215   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
216   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
217 
218   // fp16 not supported
219   for (MVT FPVT : MVT::fp_valuetypes()) {
220     setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
221     setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
222   }
223   /// } Conversion
224 
225   /// Floating-point Ops {
226   /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227   ///       and fcmp.
228 
229   // VE doesn't have following floating point operations.
230   for (MVT VT : MVT::fp_valuetypes()) {
231     setOperationAction(ISD::FNEG, VT, Expand);
232     setOperationAction(ISD::FREM, VT, Expand);
233   }
234 
235   // VE doesn't have fdiv of f128.
236   setOperationAction(ISD::FDIV, MVT::f128, Expand);
237 
238   for (MVT FPVT : {MVT::f32, MVT::f64}) {
239     // f32 and f64 uses ConstantFP.  f128 uses ConstantPool.
240     setOperationAction(ISD::ConstantFP, FPVT, Legal);
241   }
242   /// } Floating-point Ops
243 
244   /// Floating-point math functions {
245 
246   // VE doesn't have following floating point math functions.
247   for (MVT VT : MVT::fp_valuetypes()) {
248     setOperationAction(ISD::FABS, VT, Expand);
249     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
250     setOperationAction(ISD::FCOS, VT, Expand);
251     setOperationAction(ISD::FMA, VT, Expand);
252     setOperationAction(ISD::FPOW, VT, Expand);
253     setOperationAction(ISD::FSIN, VT, Expand);
254     setOperationAction(ISD::FSQRT, VT, Expand);
255   }
256 
257   // VE has single and double FMINNUM and FMAXNUM
258   for (MVT VT : {MVT::f32, MVT::f64}) {
259     setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
260   }
261 
262   /// } Floating-point math functions
263 
264   /// Atomic instructions {
265 
266   setMaxAtomicSizeInBitsSupported(64);
267   setMinCmpXchgSizeInBits(32);
268   setSupportsUnalignedAtomics(false);
269 
270   // Use custom inserter for ATOMIC_FENCE.
271   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
272 
273   // Other atomic instructions.
274   for (MVT VT : MVT::integer_valuetypes()) {
275     // Support i8/i16 atomic swap.
276     setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
277 
278     // FIXME: Support "atmam" instructions.
279     setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
280     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
281     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
282     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
283 
284     // VE doesn't have follwing instructions.
285     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
286     setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
287     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
288     setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
289     setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
290     setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
291     setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
292     setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
293   }
294 
295   /// } Atomic instructions
296 
297   /// SJLJ instructions {
298   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
299   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
300   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
301   /// } SJLJ instructions
302 
303   // Intrinsic instructions
304   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
305 }
306 
initVPUActions()307 void VETargetLowering::initVPUActions() {
308   for (MVT LegalMaskVT : AllMaskVTs)
309     setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
310 
311   for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
312     setOperationAction(Opc, MVT::v512i1, Custom);
313 
314   for (MVT LegalVecVT : AllVectorVTs) {
315     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
316     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
317     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
318     // Translate all vector instructions with legal element types to VVP_*
319     // nodes.
320     // TODO We will custom-widen into VVP_* nodes in the future. While we are
321     // buildling the infrastructure for this, we only do this for legal vector
322     // VTs.
323 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
324   setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
325 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
326   setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
327     setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
328     setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
329 #include "VVPNodes.def"
330   }
331 
332   for (MVT LegalPackedVT : AllPackedVTs) {
333     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
334     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
335   }
336 
337   // vNt32, vNt64 ops (legal element types)
338   for (MVT VT : MVT::vector_valuetypes()) {
339     MVT ElemVT = VT.getVectorElementType();
340     unsigned ElemBits = ElemVT.getScalarSizeInBits();
341     if (ElemBits != 32 && ElemBits != 64)
342       continue;
343 
344     for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
345       setOperationAction(MemOpc, VT, Custom);
346 
347     const ISD::NodeType IntReductionOCs[] = {
348         ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
349         ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
350         ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
351 
352     for (unsigned IntRedOpc : IntReductionOCs)
353       setOperationAction(IntRedOpc, VT, Custom);
354   }
355 
356   // v256i1 and v512i1 ops
357   for (MVT MaskVT : AllMaskVTs) {
358     // Custom lower mask ops
359     setOperationAction(ISD::STORE, MaskVT, Custom);
360     setOperationAction(ISD::LOAD, MaskVT, Custom);
361   }
362 }
363 
364 SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const365 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
366                               bool IsVarArg,
367                               const SmallVectorImpl<ISD::OutputArg> &Outs,
368                               const SmallVectorImpl<SDValue> &OutVals,
369                               const SDLoc &DL, SelectionDAG &DAG) const {
370   // CCValAssign - represent the assignment of the return value to locations.
371   SmallVector<CCValAssign, 16> RVLocs;
372 
373   // CCState - Info about the registers and stack slot.
374   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
375                  *DAG.getContext());
376 
377   // Analyze return values.
378   CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
379 
380   SDValue Glue;
381   SmallVector<SDValue, 4> RetOps(1, Chain);
382 
383   // Copy the result values into the output registers.
384   for (unsigned i = 0; i != RVLocs.size(); ++i) {
385     CCValAssign &VA = RVLocs[i];
386     assert(VA.isRegLoc() && "Can only return in registers!");
387     assert(!VA.needsCustom() && "Unexpected custom lowering");
388     SDValue OutVal = OutVals[i];
389 
390     // Integer return values must be sign or zero extended by the callee.
391     switch (VA.getLocInfo()) {
392     case CCValAssign::Full:
393       break;
394     case CCValAssign::SExt:
395       OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
396       break;
397     case CCValAssign::ZExt:
398       OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
399       break;
400     case CCValAssign::AExt:
401       OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
402       break;
403     case CCValAssign::BCvt: {
404       // Convert a float return value to i64 with padding.
405       //     63     31   0
406       //    +------+------+
407       //    | float|   0  |
408       //    +------+------+
409       assert(VA.getLocVT() == MVT::i64);
410       assert(VA.getValVT() == MVT::f32);
411       SDValue Undef = SDValue(
412           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
413       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
414       OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
415                                           MVT::i64, Undef, OutVal, Sub_f32),
416                        0);
417       break;
418     }
419     default:
420       llvm_unreachable("Unknown loc info!");
421     }
422 
423     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);
424 
425     // Guarantee that all emitted copies are stuck together with flags.
426     Glue = Chain.getValue(1);
427     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
428   }
429 
430   RetOps[0] = Chain; // Update chain.
431 
432   // Add the glue if we have it.
433   if (Glue.getNode())
434     RetOps.push_back(Glue);
435 
436   return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
437 }
438 
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const439 SDValue VETargetLowering::LowerFormalArguments(
440     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
441     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
442     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
443   MachineFunction &MF = DAG.getMachineFunction();
444 
445   // Get the base offset of the incoming arguments stack space.
446   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
447   // Get the size of the preserved arguments area
448   unsigned ArgsPreserved = 64;
449 
450   // Analyze arguments according to CC_VE.
451   SmallVector<CCValAssign, 16> ArgLocs;
452   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
453                  *DAG.getContext());
454   // Allocate the preserved area first.
455   CCInfo.AllocateStack(ArgsPreserved, Align(8));
456   // We already allocated the preserved area, so the stack offset computed
457   // by CC_VE would be correct now.
458   CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
459 
460   for (const CCValAssign &VA : ArgLocs) {
461     assert(!VA.needsCustom() && "Unexpected custom lowering");
462     if (VA.isRegLoc()) {
463       // This argument is passed in a register.
464       // All integer register arguments are promoted by the caller to i64.
465 
466       // Create a virtual register for the promoted live-in value.
467       Register VReg =
468           MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
469       SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
470 
471       // The caller promoted the argument, so insert an Assert?ext SDNode so we
472       // won't promote the value again in this function.
473       switch (VA.getLocInfo()) {
474       case CCValAssign::SExt:
475         Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
476                           DAG.getValueType(VA.getValVT()));
477         break;
478       case CCValAssign::ZExt:
479         Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
480                           DAG.getValueType(VA.getValVT()));
481         break;
482       case CCValAssign::BCvt: {
483         // Extract a float argument from i64 with padding.
484         //     63     31   0
485         //    +------+------+
486         //    | float|   0  |
487         //    +------+------+
488         assert(VA.getLocVT() == MVT::i64);
489         assert(VA.getValVT() == MVT::f32);
490         SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
491         Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
492                                          MVT::f32, Arg, Sub_f32),
493                       0);
494         break;
495       }
496       default:
497         break;
498       }
499 
500       // Truncate the register down to the argument type.
501       if (VA.isExtInLoc())
502         Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
503 
504       InVals.push_back(Arg);
505       continue;
506     }
507 
508     // The registers are exhausted. This argument was passed on the stack.
509     assert(VA.isMemLoc());
510     // The CC_VE_Full/Half functions compute stack offsets relative to the
511     // beginning of the arguments area at %fp + the size of reserved area.
512     unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
513     unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
514 
515     // Adjust offset for a float argument by adding 4 since the argument is
516     // stored in 8 bytes buffer with offset like below.  LLVM generates
517     // 4 bytes load instruction, so need to adjust offset here.  This
518     // adjustment is required in only LowerFormalArguments.  In LowerCall,
519     // a float argument is converted to i64 first, and stored as 8 bytes
520     // data, which is required by ABI, so no need for adjustment.
521     //    0      4
522     //    +------+------+
523     //    | empty| float|
524     //    +------+------+
525     if (VA.getValVT() == MVT::f32)
526       Offset += 4;
527 
528     int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
529     InVals.push_back(
530         DAG.getLoad(VA.getValVT(), DL, Chain,
531                     DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
532                     MachinePointerInfo::getFixedStack(MF, FI)));
533   }
534 
535   if (!IsVarArg)
536     return Chain;
537 
538   // This function takes variable arguments, some of which may have been passed
539   // in registers %s0-%s8.
540   //
541   // The va_start intrinsic needs to know the offset to the first variable
542   // argument.
543   // TODO: need to calculate offset correctly once we support f128.
544   unsigned ArgOffset = ArgLocs.size() * 8;
545   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
546   // Skip the reserved area at the top of stack.
547   FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
548 
549   return Chain;
550 }
551 
552 // FIXME? Maybe this could be a TableGen attribute on some registers and
553 // this table could be generated automatically from RegInfo.
getRegisterByName(const char * RegName,LLT VT,const MachineFunction & MF) const554 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
555                                              const MachineFunction &MF) const {
556   Register Reg = StringSwitch<Register>(RegName)
557                      .Case("sp", VE::SX11)    // Stack pointer
558                      .Case("fp", VE::SX9)     // Frame pointer
559                      .Case("sl", VE::SX8)     // Stack limit
560                      .Case("lr", VE::SX10)    // Link register
561                      .Case("tp", VE::SX14)    // Thread pointer
562                      .Case("outer", VE::SX12) // Outer regiser
563                      .Case("info", VE::SX17)  // Info area register
564                      .Case("got", VE::SX15)   // Global offset table register
565                      .Case("plt", VE::SX16) // Procedure linkage table register
566                      .Default(Register());
567   return Reg;
568 }
569 
570 //===----------------------------------------------------------------------===//
571 // TargetLowering Implementation
572 //===----------------------------------------------------------------------===//
573 
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const574 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
575                                     SmallVectorImpl<SDValue> &InVals) const {
576   SelectionDAG &DAG = CLI.DAG;
577   SDLoc DL = CLI.DL;
578   SDValue Chain = CLI.Chain;
579   auto PtrVT = getPointerTy(DAG.getDataLayout());
580 
581   // VE target does not yet support tail call optimization.
582   CLI.IsTailCall = false;
583 
584   // Get the base offset of the outgoing arguments stack space.
585   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
586   // Get the size of the preserved arguments area
587   unsigned ArgsPreserved = 8 * 8u;
588 
589   // Analyze operands of the call, assigning locations to each operand.
590   SmallVector<CCValAssign, 16> ArgLocs;
591   CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
592                  *DAG.getContext());
593   // Allocate the preserved area first.
594   CCInfo.AllocateStack(ArgsPreserved, Align(8));
595   // We already allocated the preserved area, so the stack offset computed
596   // by CC_VE would be correct now.
597   CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
598 
599   // VE requires to use both register and stack for varargs or no-prototyped
600   // functions.
601   bool UseBoth = CLI.IsVarArg;
602 
603   // Analyze operands again if it is required to store BOTH.
604   SmallVector<CCValAssign, 16> ArgLocs2;
605   CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
606                   ArgLocs2, *DAG.getContext());
607   if (UseBoth)
608     CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
609 
610   // Get the size of the outgoing arguments stack space requirement.
611   unsigned ArgsSize = CCInfo.getStackSize();
612 
613   // Keep stack frames 16-byte aligned.
614   ArgsSize = alignTo(ArgsSize, 16);
615 
616   // Adjust the stack pointer to make room for the arguments.
617   // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
618   // with more than 6 arguments.
619   Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
620 
621   // Collect the set of registers to pass to the function and their values.
622   // This will be emitted as a sequence of CopyToReg nodes glued to the call
623   // instruction.
624   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
625 
626   // Collect chains from all the memory opeations that copy arguments to the
627   // stack. They must follow the stack pointer adjustment above and precede the
628   // call instruction itself.
629   SmallVector<SDValue, 8> MemOpChains;
630 
631   // VE needs to get address of callee function in a register
632   // So, prepare to copy it to SX12 here.
633 
634   // If the callee is a GlobalAddress node (quite common, every direct call is)
635   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
636   // Likewise ExternalSymbol -> TargetExternalSymbol.
637   SDValue Callee = CLI.Callee;
638 
639   bool IsPICCall = isPositionIndependent();
640 
641   // PC-relative references to external symbols should go through $stub.
642   // If so, we need to prepare GlobalBaseReg first.
643   const TargetMachine &TM = DAG.getTarget();
644   const GlobalValue *GV = nullptr;
645   auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
646   if (CalleeG)
647     GV = CalleeG->getGlobal();
648   bool Local = TM.shouldAssumeDSOLocal(GV);
649   bool UsePlt = !Local;
650   MachineFunction &MF = DAG.getMachineFunction();
651 
652   // Turn GlobalAddress/ExternalSymbol node into a value node
653   // containing the address of them here.
654   if (CalleeG) {
655     if (IsPICCall) {
656       if (UsePlt)
657         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
658       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
659       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
660     } else {
661       Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
662     }
663   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
664     if (IsPICCall) {
665       if (UsePlt)
666         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
667       Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
668       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
669     } else {
670       Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
671     }
672   }
673 
674   RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
675 
676   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
677     CCValAssign &VA = ArgLocs[i];
678     SDValue Arg = CLI.OutVals[i];
679 
680     // Promote the value if needed.
681     switch (VA.getLocInfo()) {
682     default:
683       llvm_unreachable("Unknown location info!");
684     case CCValAssign::Full:
685       break;
686     case CCValAssign::SExt:
687       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
688       break;
689     case CCValAssign::ZExt:
690       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
691       break;
692     case CCValAssign::AExt:
693       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
694       break;
695     case CCValAssign::BCvt: {
696       // Convert a float argument to i64 with padding.
697       //     63     31   0
698       //    +------+------+
699       //    | float|   0  |
700       //    +------+------+
701       assert(VA.getLocVT() == MVT::i64);
702       assert(VA.getValVT() == MVT::f32);
703       SDValue Undef = SDValue(
704           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
705       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
706       Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
707                                        MVT::i64, Undef, Arg, Sub_f32),
708                     0);
709       break;
710     }
711     }
712 
713     if (VA.isRegLoc()) {
714       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
715       if (!UseBoth)
716         continue;
717       VA = ArgLocs2[i];
718     }
719 
720     assert(VA.isMemLoc());
721 
722     // Create a store off the stack pointer for this argument.
723     SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
724     // The argument area starts at %fp/%sp + the size of reserved area.
725     SDValue PtrOff =
726         DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
727     PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
728     MemOpChains.push_back(
729         DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
730   }
731 
732   // Emit all stores, make sure they occur before the call.
733   if (!MemOpChains.empty())
734     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
735 
736   // Build a sequence of CopyToReg nodes glued together with token chain and
737   // glue operands which copy the outgoing args into registers. The InGlue is
738   // necessary since all emitted instructions must be stuck together in order
739   // to pass the live physical registers.
740   SDValue InGlue;
741   for (const auto &[Reg, N] : RegsToPass) {
742     Chain = DAG.getCopyToReg(Chain, DL, Reg, N, InGlue);
743     InGlue = Chain.getValue(1);
744   }
745 
746   // Build the operands for the call instruction itself.
747   SmallVector<SDValue, 8> Ops;
748   Ops.push_back(Chain);
749   for (const auto &[Reg, N] : RegsToPass)
750     Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
751 
752   // Add a register mask operand representing the call-preserved registers.
753   const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
754   const uint32_t *Mask =
755       TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
756   assert(Mask && "Missing call preserved mask for calling convention");
757   Ops.push_back(DAG.getRegisterMask(Mask));
758 
759   // Make sure the CopyToReg nodes are glued to the call instruction which
760   // consumes the registers.
761   if (InGlue.getNode())
762     Ops.push_back(InGlue);
763 
764   // Now the call itself.
765   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
766   Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
767   InGlue = Chain.getValue(1);
768 
769   // Revert the stack pointer immediately after the call.
770   Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
771   InGlue = Chain.getValue(1);
772 
773   // Now extract the return values. This is more or less the same as
774   // LowerFormalArguments.
775 
776   // Assign locations to each value returned by this call.
777   SmallVector<CCValAssign, 16> RVLocs;
778   CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
779                  *DAG.getContext());
780 
781   // Set inreg flag manually for codegen generated library calls that
782   // return float.
783   if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
784     CLI.Ins[0].Flags.setInReg();
785 
786   RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
787 
788   // Copy all of the result registers out of their specified physreg.
789   for (unsigned i = 0; i != RVLocs.size(); ++i) {
790     CCValAssign &VA = RVLocs[i];
791     assert(!VA.needsCustom() && "Unexpected custom lowering");
792     Register Reg = VA.getLocReg();
793 
794     // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
795     // reside in the same register in the high and low bits. Reuse the
796     // CopyFromReg previous node to avoid duplicate copies.
797     SDValue RV;
798     if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
799       if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
800         RV = Chain.getValue(0);
801 
802     // But usually we'll create a new CopyFromReg for a different register.
803     if (!RV.getNode()) {
804       RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
805       Chain = RV.getValue(1);
806       InGlue = Chain.getValue(2);
807     }
808 
809     // The callee promoted the return value, so insert an Assert?ext SDNode so
810     // we won't promote the value again in this function.
811     switch (VA.getLocInfo()) {
812     case CCValAssign::SExt:
813       RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
814                        DAG.getValueType(VA.getValVT()));
815       break;
816     case CCValAssign::ZExt:
817       RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
818                        DAG.getValueType(VA.getValVT()));
819       break;
820     case CCValAssign::BCvt: {
821       // Extract a float return value from i64 with padding.
822       //     63     31   0
823       //    +------+------+
824       //    | float|   0  |
825       //    +------+------+
826       assert(VA.getLocVT() == MVT::i64);
827       assert(VA.getValVT() == MVT::f32);
828       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
829       RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
830                                       MVT::f32, RV, Sub_f32),
831                    0);
832       break;
833     }
834     default:
835       break;
836     }
837 
838     // Truncate the register down to the return value type.
839     if (VA.isExtInLoc())
840       RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
841 
842     InVals.push_back(RV);
843   }
844 
845   return Chain;
846 }
847 
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const848 bool VETargetLowering::isOffsetFoldingLegal(
849     const GlobalAddressSDNode *GA) const {
850   // VE uses 64 bit addressing, so we need multiple instructions to generate
851   // an address.  Folding address with offset increases the number of
852   // instructions, so that we disable it here.  Offsets will be folded in
853   // the DAG combine later if it worth to do so.
854   return false;
855 }
856 
857 /// isFPImmLegal - Returns true if the target can instruction select the
858 /// specified FP immediate natively. If false, the legalizer will
859 /// materialize the FP immediate as a load from a constant pool.
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const860 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
861                                     bool ForCodeSize) const {
862   return VT == MVT::f32 || VT == MVT::f64;
863 }
864 
865 /// Determine if the target supports unaligned memory accesses.
866 ///
867 /// This function returns true if the target allows unaligned memory accesses
868 /// of the specified type in the given address space. If true, it also returns
869 /// whether the unaligned memory access is "fast" in the last argument by
870 /// reference. This is used, for example, in situations where an array
871 /// copy/move/set is converted to a sequence of store operations. Its use
872 /// helps to ensure that such replacements don't generate code that causes an
873 /// alignment error (trap) on the target machine.
allowsMisalignedMemoryAccesses(EVT VT,unsigned AddrSpace,Align A,MachineMemOperand::Flags,unsigned * Fast) const874 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
875                                                       unsigned AddrSpace,
876                                                       Align A,
877                                                       MachineMemOperand::Flags,
878                                                       unsigned *Fast) const {
879   if (Fast) {
880     // It's fast anytime on VE
881     *Fast = 1;
882   }
883   return true;
884 }
885 
VETargetLowering(const TargetMachine & TM,const VESubtarget & STI)886 VETargetLowering::VETargetLowering(const TargetMachine &TM,
887                                    const VESubtarget &STI)
888     : TargetLowering(TM), Subtarget(&STI) {
889   // Instructions which use registers as conditionals examine all the
890   // bits (as does the pseudo SELECT_CC expansion). I don't think it
891   // matters much whether it's ZeroOrOneBooleanContent, or
892   // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
893   // former.
894   setBooleanContents(ZeroOrOneBooleanContent);
895   setBooleanVectorContents(ZeroOrOneBooleanContent);
896 
897   initRegisterClasses();
898   initSPUActions();
899   initVPUActions();
900 
901   setStackPointerRegisterToSaveRestore(VE::SX11);
902 
903   // We have target-specific dag combine patterns for the following nodes:
904   setTargetDAGCombine(ISD::TRUNCATE);
905   setTargetDAGCombine(ISD::SELECT);
906   setTargetDAGCombine(ISD::SELECT_CC);
907 
908   // Set function alignment to 16 bytes
909   setMinFunctionAlignment(Align(16));
910 
911   // VE stores all argument by 8 bytes alignment
912   setMinStackArgumentAlignment(Align(8));
913 
914   computeRegisterProperties(Subtarget->getRegisterInfo());
915 }
916 
getTargetNodeName(unsigned Opcode) const917 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
918 #define TARGET_NODE_CASE(NAME)                                                 \
919   case VEISD::NAME:                                                            \
920     return "VEISD::" #NAME;
921   switch ((VEISD::NodeType)Opcode) {
922   case VEISD::FIRST_NUMBER:
923     break;
924     TARGET_NODE_CASE(CMPI)
925     TARGET_NODE_CASE(CMPU)
926     TARGET_NODE_CASE(CMPF)
927     TARGET_NODE_CASE(CMPQ)
928     TARGET_NODE_CASE(CMOV)
929     TARGET_NODE_CASE(CALL)
930     TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
931     TARGET_NODE_CASE(EH_SJLJ_SETJMP)
932     TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
933     TARGET_NODE_CASE(GETFUNPLT)
934     TARGET_NODE_CASE(GETSTACKTOP)
935     TARGET_NODE_CASE(GETTLSADDR)
936     TARGET_NODE_CASE(GLOBAL_BASE_REG)
937     TARGET_NODE_CASE(Hi)
938     TARGET_NODE_CASE(Lo)
939     TARGET_NODE_CASE(RET_GLUE)
940     TARGET_NODE_CASE(TS1AM)
941     TARGET_NODE_CASE(VEC_UNPACK_LO)
942     TARGET_NODE_CASE(VEC_UNPACK_HI)
943     TARGET_NODE_CASE(VEC_PACK)
944     TARGET_NODE_CASE(VEC_BROADCAST)
945     TARGET_NODE_CASE(REPL_I32)
946     TARGET_NODE_CASE(REPL_F32)
947 
948     TARGET_NODE_CASE(LEGALAVL)
949 
950     // Register the VVP_* SDNodes.
951 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
952 #include "VVPNodes.def"
953   }
954 #undef TARGET_NODE_CASE
955   return nullptr;
956 }
957 
getSetCCResultType(const DataLayout &,LLVMContext &,EVT VT) const958 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
959                                          EVT VT) const {
960   return MVT::i32;
961 }
962 
963 // Convert to a target node and set target flags.
withTargetFlags(SDValue Op,unsigned TF,SelectionDAG & DAG) const964 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
965                                           SelectionDAG &DAG) const {
966   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
967     return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
968                                       GA->getValueType(0), GA->getOffset(), TF);
969 
970   if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
971     return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
972                                      0, TF);
973 
974   if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
975     return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
976                                      CP->getAlign(), CP->getOffset(), TF);
977 
978   if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
979     return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
980                                        TF);
981 
982   if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
983     return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
984 
985   llvm_unreachable("Unhandled address SDNode");
986 }
987 
988 // Split Op into high and low parts according to HiTF and LoTF.
989 // Return an ADD node combining the parts.
makeHiLoPair(SDValue Op,unsigned HiTF,unsigned LoTF,SelectionDAG & DAG) const990 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
991                                        SelectionDAG &DAG) const {
992   SDLoc DL(Op);
993   EVT VT = Op.getValueType();
994   SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
995   SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
996   return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
997 }
998 
999 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1000 // or ExternalSymbol SDNode.
makeAddress(SDValue Op,SelectionDAG & DAG) const1001 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1002   SDLoc DL(Op);
1003   EVT PtrVT = Op.getValueType();
1004 
1005   // Handle PIC mode first. VE needs a got load for every variable!
1006   if (isPositionIndependent()) {
1007     auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1008 
1009     if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1010         (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1011       // Create following instructions for local linkage PIC code.
1012       //     lea %reg, label@gotoff_lo
1013       //     and %reg, %reg, (32)0
1014       //     lea.sl %reg, label@gotoff_hi(%reg, %got)
1015       SDValue HiLo =
1016           makeHiLoPair(Op, VE::S_GOTOFF_HI32, VE::S_GOTOFF_LO32, DAG);
1017       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1018       return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1019     }
1020     // Create following instructions for not local linkage PIC code.
1021     //     lea %reg, label@got_lo
1022     //     and %reg, %reg, (32)0
1023     //     lea.sl %reg, label@got_hi(%reg)
1024     //     ld %reg, (%reg, %got)
1025     SDValue HiLo = makeHiLoPair(Op, VE::S_GOT_HI32, VE::S_GOT_LO32, DAG);
1026     SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1027     SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1028     return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1029                        MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1030   }
1031 
1032   // This is one of the absolute code models.
1033   switch (getTargetMachine().getCodeModel()) {
1034   default:
1035     llvm_unreachable("Unsupported absolute code model");
1036   case CodeModel::Small:
1037   case CodeModel::Medium:
1038   case CodeModel::Large:
1039     // abs64.
1040     return makeHiLoPair(Op, VE::S_HI32, VE::S_LO32, DAG);
1041   }
1042 }
1043 
1044 /// Custom Lower {
1045 
1046 // The mappings for emitLeading/TrailingFence for VE is designed by following
1047 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
emitLeadingFence(IRBuilderBase & Builder,Instruction * Inst,AtomicOrdering Ord) const1048 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1049                                                 Instruction *Inst,
1050                                                 AtomicOrdering Ord) const {
1051   switch (Ord) {
1052   case AtomicOrdering::NotAtomic:
1053   case AtomicOrdering::Unordered:
1054     llvm_unreachable("Invalid fence: unordered/non-atomic");
1055   case AtomicOrdering::Monotonic:
1056   case AtomicOrdering::Acquire:
1057     return nullptr; // Nothing to do
1058   case AtomicOrdering::Release:
1059   case AtomicOrdering::AcquireRelease:
1060     return Builder.CreateFence(AtomicOrdering::Release);
1061   case AtomicOrdering::SequentiallyConsistent:
1062     if (!Inst->hasAtomicStore())
1063       return nullptr; // Nothing to do
1064     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1065   }
1066   llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1067 }
1068 
emitTrailingFence(IRBuilderBase & Builder,Instruction * Inst,AtomicOrdering Ord) const1069 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1070                                                  Instruction *Inst,
1071                                                  AtomicOrdering Ord) const {
1072   switch (Ord) {
1073   case AtomicOrdering::NotAtomic:
1074   case AtomicOrdering::Unordered:
1075     llvm_unreachable("Invalid fence: unordered/not-atomic");
1076   case AtomicOrdering::Monotonic:
1077   case AtomicOrdering::Release:
1078     return nullptr; // Nothing to do
1079   case AtomicOrdering::Acquire:
1080   case AtomicOrdering::AcquireRelease:
1081     return Builder.CreateFence(AtomicOrdering::Acquire);
1082   case AtomicOrdering::SequentiallyConsistent:
1083     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1084   }
1085   llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1086 }
1087 
lowerATOMIC_FENCE(SDValue Op,SelectionDAG & DAG) const1088 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1089                                             SelectionDAG &DAG) const {
1090   SDLoc DL(Op);
1091   AtomicOrdering FenceOrdering =
1092       static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
1093   SyncScope::ID FenceSSID =
1094       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1095 
1096   // VE uses Release consistency, so need a fence instruction if it is a
1097   // cross-thread fence.
1098   if (FenceSSID == SyncScope::System) {
1099     switch (FenceOrdering) {
1100     case AtomicOrdering::NotAtomic:
1101     case AtomicOrdering::Unordered:
1102     case AtomicOrdering::Monotonic:
1103       // No need to generate fencem instruction here.
1104       break;
1105     case AtomicOrdering::Acquire:
1106       // Generate "fencem 2" as acquire fence.
1107       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1108                                         DAG.getTargetConstant(2, DL, MVT::i32),
1109                                         Op.getOperand(0)),
1110                      0);
1111     case AtomicOrdering::Release:
1112       // Generate "fencem 1" as release fence.
1113       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1114                                         DAG.getTargetConstant(1, DL, MVT::i32),
1115                                         Op.getOperand(0)),
1116                      0);
1117     case AtomicOrdering::AcquireRelease:
1118     case AtomicOrdering::SequentiallyConsistent:
1119       // Generate "fencem 3" as acq_rel and seq_cst fence.
1120       // FIXME: "fencem 3" doesn't wait for PCIe deveices accesses,
1121       //        so  seq_cst may require more instruction for them.
1122       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1123                                         DAG.getTargetConstant(3, DL, MVT::i32),
1124                                         Op.getOperand(0)),
1125                      0);
1126     }
1127   }
1128 
1129   // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1130   return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1131 }
1132 
1133 TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const1134 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1135   // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1136   if (AI->getOperation() == AtomicRMWInst::Xchg) {
1137     return AtomicExpansionKind::None;
1138   }
1139   // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1140 
1141   // Otherwise, expand it using compare and exchange instruction to not call
1142   // __sync_fetch_and_* functions.
1143   return AtomicExpansionKind::CmpXChg;
1144 }
1145 
prepareTS1AM(SDValue Op,SelectionDAG & DAG,SDValue & Flag,SDValue & Bits)1146 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1147                             SDValue &Bits) {
1148   SDLoc DL(Op);
1149   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1150   SDValue Ptr = N->getOperand(1);
1151   SDValue Val = N->getOperand(2);
1152   EVT PtrVT = Ptr.getValueType();
1153   bool Byte = N->getMemoryVT() == MVT::i8;
1154   //   Remainder = AND Ptr, 3
1155   //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
1156   //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
1157   //   Bits = Remainder << 3
1158   //   NewVal = Val << Bits
1159   SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1160   SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1161   SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1162                       : DAG.getConstant(3, DL, MVT::i32);
1163   Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1164   Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1165   return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1166 }
1167 
finalizeTS1AM(SDValue Op,SelectionDAG & DAG,SDValue Data,SDValue Bits)1168 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1169                              SDValue Bits) {
1170   SDLoc DL(Op);
1171   EVT VT = Data.getValueType();
1172   bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1173   //   NewData = Data >> Bits
1174   //   Result = NewData & 0xff   ; If Byte is true (1 byte)
1175   //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
1176 
1177   SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1178   return DAG.getNode(ISD::AND, DL, VT,
1179                      {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1180 }
1181 
lowerATOMIC_SWAP(SDValue Op,SelectionDAG & DAG) const1182 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1183                                            SelectionDAG &DAG) const {
1184   SDLoc DL(Op);
1185   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1186 
1187   if (N->getMemoryVT() == MVT::i8) {
1188     // For i8, use "ts1am"
1189     //   Input:
1190     //     ATOMIC_SWAP Ptr, Val, Order
1191     //
1192     //   Output:
1193     //     Remainder = AND Ptr, 3
1194     //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
1195     //     Bits = Remainder << 3
1196     //     NewVal = Val << Bits
1197     //
1198     //     Aligned = AND Ptr, -4
1199     //     Data = TS1AM Aligned, Flag, NewVal
1200     //
1201     //     NewData = Data >> Bits
1202     //     Result = NewData & 0xff ; 1 byte result
1203     SDValue Flag;
1204     SDValue Bits;
1205     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1206 
1207     SDValue Ptr = N->getOperand(1);
1208     SDValue Aligned =
1209         DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1210                     {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
1211     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1212                                   DAG.getVTList(Op.getNode()->getValueType(0),
1213                                                 Op.getNode()->getValueType(1)),
1214                                   {N->getChain(), Aligned, Flag, NewVal},
1215                                   N->getMemOperand());
1216 
1217     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1218     SDValue Chain = TS1AM.getValue(1);
1219     return DAG.getMergeValues({Result, Chain}, DL);
1220   }
1221   if (N->getMemoryVT() == MVT::i16) {
1222     // For i16, use "ts1am"
1223     SDValue Flag;
1224     SDValue Bits;
1225     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1226 
1227     SDValue Ptr = N->getOperand(1);
1228     SDValue Aligned =
1229         DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1230                     {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
1231     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1232                                   DAG.getVTList(Op.getNode()->getValueType(0),
1233                                                 Op.getNode()->getValueType(1)),
1234                                   {N->getChain(), Aligned, Flag, NewVal},
1235                                   N->getMemOperand());
1236 
1237     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1238     SDValue Chain = TS1AM.getValue(1);
1239     return DAG.getMergeValues({Result, Chain}, DL);
1240   }
1241   // Otherwise, let llvm legalize it.
1242   return Op;
1243 }
1244 
lowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const1245 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1246                                              SelectionDAG &DAG) const {
1247   return makeAddress(Op, DAG);
1248 }
1249 
lowerBlockAddress(SDValue Op,SelectionDAG & DAG) const1250 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1251                                             SelectionDAG &DAG) const {
1252   return makeAddress(Op, DAG);
1253 }
1254 
lowerConstantPool(SDValue Op,SelectionDAG & DAG) const1255 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1256                                             SelectionDAG &DAG) const {
1257   return makeAddress(Op, DAG);
1258 }
1259 
1260 SDValue
lowerToTLSGeneralDynamicModel(SDValue Op,SelectionDAG & DAG) const1261 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1262                                                 SelectionDAG &DAG) const {
1263   SDLoc DL(Op);
1264 
1265   // Generate the following code:
1266   //   t1: ch,glue = callseq_start t0, 0, 0
1267   //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1268   //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
1269   //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1270   SDValue Label = withTargetFlags(Op, 0, DAG);
1271   EVT PtrVT = Op.getValueType();
1272 
1273   // Lowering the machine isd will make sure everything is in the right
1274   // location.
1275   SDValue Chain = DAG.getEntryNode();
1276   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1277   const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1278       DAG.getMachineFunction(), CallingConv::C);
1279   Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1280   SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1281   Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1282   Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
1283   Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1284 
1285   // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1286   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1287   MFI.setHasCalls(true);
1288 
1289   // Also generate code to prepare a GOT register if it is PIC.
1290   if (isPositionIndependent()) {
1291     MachineFunction &MF = DAG.getMachineFunction();
1292     Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1293   }
1294 
1295   return Chain;
1296 }
1297 
lowerGlobalTLSAddress(SDValue Op,SelectionDAG & DAG) const1298 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1299                                                 SelectionDAG &DAG) const {
1300   // The current implementation of nld (2.26) doesn't allow local exec model
1301   // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1302   // generate the general dynamic model code sequence.
1303   //
1304   // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1305   return lowerToTLSGeneralDynamicModel(Op, DAG);
1306 }
1307 
lowerJumpTable(SDValue Op,SelectionDAG & DAG) const1308 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1309   return makeAddress(Op, DAG);
1310 }
1311 
1312 // Lower a f128 load into two f64 loads.
lowerLoadF128(SDValue Op,SelectionDAG & DAG)1313 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1314   SDLoc DL(Op);
1315   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1316   assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1317   Align Alignment = LdNode->getAlign();
1318   if (Alignment > 8)
1319     Alignment = Align(8);
1320 
1321   SDValue Lo64 =
1322       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1323                   LdNode->getPointerInfo(), Alignment,
1324                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1325                                        : MachineMemOperand::MONone);
1326   EVT AddrVT = LdNode->getBasePtr().getValueType();
1327   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1328                               DAG.getConstant(8, DL, AddrVT));
1329   SDValue Hi64 =
1330       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1331                   LdNode->getPointerInfo(), Alignment,
1332                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1333                                        : MachineMemOperand::MONone);
1334 
1335   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1336   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1337 
1338   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1339   SDNode *InFP128 =
1340       DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1341   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1342                                SDValue(InFP128, 0), Hi64, SubRegEven);
1343   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1344                                SDValue(InFP128, 0), Lo64, SubRegOdd);
1345   SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1346                           SDValue(Hi64.getNode(), 1)};
1347   SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1348   SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1349   return DAG.getMergeValues(Ops, DL);
1350 }
1351 
1352 // Lower a vXi1 load into following instructions
1353 //   LDrii %1, (,%addr)
1354 //   LVMxir  %vm, 0, %1
1355 //   LDrii %2, 8(,%addr)
1356 //   LVMxir  %vm, 0, %2
1357 //   ...
lowerLoadI1(SDValue Op,SelectionDAG & DAG)1358 static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1359   SDLoc DL(Op);
1360   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1361   assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1362 
1363   SDValue BasePtr = LdNode->getBasePtr();
1364   Align Alignment = LdNode->getAlign();
1365   if (Alignment > 8)
1366     Alignment = Align(8);
1367 
1368   EVT AddrVT = BasePtr.getValueType();
1369   EVT MemVT = LdNode->getMemoryVT();
1370   if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1371     SDValue OutChains[4];
1372     SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1373     for (int i = 0; i < 4; ++i) {
1374       // Generate load dag and prepare chains.
1375       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1376                                  DAG.getConstant(8 * i, DL, AddrVT));
1377       SDValue Val =
1378           DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1379                       LdNode->getPointerInfo(), Alignment,
1380                       LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1381                                            : MachineMemOperand::MONone);
1382       OutChains[i] = SDValue(Val.getNode(), 1);
1383 
1384       VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1385                               DAG.getTargetConstant(i, DL, MVT::i64), Val,
1386                               SDValue(VM, 0));
1387     }
1388     SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1389     SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1390     return DAG.getMergeValues(Ops, DL);
1391   } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1392     SDValue OutChains[8];
1393     SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1394     for (int i = 0; i < 8; ++i) {
1395       // Generate load dag and prepare chains.
1396       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1397                                  DAG.getConstant(8 * i, DL, AddrVT));
1398       SDValue Val =
1399           DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1400                       LdNode->getPointerInfo(), Alignment,
1401                       LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1402                                            : MachineMemOperand::MONone);
1403       OutChains[i] = SDValue(Val.getNode(), 1);
1404 
1405       VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1406                               DAG.getTargetConstant(i, DL, MVT::i64), Val,
1407                               SDValue(VM, 0));
1408     }
1409     SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1410     SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1411     return DAG.getMergeValues(Ops, DL);
1412   } else {
1413     // Otherwise, ask llvm to expand it.
1414     return SDValue();
1415   }
1416 }
1417 
lowerLOAD(SDValue Op,SelectionDAG & DAG) const1418 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1419   LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1420   EVT MemVT = LdNode->getMemoryVT();
1421 
1422   // If VPU is enabled, always expand non-mask vector loads to VVP
1423   if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1424     return lowerToVVP(Op, DAG);
1425 
1426   SDValue BasePtr = LdNode->getBasePtr();
1427   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1428     // Do not expand store instruction with frame index here because of
1429     // dependency problems.  We expand it later in eliminateFrameIndex().
1430     return Op;
1431   }
1432 
1433   if (MemVT == MVT::f128)
1434     return lowerLoadF128(Op, DAG);
1435   if (isMaskType(MemVT))
1436     return lowerLoadI1(Op, DAG);
1437 
1438   return Op;
1439 }
1440 
1441 // Lower a f128 store into two f64 stores.
lowerStoreF128(SDValue Op,SelectionDAG & DAG)1442 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1443   SDLoc DL(Op);
1444   StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1445   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1446 
1447   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1448   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1449 
1450   SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1451                                     StNode->getValue(), SubRegEven);
1452   SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1453                                     StNode->getValue(), SubRegOdd);
1454 
1455   Align Alignment = StNode->getAlign();
1456   if (Alignment > 8)
1457     Alignment = Align(8);
1458 
1459   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1460   SDValue OutChains[2];
1461   OutChains[0] =
1462       DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1463                    StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1464                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1465                                         : MachineMemOperand::MONone);
1466   EVT AddrVT = StNode->getBasePtr().getValueType();
1467   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1468                               DAG.getConstant(8, DL, AddrVT));
1469   OutChains[1] =
1470       DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1471                    MachinePointerInfo(), Alignment,
1472                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1473                                         : MachineMemOperand::MONone);
1474   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1475 }
1476 
1477 // Lower a vXi1 store into following instructions
1478 //   SVMi  %1, %vm, 0
1479 //   STrii %1, (,%addr)
1480 //   SVMi  %2, %vm, 1
1481 //   STrii %2, 8(,%addr)
1482 //   ...
lowerStoreI1(SDValue Op,SelectionDAG & DAG)1483 static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1484   SDLoc DL(Op);
1485   StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1486   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1487 
1488   SDValue BasePtr = StNode->getBasePtr();
1489   Align Alignment = StNode->getAlign();
1490   if (Alignment > 8)
1491     Alignment = Align(8);
1492   EVT AddrVT = BasePtr.getValueType();
1493   EVT MemVT = StNode->getMemoryVT();
1494   if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1495     SDValue OutChains[4];
1496     for (int i = 0; i < 4; ++i) {
1497       SDNode *V =
1498           DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1499                              DAG.getTargetConstant(i, DL, MVT::i64));
1500       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1501                                  DAG.getConstant(8 * i, DL, AddrVT));
1502       OutChains[i] =
1503           DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1504                        MachinePointerInfo(), Alignment,
1505                        StNode->isVolatile() ? MachineMemOperand::MOVolatile
1506                                             : MachineMemOperand::MONone);
1507     }
1508     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1509   } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1510     SDValue OutChains[8];
1511     for (int i = 0; i < 8; ++i) {
1512       SDNode *V =
1513           DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1514                              DAG.getTargetConstant(i, DL, MVT::i64));
1515       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1516                                  DAG.getConstant(8 * i, DL, AddrVT));
1517       OutChains[i] =
1518           DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1519                        MachinePointerInfo(), Alignment,
1520                        StNode->isVolatile() ? MachineMemOperand::MOVolatile
1521                                             : MachineMemOperand::MONone);
1522     }
1523     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1524   } else {
1525     // Otherwise, ask llvm to expand it.
1526     return SDValue();
1527   }
1528 }
1529 
lowerSTORE(SDValue Op,SelectionDAG & DAG) const1530 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1531   StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1532   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1533   EVT MemVT = StNode->getMemoryVT();
1534 
1535   // If VPU is enabled, always expand non-mask vector stores to VVP
1536   if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1537     return lowerToVVP(Op, DAG);
1538 
1539   SDValue BasePtr = StNode->getBasePtr();
1540   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1541     // Do not expand store instruction with frame index here because of
1542     // dependency problems.  We expand it later in eliminateFrameIndex().
1543     return Op;
1544   }
1545 
1546   if (MemVT == MVT::f128)
1547     return lowerStoreF128(Op, DAG);
1548   if (isMaskType(MemVT))
1549     return lowerStoreI1(Op, DAG);
1550 
1551   // Otherwise, ask llvm to expand it.
1552   return SDValue();
1553 }
1554 
lowerVASTART(SDValue Op,SelectionDAG & DAG) const1555 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1556   MachineFunction &MF = DAG.getMachineFunction();
1557   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1558   auto PtrVT = getPointerTy(DAG.getDataLayout());
1559 
1560   // Need frame address to find the address of VarArgsFrameIndex.
1561   MF.getFrameInfo().setFrameAddressIsTaken(true);
1562 
1563   // vastart just stores the address of the VarArgsFrameIndex slot into the
1564   // memory location argument.
1565   SDLoc DL(Op);
1566   SDValue Offset =
1567       DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1568                   DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1569   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1570   return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1571                       MachinePointerInfo(SV));
1572 }
1573 
lowerVAARG(SDValue Op,SelectionDAG & DAG) const1574 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1575   SDNode *Node = Op.getNode();
1576   EVT VT = Node->getValueType(0);
1577   SDValue InChain = Node->getOperand(0);
1578   SDValue VAListPtr = Node->getOperand(1);
1579   EVT PtrVT = VAListPtr.getValueType();
1580   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1581   SDLoc DL(Node);
1582   SDValue VAList =
1583       DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1584   SDValue Chain = VAList.getValue(1);
1585   SDValue NextPtr;
1586 
1587   if (VT == MVT::f128) {
1588     // VE f128 values must be stored with 16 bytes alignment.  We don't
1589     // know the actual alignment of VAList, so we take alignment of it
1590     // dynamically.
1591     int Align = 16;
1592     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1593                          DAG.getConstant(Align - 1, DL, PtrVT));
1594     VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1595                          DAG.getSignedConstant(-Align, DL, PtrVT));
1596     // Increment the pointer, VAList, by 16 to the next vaarg.
1597     NextPtr =
1598         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1599   } else if (VT == MVT::f32) {
1600     // float --> need special handling like below.
1601     //    0      4
1602     //    +------+------+
1603     //    | empty| float|
1604     //    +------+------+
1605     // Increment the pointer, VAList, by 8 to the next vaarg.
1606     NextPtr =
1607         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1608     // Then, adjust VAList.
1609     unsigned InternalOffset = 4;
1610     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1611                          DAG.getConstant(InternalOffset, DL, PtrVT));
1612   } else {
1613     // Increment the pointer, VAList, by 8 to the next vaarg.
1614     NextPtr =
1615         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1616   }
1617 
1618   // Store the incremented VAList to the legalized pointer.
1619   InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1620 
1621   // Load the actual argument out of the pointer VAList.
1622   // We can't count on greater alignment than the word size.
1623   return DAG.getLoad(
1624       VT, DL, InChain, VAList, MachinePointerInfo(),
1625       Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1626 }
1627 
lowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const1628 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1629                                                   SelectionDAG &DAG) const {
1630   // Generate following code.
1631   //   (void)__llvm_grow_stack(size);
1632   //   ret = GETSTACKTOP;        // pseudo instruction
1633   SDLoc DL(Op);
1634 
1635   // Get the inputs.
1636   SDNode *Node = Op.getNode();
1637   SDValue Chain = Op.getOperand(0);
1638   SDValue Size = Op.getOperand(1);
1639   MaybeAlign Alignment(Op.getConstantOperandVal(2));
1640   EVT VT = Node->getValueType(0);
1641 
1642   // Chain the dynamic stack allocation so that it doesn't modify the stack
1643   // pointer when other instructions are using the stack.
1644   Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1645 
1646   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1647   Align StackAlign = TFI.getStackAlign();
1648   bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1649 
1650   // Prepare arguments
1651   TargetLowering::ArgListTy Args;
1652   TargetLowering::ArgListEntry Entry;
1653   Entry.Node = Size;
1654   Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1655   Args.push_back(Entry);
1656   if (NeedsAlign) {
1657     Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1658     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1659     Args.push_back(Entry);
1660   }
1661   Type *RetTy = Type::getVoidTy(*DAG.getContext());
1662 
1663   EVT PtrVT = Op.getValueType();
1664   SDValue Callee;
1665   if (NeedsAlign) {
1666     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1667   } else {
1668     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1669   }
1670 
1671   TargetLowering::CallLoweringInfo CLI(DAG);
1672   CLI.setDebugLoc(DL)
1673       .setChain(Chain)
1674       .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1675       .setDiscardResult(true);
1676   std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1677   Chain = pair.second;
1678   SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1679   if (NeedsAlign) {
1680     Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1681                          DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1682     Result = DAG.getNode(ISD::AND, DL, VT, Result,
1683                          DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1684   }
1685   //  Chain = Result.getValue(1);
1686   Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1687 
1688   SDValue Ops[2] = {Result, Chain};
1689   return DAG.getMergeValues(Ops, DL);
1690 }
1691 
lowerEH_SJLJ_LONGJMP(SDValue Op,SelectionDAG & DAG) const1692 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1693                                                SelectionDAG &DAG) const {
1694   SDLoc DL(Op);
1695   return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1696                      Op.getOperand(1));
1697 }
1698 
lowerEH_SJLJ_SETJMP(SDValue Op,SelectionDAG & DAG) const1699 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1700                                               SelectionDAG &DAG) const {
1701   SDLoc DL(Op);
1702   return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1703                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1704                      Op.getOperand(1));
1705 }
1706 
lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,SelectionDAG & DAG) const1707 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1708                                                       SelectionDAG &DAG) const {
1709   SDLoc DL(Op);
1710   return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1711                      Op.getOperand(0));
1712 }
1713 
lowerFRAMEADDR(SDValue Op,SelectionDAG & DAG,const VETargetLowering & TLI,const VESubtarget * Subtarget)1714 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1715                               const VETargetLowering &TLI,
1716                               const VESubtarget *Subtarget) {
1717   SDLoc DL(Op);
1718   MachineFunction &MF = DAG.getMachineFunction();
1719   EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1720 
1721   MachineFrameInfo &MFI = MF.getFrameInfo();
1722   MFI.setFrameAddressIsTaken(true);
1723 
1724   unsigned Depth = Op.getConstantOperandVal(0);
1725   const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1726   Register FrameReg = RegInfo->getFrameRegister(MF);
1727   SDValue FrameAddr =
1728       DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1729   while (Depth--)
1730     FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1731                             FrameAddr, MachinePointerInfo());
1732   return FrameAddr;
1733 }
1734 
lowerRETURNADDR(SDValue Op,SelectionDAG & DAG,const VETargetLowering & TLI,const VESubtarget * Subtarget)1735 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1736                                const VETargetLowering &TLI,
1737                                const VESubtarget *Subtarget) {
1738   MachineFunction &MF = DAG.getMachineFunction();
1739   MachineFrameInfo &MFI = MF.getFrameInfo();
1740   MFI.setReturnAddressIsTaken(true);
1741 
1742   SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1743 
1744   SDLoc DL(Op);
1745   EVT VT = Op.getValueType();
1746   SDValue Offset = DAG.getConstant(8, DL, VT);
1747   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1748                      DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1749                      MachinePointerInfo());
1750 }
1751 
lowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const1752 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1753                                                   SelectionDAG &DAG) const {
1754   SDLoc DL(Op);
1755   unsigned IntNo = Op.getConstantOperandVal(0);
1756   switch (IntNo) {
1757   default: // Don't custom lower most intrinsics.
1758     return SDValue();
1759   case Intrinsic::eh_sjlj_lsda: {
1760     MachineFunction &MF = DAG.getMachineFunction();
1761     MVT VT = Op.getSimpleValueType();
1762     const VETargetMachine *TM =
1763         static_cast<const VETargetMachine *>(&DAG.getTarget());
1764 
1765     // Create GCC_except_tableXX string.  The real symbol for that will be
1766     // generated in EHStreamer::emitExceptionTable() later.  So, we just
1767     // borrow it's name here.
1768     TM->getStrList()->push_back(std::string(
1769         (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1770     SDValue Addr =
1771         DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1772     if (isPositionIndependent()) {
1773       Addr = makeHiLoPair(Addr, VE::S_GOTOFF_HI32, VE::S_GOTOFF_LO32, DAG);
1774       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1775       return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1776     }
1777     return makeHiLoPair(Addr, VE::S_HI32, VE::S_LO32, DAG);
1778   }
1779   }
1780 }
1781 
getUniqueInsertion(SDNode * N,unsigned & UniqueIdx)1782 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1783   if (!isa<BuildVectorSDNode>(N))
1784     return false;
1785   const auto *BVN = cast<BuildVectorSDNode>(N);
1786 
1787   // Find first non-undef insertion.
1788   unsigned Idx;
1789   for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1790     auto ElemV = BVN->getOperand(Idx);
1791     if (!ElemV->isUndef())
1792       break;
1793   }
1794   // Catch the (hypothetical) all-undef case.
1795   if (Idx == BVN->getNumOperands())
1796     return false;
1797   // Remember insertion.
1798   UniqueIdx = Idx++;
1799   // Verify that all other insertions are undef.
1800   for (; Idx < BVN->getNumOperands(); ++Idx) {
1801     auto ElemV = BVN->getOperand(Idx);
1802     if (!ElemV->isUndef())
1803       return false;
1804   }
1805   return true;
1806 }
1807 
getSplatValue(SDNode * N)1808 static SDValue getSplatValue(SDNode *N) {
1809   if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1810     return BuildVec->getSplatValue();
1811   }
1812   return SDValue();
1813 }
1814 
lowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const1815 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1816                                             SelectionDAG &DAG) const {
1817   VECustomDAG CDAG(DAG, Op);
1818   MVT ResultVT = Op.getSimpleValueType();
1819 
1820   // If there is just one element, expand to INSERT_VECTOR_ELT.
1821   unsigned UniqueIdx;
1822   if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1823     SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1824     auto ElemV = Op->getOperand(UniqueIdx);
1825     SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1826     return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1827   }
1828 
1829   // Else emit a broadcast.
1830   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1831     unsigned NumEls = ResultVT.getVectorNumElements();
1832     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1833     return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1834   }
1835 
1836   // Expand
1837   return SDValue();
1838 }
1839 
1840 TargetLowering::LegalizeAction
getCustomOperationAction(SDNode & Op) const1841 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1842   // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1843   // these operations (transform nodes such that their AVL parameter refers to
1844   // packs of 64bit, instead of number of elements.
1845 
1846   // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1847   // re-visit them.
1848   if (isPackingSupportOpcode(Op.getOpcode()))
1849     return Legal;
1850 
1851   // Custom lower to legalize AVL for packed mode.
1852   if (isVVPOrVEC(Op.getOpcode()))
1853     return Custom;
1854   return Legal;
1855 }
1856 
/// Entry point for custom lowering: dispatch \p Op to the matching lower*()
/// routine.
///
/// The first switch covers the scalar (and a few vector element) opcodes and
/// returns directly.  Opcodes it does not list fall through to the vector
/// path: VP opcodes are translated to the VVP layer, internal VEC_*/VVP_*
/// nodes get their AVL legalized, and the remaining ISD opcodes named in
/// VVPNodes.def (plus MLOAD/MSTORE) are translated to VEC_*/VVP_* nodes.
/// Any other opcode reaching this function is a programming error.
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
  unsigned Opcode = Op.getOpcode();

  /// Scalar isel.
  // No default label here: unlisted opcodes continue below the switch.
  switch (Opcode) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_SWAP(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:
    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:
    return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::LOAD:
    return lowerLOAD(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG, *this, Subtarget);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::STORE:
    return lowerSTORE(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VAARG:
    return lowerVAARG(Op, DAG);

  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  }

  /// Vector isel.
  // All vector-predication (VP) opcodes are translated by lowerToVVP().
  if (ISD::isVPOpcode(Opcode))
    return lowerToVVP(Op, DAG);

  switch (Opcode) {
  default:
    llvm_unreachable("Should not custom lower this!");

  // Legalize the AVL of this internal node.
  // The ADD_VVP_OP expansion below emits one case label per VVP opcode listed
  // in VVPNodes.def.
  case VEISD::VEC_BROADCAST:
#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
#include "VVPNodes.def"
    // AVL already legalized.
    if (getAnnotatedNodeAVL(Op).second)
      return Op;
    return legalizeInternalVectorOp(Op, DAG);

    // Translate into a VEC_*/VVP_* layer operation.
    // Here ADD_VVP_OP emits one case label per ISD opcode that VVPNodes.def
    // associates with a VVP opcode.
  case ISD::MLOAD:
  case ISD::MSTORE:
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
    // Mask arithmetic on packed vector types is split before translation.
    if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
      return splitMaskArithmetic(Op, DAG);
    return lowerToVVP(Op, DAG);
  }
}
1935 /// } Custom Lower
1936 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const1937 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1938                                           SmallVectorImpl<SDValue> &Results,
1939                                           SelectionDAG &DAG) const {
1940   switch (N->getOpcode()) {
1941   case ISD::ATOMIC_SWAP:
1942     // Let LLVM expand atomic swap instruction through LowerOperation.
1943     return;
1944   default:
1945     LLVM_DEBUG(N->dumpr(&DAG));
1946     llvm_unreachable("Do not know how to custom type legalize this operation!");
1947   }
1948 }
1949 
1950 /// JumpTable for VE.
1951 ///
1952 ///   VE cannot generate relocatable symbol in jump table.  VE cannot
1953 ///   generate expressions using symbols in both text segment and data
1954 ///   segment like below.
1955 ///             .4byte  .LBB0_2-.LJTI0_0
1956 ///   So, we generate offset from the top of function like below as
1957 ///   a custom label.
1958 ///             .4byte  .LBB0_2-<function name>
1959 
getJumpTableEncoding() const1960 unsigned VETargetLowering::getJumpTableEncoding() const {
1961   // Use custom label for PIC.
1962   if (isPositionIndependent())
1963     return MachineJumpTableInfo::EK_Custom32;
1964 
1965   // Otherwise, use the normal jump table encoding heuristics.
1966   return TargetLowering::getJumpTableEncoding();
1967 }
1968 
LowerCustomJumpTableEntry(const MachineJumpTableInfo * MJTI,const MachineBasicBlock * MBB,unsigned Uid,MCContext & Ctx) const1969 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1970     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1971     unsigned Uid, MCContext &Ctx) const {
1972   assert(isPositionIndependent());
1973 
1974   // Generate custom label for PIC like below.
1975   //    .4bytes  .LBB0_2-<function name>
1976   const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1977   MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1978   const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1979   return MCBinaryExpr::createSub(Value, Base, Ctx);
1980 }
1981 
getPICJumpTableRelocBase(SDValue Table,SelectionDAG & DAG) const1982 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1983                                                    SelectionDAG &DAG) const {
1984   assert(isPositionIndependent());
1985   SDLoc DL(Table);
1986   Function *Function = &DAG.getMachineFunction().getFunction();
1987   assert(Function != nullptr);
1988   auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
1989 
1990   // In the jump table, we have following values in PIC mode.
1991   //    .4bytes  .LBB0_2-<function name>
1992   // We need to add this value and the address of this function to generate
1993   // .LBB0_2 label correctly under PIC mode.  So, we want to generate following
1994   // instructions:
1995   //     lea %reg, fun@gotoff_lo
1996   //     and %reg, %reg, (32)0
1997   //     lea.sl %reg, fun@gotoff_hi(%reg, %got)
1998   // In order to do so, we need to genarate correctly marked DAG node using
1999   // makeHiLoPair.
2000   SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2001   SDValue HiLo = makeHiLoPair(Op, VE::S_GOTOFF_HI32, VE::S_GOTOFF_LO32, DAG);
2002   SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2003   return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2004 }
2005 
prepareMBB(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,MachineBasicBlock * TargetBB,const DebugLoc & DL) const2006 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2007                                       MachineBasicBlock::iterator I,
2008                                       MachineBasicBlock *TargetBB,
2009                                       const DebugLoc &DL) const {
2010   MachineFunction *MF = MBB.getParent();
2011   MachineRegisterInfo &MRI = MF->getRegInfo();
2012   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2013 
2014   const TargetRegisterClass *RC = &VE::I64RegClass;
2015   Register Tmp1 = MRI.createVirtualRegister(RC);
2016   Register Tmp2 = MRI.createVirtualRegister(RC);
2017   Register Result = MRI.createVirtualRegister(RC);
2018 
2019   if (isPositionIndependent()) {
2020     // Create following instructions for local linkage PIC code.
2021     //     lea %Tmp1, TargetBB@gotoff_lo
2022     //     and %Tmp2, %Tmp1, (32)0
2023     //     lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2024     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2025         .addImm(0)
2026         .addImm(0)
2027         .addMBB(TargetBB, VE::S_GOTOFF_LO32);
2028     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2029         .addReg(Tmp1, getKillRegState(true))
2030         .addImm(M0(32));
2031     BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2032         .addReg(VE::SX15)
2033         .addReg(Tmp2, getKillRegState(true))
2034         .addMBB(TargetBB, VE::S_GOTOFF_HI32);
2035   } else {
2036     // Create following instructions for non-PIC code.
2037     //     lea     %Tmp1, TargetBB@lo
2038     //     and     %Tmp2, %Tmp1, (32)0
2039     //     lea.sl  %Result, TargetBB@hi(%Tmp2)
2040     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2041         .addImm(0)
2042         .addImm(0)
2043         .addMBB(TargetBB, VE::S_LO32);
2044     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2045         .addReg(Tmp1, getKillRegState(true))
2046         .addImm(M0(32));
2047     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2048         .addReg(Tmp2, getKillRegState(true))
2049         .addImm(0)
2050         .addMBB(TargetBB, VE::S_HI32);
2051   }
2052   return Result;
2053 }
2054 
prepareSymbol(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,StringRef Symbol,const DebugLoc & DL,bool IsLocal=false,bool IsCall=false) const2055 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2056                                          MachineBasicBlock::iterator I,
2057                                          StringRef Symbol, const DebugLoc &DL,
2058                                          bool IsLocal = false,
2059                                          bool IsCall = false) const {
2060   MachineFunction *MF = MBB.getParent();
2061   MachineRegisterInfo &MRI = MF->getRegInfo();
2062   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2063 
2064   const TargetRegisterClass *RC = &VE::I64RegClass;
2065   Register Result = MRI.createVirtualRegister(RC);
2066 
2067   if (isPositionIndependent()) {
2068     if (IsCall && !IsLocal) {
2069       // Create following instructions for non-local linkage PIC code function
2070       // calls.  These instructions uses IC and magic number -24, so we expand
2071       // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2072       //     lea %Reg, Symbol@plt_lo(-24)
2073       //     and %Reg, %Reg, (32)0
2074       //     sic %s16
2075       //     lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2076       BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2077           .addExternalSymbol("abort");
2078     } else if (IsLocal) {
2079       Register Tmp1 = MRI.createVirtualRegister(RC);
2080       Register Tmp2 = MRI.createVirtualRegister(RC);
2081       // Create following instructions for local linkage PIC code.
2082       //     lea %Tmp1, Symbol@gotoff_lo
2083       //     and %Tmp2, %Tmp1, (32)0
2084       //     lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2085       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2086           .addImm(0)
2087           .addImm(0)
2088           .addExternalSymbol(Symbol.data(), VE::S_GOTOFF_LO32);
2089       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2090           .addReg(Tmp1, getKillRegState(true))
2091           .addImm(M0(32));
2092       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2093           .addReg(VE::SX15)
2094           .addReg(Tmp2, getKillRegState(true))
2095           .addExternalSymbol(Symbol.data(), VE::S_GOTOFF_HI32);
2096     } else {
2097       Register Tmp1 = MRI.createVirtualRegister(RC);
2098       Register Tmp2 = MRI.createVirtualRegister(RC);
2099       // Create following instructions for not local linkage PIC code.
2100       //     lea %Tmp1, Symbol@got_lo
2101       //     and %Tmp2, %Tmp1, (32)0
2102       //     lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2103       //     ld %Result, 0(%Tmp3)
2104       Register Tmp3 = MRI.createVirtualRegister(RC);
2105       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2106           .addImm(0)
2107           .addImm(0)
2108           .addExternalSymbol(Symbol.data(), VE::S_GOT_LO32);
2109       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2110           .addReg(Tmp1, getKillRegState(true))
2111           .addImm(M0(32));
2112       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2113           .addReg(VE::SX15)
2114           .addReg(Tmp2, getKillRegState(true))
2115           .addExternalSymbol(Symbol.data(), VE::S_GOT_HI32);
2116       BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2117           .addReg(Tmp3, getKillRegState(true))
2118           .addImm(0)
2119           .addImm(0);
2120     }
2121   } else {
2122     Register Tmp1 = MRI.createVirtualRegister(RC);
2123     Register Tmp2 = MRI.createVirtualRegister(RC);
2124     // Create following instructions for non-PIC code.
2125     //     lea     %Tmp1, Symbol@lo
2126     //     and     %Tmp2, %Tmp1, (32)0
2127     //     lea.sl  %Result, Symbol@hi(%Tmp2)
2128     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2129         .addImm(0)
2130         .addImm(0)
2131         .addExternalSymbol(Symbol.data(), VE::S_LO32);
2132     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2133         .addReg(Tmp1, getKillRegState(true))
2134         .addImm(M0(32));
2135     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2136         .addReg(Tmp2, getKillRegState(true))
2137         .addImm(0)
2138         .addExternalSymbol(Symbol.data(), VE::S_HI32);
2139   }
2140   return Result;
2141 }
2142 
setupEntryBlockForSjLj(MachineInstr & MI,MachineBasicBlock * MBB,MachineBasicBlock * DispatchBB,int FI,int Offset) const2143 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2144                                               MachineBasicBlock *MBB,
2145                                               MachineBasicBlock *DispatchBB,
2146                                               int FI, int Offset) const {
2147   DebugLoc DL = MI.getDebugLoc();
2148   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2149 
2150   Register LabelReg =
2151       prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2152 
2153   // Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2154   // referenced by longjmp (throw) later.
2155   MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2156   addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2157   MIB.addReg(LabelReg, getKillRegState(true));
2158 }
2159 
2160 MachineBasicBlock *
emitEHSjLjSetJmp(MachineInstr & MI,MachineBasicBlock * MBB) const2161 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2162                                    MachineBasicBlock *MBB) const {
2163   DebugLoc DL = MI.getDebugLoc();
2164   MachineFunction *MF = MBB->getParent();
2165   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2166   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2167   MachineRegisterInfo &MRI = MF->getRegInfo();
2168 
2169   const BasicBlock *BB = MBB->getBasicBlock();
2170   MachineFunction::iterator I = ++MBB->getIterator();
2171 
2172   // Memory Reference.
2173   SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2174   Register BufReg = MI.getOperand(1).getReg();
2175 
2176   Register DstReg;
2177 
2178   DstReg = MI.getOperand(0).getReg();
2179   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2180   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2181   (void)TRI;
2182   Register MainDestReg = MRI.createVirtualRegister(RC);
2183   Register RestoreDestReg = MRI.createVirtualRegister(RC);
2184 
2185   // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2186   // instructions.  SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2187   //
2188   // ThisMBB:
2189   //   buf[3] = %s17 iff %s17 is used as BP
2190   //   buf[1] = RestoreMBB as IC after longjmp
2191   //   # SjLjSetup RestoreMBB
2192   //
2193   // MainMBB:
2194   //   v_main = 0
2195   //
2196   // SinkMBB:
2197   //   v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2198   //   ...
2199   //
2200   // RestoreMBB:
2201   //   %s17 = buf[3] = iff %s17 is used as BP
2202   //   v_restore = 1
2203   //   goto SinkMBB
2204 
2205   MachineBasicBlock *ThisMBB = MBB;
2206   MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2207   MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2208   MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2209   MF->insert(I, MainMBB);
2210   MF->insert(I, SinkMBB);
2211   MF->push_back(RestoreMBB);
2212   RestoreMBB->setMachineBlockAddressTaken();
2213 
2214   // Transfer the remainder of BB and its successor edges to SinkMBB.
2215   SinkMBB->splice(SinkMBB->begin(), MBB,
2216                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2217   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2218 
2219   // ThisMBB:
2220   Register LabelReg =
2221       prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2222 
2223   // Store BP in buf[3] iff this function is using BP.
2224   const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2225   if (TFI->hasBP(*MF)) {
2226     MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2227     MIB.addReg(BufReg);
2228     MIB.addImm(0);
2229     MIB.addImm(24);
2230     MIB.addReg(VE::SX17);
2231     MIB.setMemRefs(MMOs);
2232   }
2233 
2234   // Store IP in buf[1].
2235   MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2236   MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2237   MIB.addImm(0);
2238   MIB.addImm(8);
2239   MIB.addReg(LabelReg, getKillRegState(true));
2240   MIB.setMemRefs(MMOs);
2241 
2242   // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2243 
2244   // Insert setup.
2245   MIB =
2246       BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2247 
2248   const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2249   MIB.addRegMask(RegInfo->getNoPreservedMask());
2250   ThisMBB->addSuccessor(MainMBB);
2251   ThisMBB->addSuccessor(RestoreMBB);
2252 
2253   // MainMBB:
2254   BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2255       .addImm(0)
2256       .addImm(0)
2257       .addImm(0);
2258   MainMBB->addSuccessor(SinkMBB);
2259 
2260   // SinkMBB:
2261   BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2262       .addReg(MainDestReg)
2263       .addMBB(MainMBB)
2264       .addReg(RestoreDestReg)
2265       .addMBB(RestoreMBB);
2266 
2267   // RestoreMBB:
2268   // Restore BP from buf[3] iff this function is using BP.  The address of
2269   // buf is in SX10.
2270   // FIXME: Better to not use SX10 here
2271   if (TFI->hasBP(*MF)) {
2272     MachineInstrBuilder MIB =
2273         BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2274     MIB.addReg(VE::SX10);
2275     MIB.addImm(0);
2276     MIB.addImm(24);
2277     MIB.setMemRefs(MMOs);
2278   }
2279   BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2280       .addImm(0)
2281       .addImm(0)
2282       .addImm(1);
2283   BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2284   RestoreMBB->addSuccessor(SinkMBB);
2285 
2286   MI.eraseFromParent();
2287   return SinkMBB;
2288 }
2289 
2290 MachineBasicBlock *
emitEHSjLjLongJmp(MachineInstr & MI,MachineBasicBlock * MBB) const2291 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2292                                     MachineBasicBlock *MBB) const {
2293   DebugLoc DL = MI.getDebugLoc();
2294   MachineFunction *MF = MBB->getParent();
2295   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2296   MachineRegisterInfo &MRI = MF->getRegInfo();
2297 
2298   // Memory Reference.
2299   SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2300   Register BufReg = MI.getOperand(0).getReg();
2301 
2302   Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2303   // Since FP is only updated here but NOT referenced, it's treated as GPR.
2304   Register FP = VE::SX9;
2305   Register SP = VE::SX11;
2306 
2307   MachineInstrBuilder MIB;
2308 
2309   MachineBasicBlock *ThisMBB = MBB;
2310 
2311   // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2312   //
2313   // ThisMBB:
2314   //   %fp = load buf[0]
2315   //   %jmp = load buf[1]
2316   //   %s10 = buf        ; Store an address of buf to SX10 for RestoreMBB
2317   //   %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2318   //   jmp %jmp
2319 
2320   // Reload FP.
2321   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2322   MIB.addReg(BufReg);
2323   MIB.addImm(0);
2324   MIB.addImm(0);
2325   MIB.setMemRefs(MMOs);
2326 
2327   // Reload IP.
2328   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2329   MIB.addReg(BufReg);
2330   MIB.addImm(0);
2331   MIB.addImm(8);
2332   MIB.setMemRefs(MMOs);
2333 
2334   // Copy BufReg to SX10 for later use in setjmp.
2335   // FIXME: Better to not use SX10 here
2336   BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2337       .addReg(BufReg)
2338       .addImm(0);
2339 
2340   // Reload SP.
2341   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2342   MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2343   MIB.addImm(0);
2344   MIB.addImm(16);
2345   MIB.setMemRefs(MMOs);
2346 
2347   // Jump.
2348   BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2349       .addReg(Tmp, getKillRegState(true))
2350       .addImm(0);
2351 
2352   MI.eraseFromParent();
2353   return ThisMBB;
2354 }
2355 
/// Expand the EH_SjLj_Setup_Dispatch pseudo \p MI (dispatched from
/// EmitInstrWithCustomInserter) found in block \p BB.
///
/// The expansion:
///  - collects every EH pad that has call-site numbers registered in the
///    MachineFunction and orders them by call-site number into a jump table,
///  - appends three new blocks to the function: DispatchBB (the new, single
///    EH pad), DispContBB (jump-table lookup and indirect jump) and TrapBB
///    (calls abort),
///  - stores the address of DispatchBB into the function context via
///    setupEntryBlockForSjLj(),
///  - redirects the EH-pad successor edges of all invoke blocks to DispatchBB
///    and adds the callee-saved registers as implicit dead defs on the last
///    call of each invoke block,
///  - clears the EH-pad flag on the former landing pads.
///
/// \returns \p BB; \p MI is erased.
2356 MachineBasicBlock *
emitSjLjDispatchBlock(MachineInstr & MI,MachineBasicBlock * BB) const2357 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2358                                         MachineBasicBlock *BB) const {
2359   DebugLoc DL = MI.getDebugLoc();
2360   MachineFunction *MF = BB->getParent();
2361   MachineFrameInfo &MFI = MF->getFrameInfo();
2362   MachineRegisterInfo &MRI = MF->getRegInfo();
2363   const VEInstrInfo *TII = Subtarget->getInstrInfo();
  // Frame index of the function context object (see the %fn_context layout
  // described below).
2364   int FI = MFI.getFunctionContextIndex();
2365 
2366   // Get a mapping of the call site numbers to all of the landing pads they're
2367   // associated with.
2368   DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2369   unsigned MaxCSNum = 0;
2370   for (auto &MBB : *MF) {
2371     if (!MBB.isEHPad())
2372       continue;
2373 
    // The first non-debug instruction of an EH pad is expected to be its
    // EH_LABEL; its symbol identifies the landing pad.  (This loop variable
    // MI shadows the function parameter MI.)
2374     MCSymbol *Sym = nullptr;
2375     for (const auto &MI : MBB) {
2376       if (MI.isDebugInstr())
2377         continue;
2378 
2379       assert(MI.isEHLabel() && "expected EH_LABEL");
2380       Sym = MI.getOperand(0).getMCSymbol();
2381       break;
2382     }
2383 
2384     if (!MF->hasCallSiteLandingPad(Sym))
2385       continue;
2386 
2387     for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2388       CallSiteNumToLPad[CSI].push_back(&MBB);
2389       MaxCSNum = std::max(MaxCSNum, CSI);
2390     }
2391   }
2392 
2393   // Get an ordered list of the machine basic blocks for the jump table.
  // InvokeBBs collects the predecessors of those landing pads.
2394   std::vector<MachineBasicBlock *> LPadList;
2395   SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2396   LPadList.reserve(CallSiteNumToLPad.size());
2397 
2398   for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2399     for (auto &LP : CallSiteNumToLPad[CSI]) {
2400       LPadList.push_back(LP);
2401       InvokeBBs.insert_range(LP->predecessors());
2402     }
2403   }
2404 
2405   assert(!LPadList.empty() &&
2406          "No landing pad destinations for the dispatch jump table!");
2407 
2408   // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2409   //   %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2410   //
2411   // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2412   // First `i64` is callsite, so callsite is FI+8.
2413   static const int OffsetIC = 72;
2414   static const int OffsetCS = 8;
2415 
2416   // Create the MBBs for the dispatch code like following:
2417   //
2418   // ThisMBB:
2419   //   Prepare DispatchBB address and store it to buf[1].
2420   //   ...
2421   //
2422   // DispatchBB:
2423   //   %s15 = GETGOT iff isPositionIndependent
2424   //   %callsite = load callsite
2425   //   branch to TrapBB if #size of callsites <= %callsite (CC_ILE)
2426   //
2427   // TrapBB:
2428   //   Call abort.
2429   //
2430   // DispContBB:
2431   //   %breg = address of jump table
2432   //   %pc = load and calculate next pc from %breg and %callsite
2433   //   jmp %pc
2434 
2435   // Create DispatchBB.  It becomes the EH pad whose address is stored into
  // the function context (see setupEntryBlockForSjLj below).
2436   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2437   DispatchBB->setIsEHPad(true);
2438 
2439   // TrapBB raises a trap, like `assert(0)`, by calling abort.
2440   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2441   DispatchBB->addSuccessor(TrapBB);
2442 
2443   MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2444   DispatchBB->addSuccessor(DispContBB);
2445 
2446   // Insert MBBs.  DispContBB is placed right after DispatchBB.
2447   MF->push_back(DispatchBB);
2448   MF->push_back(DispContBB);
2449   MF->push_back(TrapBB);
2450 
2451   // Insert code to call abort in the TrapBB.
2452   Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2453                                  /* Local */ false, /* Call */ true);
2454   BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2455       .addReg(Abort, getKillRegState(true))
2456       .addImm(0)
2457       .addImm(0);
2458 
2459   // Insert code into the entry block that creates and registers the function
2460   // context.
2461   setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2462 
2463   // Create the jump table and associated information
2464   unsigned JTE = getJumpTableEncoding();
2465   MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2466   unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2467 
2468   const VERegisterInfo &RI = TII->getRegisterInfo();
2469   // Add a register mask with no preserved registers.  This results in all
2470   // registers being marked as clobbered.
2471   BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2472       .addRegMask(RI.getNoPreservedMask());
2473 
2474   if (isPositionIndependent()) {
2475     // Force to generate GETGOT, since current implementation doesn't store GOT
2476     // register.
2477     BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2478   }
2479 
2480   // IReg is used as an index in a memory operand and therefore can't be SP
2481   const TargetRegisterClass *RC = &VE::I64RegClass;
2482   Register IReg = MRI.createVirtualRegister(RC);
  // Load the call-site value from the function context (FI + OffsetCS).
2483   addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2484                     OffsetCS);
  // Branch to TrapBB when the CC_ILE comparison of the landing-pad count
  // against the loaded call-site value holds.  Counts below 64 are encoded as
  // an immediate operand; larger counts are materialized in a register first.
2485   if (LPadList.size() < 64) {
2486     BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2487         .addImm(VECC::CC_ILE)
2488         .addImm(LPadList.size())
2489         .addReg(IReg)
2490         .addMBB(TrapBB);
2491   } else {
2492     assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2493     Register TmpReg = MRI.createVirtualRegister(RC);
2494     BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2495         .addImm(0)
2496         .addImm(0)
2497         .addImm(LPadList.size());
2498     BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2499         .addImm(VECC::CC_ILE)
2500         .addReg(TmpReg, getKillRegState(true))
2501         .addReg(IReg)
2502         .addMBB(TrapBB);
2503   }
2504 
  // BReg receives the address of the jump table.
2505   Register BReg = MRI.createVirtualRegister(RC);
2506   Register Tmp1 = MRI.createVirtualRegister(RC);
2507   Register Tmp2 = MRI.createVirtualRegister(RC);
2508 
2509   if (isPositionIndependent()) {
2510     // Create following instructions for local linkage PIC code.
2511     //     lea    %Tmp1, .LJTI0_0@gotoff_lo
2512     //     and    %Tmp2, %Tmp1, (32)0
2513     //     lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2514     BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2515         .addImm(0)
2516         .addImm(0)
2517         .addJumpTableIndex(MJTI, VE::S_GOTOFF_LO32);
2518     BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2519         .addReg(Tmp1, getKillRegState(true))
2520         .addImm(M0(32));
2521     BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2522         .addReg(VE::SX15)
2523         .addReg(Tmp2, getKillRegState(true))
2524         .addJumpTableIndex(MJTI, VE::S_GOTOFF_HI32);
2525   } else {
2526     // Create following instructions for non-PIC code.
2527     //     lea     %Tmp1, .LJTI0_0@lo
2528     //     and     %Tmp2, %Tmp1, (32)0
2529     //     lea.sl  %BReg, .LJTI0_0@hi(%Tmp2)
2530     BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2531         .addImm(0)
2532         .addImm(0)
2533         .addJumpTableIndex(MJTI, VE::S_LO32);
2534     BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2535         .addReg(Tmp1, getKillRegState(true))
2536         .addImm(M0(32));
2537     BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2538         .addReg(Tmp2, getKillRegState(true))
2539         .addImm(0)
2540         .addJumpTableIndex(MJTI, VE::S_HI32);
2541   }
2542 
  // Load the jump-table entry selected by IReg and jump to it.  The Tmp1
  // declared inside each case below shadows the Tmp1 above.
2543   switch (JTE) {
2544   case MachineJumpTableInfo::EK_BlockAddress: {
2545     // Generate simple block address code for no-PIC model.
2546     //     sll %Tmp1, %IReg, 3
2547     //     ld %TReg, 0(%Tmp1, %BReg)
2548     //     bcfla %TReg
2549 
2550     Register TReg = MRI.createVirtualRegister(RC);
2551     Register Tmp1 = MRI.createVirtualRegister(RC);
2552 
2553     BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2554         .addReg(IReg, getKillRegState(true))
2555         .addImm(3);
2556     BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2557         .addReg(BReg, getKillRegState(true))
2558         .addReg(Tmp1, getKillRegState(true))
2559         .addImm(0);
2560     BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2561         .addReg(TReg, getKillRegState(true))
2562         .addImm(0);
2563     break;
2564   }
2565   case MachineJumpTableInfo::EK_Custom32: {
2566     // Generate block address code using differences from the function pointer
2567     // for PIC model.
2568     //     sll %Tmp1, %IReg, 2
2569     //     ldl.zx %OReg, 0(%Tmp1, %BReg)
2570     //     Prepare function address in BReg2.
2571     //     adds.l %TReg, %BReg2, %OReg
2572     //     bcfla %TReg
2573 
2574     assert(isPositionIndependent());
2575     Register OReg = MRI.createVirtualRegister(RC);
2576     Register TReg = MRI.createVirtualRegister(RC);
2577     Register Tmp1 = MRI.createVirtualRegister(RC);
2578 
2579     BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2580         .addReg(IReg, getKillRegState(true))
2581         .addImm(2);
2582     BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2583         .addReg(BReg, getKillRegState(true))
2584         .addReg(Tmp1, getKillRegState(true))
2585         .addImm(0);
2586     Register BReg2 =
2587         prepareSymbol(*DispContBB, DispContBB->end(),
2588                       DispContBB->getParent()->getName(), DL, /* Local */ true);
2589     BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2590         .addReg(OReg, getKillRegState(true))
2591         .addReg(BReg2, getKillRegState(true));
2592     BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2593         .addReg(TReg, getKillRegState(true))
2594         .addImm(0);
2595     break;
2596   }
2597   default:
2598     llvm_unreachable("Unexpected jump table encoding");
2599   }
2600 
2601   // Add the jump table entries as successors to the MBB.  A landing pad may
  // appear in LPadList more than once, so each one is added only once.
2602   SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2603   for (auto &LP : LPadList)
2604     if (SeenMBBs.insert(LP).second)
2605       DispContBB->addSuccessor(LP);
2606 
2607   // N.B. the order the invoke BBs are processed in doesn't matter here.
2608   SmallVector<MachineBasicBlock *, 64> MBBLPads;
2609   const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2610   for (MachineBasicBlock *MBB : InvokeBBs) {
2611     // Remove the landing pad successor from the invoke block and replace it
2612     // with the new dispatch block.
2613     // Keep a copy of Successors since it's modified inside the loop.
2614     SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2615                                                    MBB->succ_rend());
2616     // FIXME: Avoid quadratic complexity.
2617     for (auto *MBBS : Successors) {
2618       if (MBBS->isEHPad()) {
2619         MBB->removeSuccessor(MBBS);
2620         MBBLPads.push_back(MBBS);
2621       }
2622     }
2623 
2624     MBB->addSuccessor(DispatchBB);
2625 
2626     // Find the invoke call and mark all of the callee-saved registers as
2627     // 'implicit defined' so that they're spilled.  This prevents code from
2628     // moving instructions to before the EH block, where they will never be
2629     // executed.
    // Only the last call instruction of the block is updated.
2630     for (auto &II : reverse(*MBB)) {
2631       if (!II.isCall())
2632         continue;
2633 
      // Registers that already appear as operands of the call are skipped.
2634       DenseSet<Register> DefRegs;
2635       for (auto &MOp : II.operands())
2636         if (MOp.isReg())
2637           DefRegs.insert(MOp.getReg());
2638 
      // SavedRegs is walked until its zero terminator.  The loop index RI
      // shadows the VERegisterInfo reference RI declared above.
2639       MachineInstrBuilder MIB(*MF, &II);
2640       for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2641         Register Reg = SavedRegs[RI];
2642         if (!DefRegs.contains(Reg))
2643           MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2644       }
2645 
2646       break;
2647     }
2648   }
2649 
2650   // Mark all former landing pads as non-landing pads.  The dispatch is the only
2651   // landing pad now.
2652   for (auto &LP : MBBLPads)
2653     LP->setIsEHPad(false);
2654 
2655   // The instruction is gone now.
2656   MI.eraseFromParent();
2657   return BB;
2658 }
2659 
2660 MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr & MI,MachineBasicBlock * BB) const2661 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2662                                               MachineBasicBlock *BB) const {
2663   switch (MI.getOpcode()) {
2664   default:
2665     llvm_unreachable("Unknown Custom Instruction!");
2666   case VE::EH_SjLj_LongJmp:
2667     return emitEHSjLjLongJmp(MI, BB);
2668   case VE::EH_SjLj_SetJmp:
2669     return emitEHSjLjSetJmp(MI, BB);
2670   case VE::EH_SjLj_Setup_Dispatch:
2671     return emitSjLjDispatchBlock(MI, BB);
2672   }
2673 }
2674 
isSimm7(SDValue V)2675 static bool isSimm7(SDValue V) {
2676   EVT VT = V.getValueType();
2677   if (VT.isVector())
2678     return false;
2679 
2680   if (VT.isInteger()) {
2681     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2682       return isInt<7>(C->getSExtValue());
2683   } else if (VT.isFloatingPoint()) {
2684     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2685       if (VT == MVT::f32 || VT == MVT::f64) {
2686         const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2687         uint64_t Val = Imm.getSExtValue();
2688         if (Imm.getBitWidth() == 32)
2689           Val <<= 32; // Immediate value of float place at higher bits on VE.
2690         return isInt<7>(Val);
2691       }
2692     }
2693   }
2694   return false;
2695 }
2696 
isMImm(SDValue V)2697 static bool isMImm(SDValue V) {
2698   EVT VT = V.getValueType();
2699   if (VT.isVector())
2700     return false;
2701 
2702   if (VT.isInteger()) {
2703     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2704       return isMImmVal(getImmVal(C));
2705   } else if (VT.isFloatingPoint()) {
2706     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2707       if (VT == MVT::f32) {
2708         // Float value places at higher bits, so ignore lower 32 bits.
2709         return isMImm32Val(getFpImmVal(C) >> 32);
2710       } else if (VT == MVT::f64) {
2711         return isMImmVal(getFpImmVal(C));
2712       }
2713     }
2714   }
2715   return false;
2716 }
2717 
decideComp(EVT SrcVT,ISD::CondCode CC)2718 static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2719   if (SrcVT.isFloatingPoint()) {
2720     if (SrcVT == MVT::f128)
2721       return VEISD::CMPQ;
2722     return VEISD::CMPF;
2723   }
2724   return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2725 }
2726 
decideCompType(EVT SrcVT)2727 static EVT decideCompType(EVT SrcVT) {
2728   if (SrcVT == MVT::f128)
2729     return MVT::f64;
2730   return SrcVT;
2731 }
2732 
safeWithoutCompWithNull(EVT SrcVT,ISD::CondCode CC,bool WithCMov)2733 static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2734                                     bool WithCMov) {
2735   if (SrcVT.isFloatingPoint()) {
2736     // For the case of floating point setcc, only unordered comparison
2737     // or general comparison with -enable-no-nans-fp-math option reach
2738     // here, so it is safe even if values are NaN.  Only f128 doesn't
2739     // safe since VE uses f64 result of f128 comparison.
2740     return SrcVT != MVT::f128;
2741   }
2742   if (isIntEqualitySetCC(CC)) {
2743     // For the case of equal or not equal, it is safe without comparison with 0.
2744     return true;
2745   }
2746   if (WithCMov) {
2747     // For the case of integer setcc with cmov, all signed comparison with 0
2748     // are safe.
2749     return isSignedIntSetCC(CC);
2750   }
2751   // For the case of integer setcc, only signed 64 bits comparison is safe.
2752   // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2753   // less than 0 witout CMPU.  For 32 bits, other half of 32 bits are
2754   // uncoditional, so it is not safe too without CMPI..
2755   return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2756 }
2757 
generateComparison(EVT VT,SDValue LHS,SDValue RHS,ISD::CondCode CC,bool WithCMov,const SDLoc & DL,SelectionDAG & DAG)2758 static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2759                                   ISD::CondCode CC, bool WithCMov,
2760                                   const SDLoc &DL, SelectionDAG &DAG) {
2761   // Compare values.  If RHS is 0 and it is safe to calculate without
2762   // comparison, we don't generate an instruction for comparison.
2763   EVT CompVT = decideCompType(VT);
2764   if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2765       (isNullConstant(RHS) || isNullFPConstant(RHS))) {
2766     return LHS;
2767   }
2768   return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2769 }
2770 
combineSelect(SDNode * N,DAGCombinerInfo & DCI) const2771 SDValue VETargetLowering::combineSelect(SDNode *N,
2772                                         DAGCombinerInfo &DCI) const {
2773   assert(N->getOpcode() == ISD::SELECT &&
2774          "Should be called with a SELECT node");
2775   ISD::CondCode CC = ISD::CondCode::SETNE;
2776   SDValue Cond = N->getOperand(0);
2777   SDValue True = N->getOperand(1);
2778   SDValue False = N->getOperand(2);
2779 
2780   // We handle only scalar SELECT.
2781   EVT VT = N->getValueType(0);
2782   if (VT.isVector())
2783     return SDValue();
2784 
2785   // Peform combineSelect after leagalize DAG.
2786   if (!DCI.isAfterLegalizeDAG())
2787     return SDValue();
2788 
2789   EVT VT0 = Cond.getValueType();
2790   if (isMImm(True)) {
2791     // VE's condition move can handle MImm in True clause, so nothing to do.
2792   } else if (isMImm(False)) {
2793     // VE's condition move can handle MImm in True clause, so swap True and
2794     // False clauses if False has MImm value.  And, update condition code.
2795     std::swap(True, False);
2796     CC = getSetCCInverse(CC, VT0);
2797   }
2798 
2799   SDLoc DL(N);
2800   SelectionDAG &DAG = DCI.DAG;
2801   VECC::CondCode VECCVal;
2802   if (VT0.isFloatingPoint()) {
2803     VECCVal = fpCondCode2Fcc(CC);
2804   } else {
2805     VECCVal = intCondCode2Icc(CC);
2806   }
2807   SDValue Ops[] = {Cond, True, False,
2808                    DAG.getConstant(VECCVal, DL, MVT::i32)};
2809   return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2810 }
2811 
combineSelectCC(SDNode * N,DAGCombinerInfo & DCI) const2812 SDValue VETargetLowering::combineSelectCC(SDNode *N,
2813                                           DAGCombinerInfo &DCI) const {
2814   assert(N->getOpcode() == ISD::SELECT_CC &&
2815          "Should be called with a SELECT_CC node");
2816   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2817   SDValue LHS = N->getOperand(0);
2818   SDValue RHS = N->getOperand(1);
2819   SDValue True = N->getOperand(2);
2820   SDValue False = N->getOperand(3);
2821 
2822   // We handle only scalar SELECT_CC.
2823   EVT VT = N->getValueType(0);
2824   if (VT.isVector())
2825     return SDValue();
2826 
2827   // Peform combineSelectCC after leagalize DAG.
2828   if (!DCI.isAfterLegalizeDAG())
2829     return SDValue();
2830 
2831   // We handle only i32/i64/f32/f64/f128 comparisons.
2832   EVT LHSVT = LHS.getValueType();
2833   assert(LHSVT == RHS.getValueType());
2834   switch (LHSVT.getSimpleVT().SimpleTy) {
2835   case MVT::i32:
2836   case MVT::i64:
2837   case MVT::f32:
2838   case MVT::f64:
2839   case MVT::f128:
2840     break;
2841   default:
2842     // Return SDValue to let llvm handle other types.
2843     return SDValue();
2844   }
2845 
2846   if (isMImm(RHS)) {
2847     // VE's comparison can handle MImm in RHS, so nothing to do.
2848   } else if (isSimm7(RHS)) {
2849     // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2850     // update condition code.
2851     std::swap(LHS, RHS);
2852     CC = getSetCCSwappedOperands(CC);
2853   }
2854   if (isMImm(True)) {
2855     // VE's condition move can handle MImm in True clause, so nothing to do.
2856   } else if (isMImm(False)) {
2857     // VE's condition move can handle MImm in True clause, so swap True and
2858     // False clauses if False has MImm value.  And, update condition code.
2859     std::swap(True, False);
2860     CC = getSetCCInverse(CC, LHSVT);
2861   }
2862 
2863   SDLoc DL(N);
2864   SelectionDAG &DAG = DCI.DAG;
2865 
2866   bool WithCMov = true;
2867   SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2868 
2869   VECC::CondCode VECCVal;
2870   if (LHSVT.isFloatingPoint()) {
2871     VECCVal = fpCondCode2Fcc(CC);
2872   } else {
2873     VECCVal = intCondCode2Icc(CC);
2874   }
2875   SDValue Ops[] = {CompNode, True, False,
2876                    DAG.getConstant(VECCVal, DL, MVT::i32)};
2877   return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2878 }
2879 
2880 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
isI32Insn(const SDNode * User,const SDNode * N)2881 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2882   switch (User->getOpcode()) {
2883   default:
2884     return false;
2885   case ISD::ADD:
2886   case ISD::SUB:
2887   case ISD::MUL:
2888   case ISD::SDIV:
2889   case ISD::UDIV:
2890   case ISD::SETCC:
2891   case ISD::SMIN:
2892   case ISD::SMAX:
2893   case ISD::SHL:
2894   case ISD::SRA:
2895   case ISD::BSWAP:
2896   case ISD::SINT_TO_FP:
2897   case ISD::UINT_TO_FP:
2898   case ISD::BR_CC:
2899   case ISD::BITCAST:
2900   case ISD::ATOMIC_CMP_SWAP:
2901   case ISD::ATOMIC_SWAP:
2902   case VEISD::CMPU:
2903   case VEISD::CMPI:
2904     return true;
2905   case ISD::SRL:
2906     if (N->getOperand(0).getOpcode() != ISD::SRL)
2907       return true;
2908     // (srl (trunc (srl ...))) may be optimized by combining srl, so
2909     // doesn't optimize trunc now.
2910     return false;
2911   case ISD::SELECT_CC:
2912     if (User->getOperand(2).getNode() != N &&
2913         User->getOperand(3).getNode() != N)
2914       return true;
2915     return isI32InsnAllUses(User, N);
2916   case VEISD::CMOV:
2917     // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2918     // However, trunc in true or false clauses is not safe.
2919     if (User->getOperand(1).getNode() != N &&
2920         User->getOperand(2).getNode() != N &&
2921         isa<ConstantSDNode>(User->getOperand(3))) {
2922       VECC::CondCode VECCVal =
2923           static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
2924       return isIntVECondCode(VECCVal);
2925     }
2926     [[fallthrough]];
2927   case ISD::AND:
2928   case ISD::OR:
2929   case ISD::XOR:
2930   case ISD::SELECT:
2931   case ISD::CopyToReg:
2932     // Check all use of selections, bit operations, and copies.  If all of them
2933     // are safe, optimize truncate to extract_subreg.
2934     return isI32InsnAllUses(User, N);
2935   }
2936 }
2937 
isI32InsnAllUses(const SDNode * User,const SDNode * N)2938 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2939   // Check all use of User node.  If all of them are safe, optimize
2940   // truncate to extract_subreg.
2941   for (const SDNode *U : User->users()) {
2942     switch (U->getOpcode()) {
2943     default:
2944       // If the use is an instruction which treats the source operand as i32,
2945       // it is safe to avoid truncate here.
2946       if (isI32Insn(U, N))
2947         continue;
2948       break;
2949     case ISD::ANY_EXTEND:
2950     case ISD::SIGN_EXTEND:
2951     case ISD::ZERO_EXTEND: {
2952       // Special optimizations to the combination of ext and trunc.
2953       // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2954       // since this truncate instruction clears higher 32 bits which is filled
2955       // by one of ext instructions later.
2956       assert(N->getValueType(0) == MVT::i32 &&
2957              "find truncate to not i32 integer");
2958       if (User->getOpcode() == ISD::SELECT_CC ||
2959           User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2960         continue;
2961       break;
2962     }
2963     }
2964     return false;
2965   }
2966   return true;
2967 }
2968 
2969 // Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
2970 // sometime too early.  Optimizing it in DAG pattern matching in VEInstrInfo.td
2971 // is sometime too late.  So, doing it at here.
combineTRUNCATE(SDNode * N,DAGCombinerInfo & DCI) const2972 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2973                                           DAGCombinerInfo &DCI) const {
2974   assert(N->getOpcode() == ISD::TRUNCATE &&
2975          "Should be called with a TRUNCATE node");
2976 
2977   SelectionDAG &DAG = DCI.DAG;
2978   SDLoc DL(N);
2979   EVT VT = N->getValueType(0);
2980 
2981   // We prefer to do this when all types are legal.
2982   if (!DCI.isAfterLegalizeDAG())
2983     return SDValue();
2984 
2985   // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
2986   if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
2987       isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
2988       isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
2989     return SDValue();
2990 
2991   // Check all use of this TRUNCATE.
2992   for (const SDNode *User : N->users()) {
2993     // Make sure that we're not going to replace TRUNCATE for non i32
2994     // instructions.
2995     //
2996     // FIXME: Although we could sometimes handle this, and it does occur in
2997     // practice that one of the condition inputs to the select is also one of
2998     // the outputs, we currently can't deal with this.
2999     if (isI32Insn(User, N))
3000       continue;
3001 
3002     return SDValue();
3003   }
3004 
3005   SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3006   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3007                                     N->getOperand(0), SubI32),
3008                  0);
3009 }
3010 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const3011 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3012                                             DAGCombinerInfo &DCI) const {
3013   switch (N->getOpcode()) {
3014   default:
3015     break;
3016   case ISD::SELECT:
3017     return combineSelect(N, DCI);
3018   case ISD::SELECT_CC:
3019     return combineSelectCC(N, DCI);
3020   case ISD::TRUNCATE:
3021     return combineTRUNCATE(N, DCI);
3022   }
3023 
3024   return SDValue();
3025 }
3026 
3027 //===----------------------------------------------------------------------===//
3028 // VE Inline Assembly Support
3029 //===----------------------------------------------------------------------===//
3030 
3031 VETargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const3032 VETargetLowering::getConstraintType(StringRef Constraint) const {
3033   if (Constraint.size() == 1) {
3034     switch (Constraint[0]) {
3035     default:
3036       break;
3037     case 'v': // vector registers
3038       return C_RegisterClass;
3039     }
3040   }
3041   return TargetLowering::getConstraintType(Constraint);
3042 }
3043 
3044 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const3045 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3046                                                StringRef Constraint,
3047                                                MVT VT) const {
3048   const TargetRegisterClass *RC = nullptr;
3049   if (Constraint.size() == 1) {
3050     switch (Constraint[0]) {
3051     default:
3052       return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3053     case 'r':
3054       RC = &VE::I64RegClass;
3055       break;
3056     case 'v':
3057       RC = &VE::V64RegClass;
3058       break;
3059     }
3060     return std::make_pair(0U, RC);
3061   }
3062 
3063   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3064 }
3065 
3066 //===----------------------------------------------------------------------===//
3067 // VE Target Optimization Support
3068 //===----------------------------------------------------------------------===//
3069 
getMinimumJumpTableEntries() const3070 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3071   // Specify 8 for PIC model to relieve the impact of PIC load instructions.
3072   if (isJumpTableRelative())
3073     return 8;
3074 
3075   return TargetLowering::getMinimumJumpTableEntries();
3076 }
3077 
hasAndNot(SDValue Y) const3078 bool VETargetLowering::hasAndNot(SDValue Y) const {
3079   EVT VT = Y.getValueType();
3080 
3081   // VE doesn't have vector and not instruction.
3082   if (VT.isVector())
3083     return false;
3084 
3085   // VE allows different immediate values for X and Y where ~X & Y.
3086   // Only simm7 works for X, and only mimm works for Y on VE.  However, this
3087   // function is used to check whether an immediate value is OK for and-not
3088   // instruction as both X and Y.  Generating additional instruction to
3089   // retrieve an immediate value is no good since the purpose of this
3090   // function is to convert a series of 3 instructions to another series of
3091   // 3 instructions with better parallelism.  Therefore, we return false
3092   // for all immediate values now.
3093   // FIXME: Change hasAndNot function to have two operands to make it work
3094   //        correctly with Aurora VE.
3095   if (isa<ConstantSDNode>(Y))
3096     return false;
3097 
3098   // It's ok for generic registers.
3099   return true;
3100 }
3101 
lowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3102 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3103                                                   SelectionDAG &DAG) const {
3104   assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3105   MVT VT = Op.getOperand(0).getSimpleValueType();
3106 
3107   // Special treatment for packed V64 types.
3108   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3109   (void)VT;
3110   // Example of codes:
3111   //   %packed_v = extractelt %vr, %idx / 2
3112   //   %v = %packed_v >> (%idx % 2 * 32)
3113   //   %res = %v & 0xffffffff
3114 
3115   SDValue Vec = Op.getOperand(0);
3116   SDValue Idx = Op.getOperand(1);
3117   SDLoc DL(Op);
3118   SDValue Result = Op;
3119   if (false /* Idx->isConstant() */) {
3120     // TODO: optimized implementation using constant values
3121   } else {
3122     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3123     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3124     SDValue PackedElt =
3125         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3126     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3127     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3128     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3129     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3130     PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3131     SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3132     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3133     SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3134     Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3135                                         MVT::i32, PackedElt, SubI32),
3136                      0);
3137 
3138     if (Op.getSimpleValueType() == MVT::f32) {
3139       Result = DAG.getBitcast(MVT::f32, Result);
3140     } else {
3141       assert(Op.getSimpleValueType() == MVT::i32);
3142     }
3143   }
3144   return Result;
3145 }
3146 
lowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const3147 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3148                                                  SelectionDAG &DAG) const {
3149   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3150   MVT VT = Op.getOperand(0).getSimpleValueType();
3151 
3152   // Special treatment for packed V64 types.
3153   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3154   (void)VT;
3155   // The v512i32 and v512f32 starts from upper bits (0..31).  This "upper
3156   // bits" required `val << 32` from C implementation's point of view.
3157   //
3158   // Example of codes:
3159   //   %packed_elt = extractelt %vr, (%idx >> 1)
3160   //   %shift = ((%idx & 1) ^ 1) << 5
3161   //   %packed_elt &= 0xffffffff00000000 >> shift
3162   //   %packed_elt |= (zext %val) << shift
3163   //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3164 
3165   SDLoc DL(Op);
3166   SDValue Vec = Op.getOperand(0);
3167   SDValue Val = Op.getOperand(1);
3168   SDValue Idx = Op.getOperand(2);
3169   if (Idx.getSimpleValueType() == MVT::i32)
3170     Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3171   if (Val.getSimpleValueType() == MVT::f32)
3172     Val = DAG.getBitcast(MVT::i32, Val);
3173   assert(Val.getSimpleValueType() == MVT::i32);
3174   Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3175 
3176   SDValue Result = Op;
3177   if (false /* Idx->isConstant()*/) {
3178     // TODO: optimized implementation using constant values
3179   } else {
3180     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3181     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3182     SDValue PackedElt =
3183         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3184     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3185     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3186     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3187     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3188     SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3189     Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3190     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3191     Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3192     PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3193     Result =
3194         SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3195                                    {HalfIdx, PackedElt, Vec}),
3196                 0);
3197   }
3198   return Result;
3199 }
3200