xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp (revision 47ef2a131091508e049ab10cad7f91a3c1342cd9)
//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation  ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));
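
// A usage sketch (hypothetical invocation; only the flag name above comes
// from this file): the option is an ordinary llc command-line switch, e.g.
//
//   llc -march=bpfel -bpf-expand-memcpy-in-order -filetype=obj in.ll
//
// which keeps each memcpy's expanded load/store pairs adjacent and in order.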

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}
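
// As a sketch, a diagnostic routed through fail() surfaces to the user
// roughly as (exact formatting is up to the frontend's DiagnosticPrinter):
//
//   error: in function foo ...: stack arguments are not supported
//
// where the trailing text is one of the messages passed to fail() below.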

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better error
  // messages than the fatal errors SelectionDAG would otherwise produce.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }
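
  // For illustration (an IR sketch, not taken from this file): without
  // alu32, a narrow atomic such as
  //
  //   %old = atomicrmw and ptr %p, i8 1 monotonic
  //
  // reaches the Custom hooks above and is rejected in ReplaceNodeResults()
  // with "unsupported atomic operation ..." rather than a bare fatal error.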

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // Generic LLVM code may expand memcpy into load/store pairs at this
    // stage, before quite a few IR optimization passes have run, so the
    // loads and stores could later be moved apart from each other, which
    // breaks the memcpy pattern matchers inside kernel eBPF JIT compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline
    // so those load/store pairs are left untouched and stay in order. Hence,
    // we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // Inline memcpy() so the kernel sees the copy as explicit load/store
    // pairs.
    unsigned CommonMaxStores =
      STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }
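
  // As a sketch of the effect: with the defaults above, a small
  // memcpy(dst, src, 16) can be emitted inline as a couple of 8-byte
  // load/store pairs instead of a call, subject to the CommonMaxStores
  // threshold and the usual alignment constraints.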

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}
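
// Rationale sketch: eBPF loads narrower than 64 bits zero the upper bits of
// the destination register anyway, e.g. in BPF assembly
//
//   r1 = *(u8 *)(r2 + 0)   // upper 56 bits of r1 are already zero
//
// so zero-extending such a loaded value costs no extra instruction.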

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
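
// A usage sketch (not from this file): with alu32 enabled, inline asm can
// pin a 32-bit operand to a W subregister via the 'w' constraint handled
// above,
//
//   asm volatile("%0 = %1" : "=w"(dst) : "w"(src));
//
// while 'r' selects a full 64-bit register from GPRRegClass.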

void BPFTargetLowering::ReplaceNodeResults(
  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert
        // an assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;
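
// The BPF calling convention passes arguments in R1-R5 and returns the
// result in R0, hence at most five register arguments per call; LowerCall()
// below diagnoses anything beyond that.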

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments.
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and glue operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represents the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together, so that nothing
    // can be scheduled in between them.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physregs.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
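
// Example: without the jmpExt feature the "<" family of jumps (JLT/JSLT,
// JLE/JSLE) is unavailable, so a comparison such as "LHS s< RHS" is rewritten
// above as "RHS s> LHS" (swap the operands, reverse the condition), which
// maps onto the available JSGT encoding.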

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_GLUE:
    return "BPFISD::RET_GLUE";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                   unsigned Flags) const {
  SDLoc DL(N);

  SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));
  return getAddr(N, DAG);
}

SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    // With movsx, a single instruction sign-extends the 32-bit subregister.
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  // Without movsx, sign-extend via a zero-extending move plus a shift pair.
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
    .addReg(PromotedReg0).addImm(32);
  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
    .addReg(PromotedReg1).addImm(32);

  return PromotedReg2;
}
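
// In BPF assembly terms, the two signed promotion strategies above look
// roughly like this (a sketch; register names are illustrative):
//
//   r0 = (s32)r1        // movsx path: MOVSX_rr_32
//
//   r0 = w1             // fallback: MOV_32_64 (zero-extends), then
//   r0 <<= 32           // shift the value into the upper half and
//   r0 s>>= 32          // arithmetic-shift back to replicate the sign bit.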

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function performs custom insertion while lowering BPFISD::MEMCPY,
  // which, per memcpy semantics, has only two register operands: the copy
  // source address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
  // third scratch register to serve as the destination register of the loads
  // and the source register of the stores.
  //
  // The scratch register is added with the Define | Dead | EarlyClobber
  // flags. The EarlyClobber flag has the semantic property that the operand
  // it is attached to is clobbered before the rest of the inputs are read;
  // hence it must be unique among the operands to the instruction. The
  // Define flag is needed to convince the machine verifier that an undef
  // value isn't a problem, since we load from memory into the register
  // anyway. The Dead flag is needed because the value in the scratch
  // register isn't supposed to be used by any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  // ...
  //  TrueVal = ...
  //  jmp_XX r1, r2 goto Copy1MBB
  //  fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert the conditional branch.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // Without the jmp32 feature, eBPF has only 64-bit comparisons, so any
  // 32-bit comparison needs to be promoted. However, if the 32-bit comparison
  // operands are destination registers, they are already implicitly
  // zero-extended and need no explicit zero-extend sequence.
  //
  // We simply do the extension in all situations in this method and rely on
  // the BPFMIPeephole pass to remove the unnecessary ones.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  // ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}
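
// In effect this accepts only BPF's reg+imm memory operand form, e.g.
// (a BPF assembly sketch):
//
//   r0 = *(u32 *)(r1 + 8)    // "r+i": legal
//   r0 = *(u32 *)(r1 + r2)   // "r+r": rejected, not encodable
//
// so optimizations such as LSR never form addressing modes the ISA cannot
// encode.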
964