1 //===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation  ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that BPF uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "BPFISelLowering.h"
15 #include "BPF.h"
16 #include "BPFSubtarget.h"
17 #include "BPFTargetMachine.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
24 #include "llvm/CodeGen/ValueTypes.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 #include "llvm/IR/DiagnosticPrinter.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "bpf-lower"
35 
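// Command-line knob (passed directly to llc, or via clang's -mllvm option);
// see the comment in the BPFTargetLowering constructor for why in-order
// expansion matters to kernel eBPF JIT compilers.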
36 static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
37   cl::Hidden, cl::init(false),
38   cl::desc("Expand memcpy into load/store pairs in order"));
39 
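// Emit a user-facing diagnostic (DiagnosticInfoUnsupported) for constructs the
// BPF backend cannot lower; if Val is provided, its printed form is prepended
// to Msg.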
40 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
41                  SDValue Val = {}) {
42   std::string Str;
43   if (Val) {
44     raw_string_ostream OS(Str);
45     Val->print(OS);
46     OS << ' ';
47   }
48   MachineFunction &MF = DAG.getMachineFunction();
49   DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
50       MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
51 }
52 
53 BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
54                                      const BPFSubtarget &STI)
55     : TargetLowering(TM) {
56 
57   // Set up the register classes.
58   addRegisterClass(MVT::i64, &BPF::GPRRegClass);
59   if (STI.getHasAlu32())
60     addRegisterClass(MVT::i32, &BPF::GPR32RegClass);
61 
62   // Compute derived properties from the register classes
63   computeRegisterProperties(STI.getRegisterInfo());
64 
65   setStackPointerRegisterToSaveRestore(BPF::R11);
66 
67   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
68   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
69   setOperationAction(ISD::BRIND, MVT::Other, Expand);
70   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
71 
72   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
73 
74   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
75   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
76   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
77 
  // Set unsupported atomic operations as Custom so that we can emit better
  // error messages than the fatal errors SelectionDAG would otherwise
  // produce.
81   for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
82     if (VT == MVT::i32) {
83       if (STI.getHasAlu32())
84         continue;
85     } else {
86       setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
87     }
88 
89     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
90     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
91     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
92     setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
93     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
94   }
95 
96   for (auto VT : { MVT::i32, MVT::i64 }) {
97     if (VT == MVT::i32 && !STI.getHasAlu32())
98       continue;
99 
100     setOperationAction(ISD::SDIVREM, VT, Expand);
101     setOperationAction(ISD::UDIVREM, VT, Expand);
102     if (!STI.hasSdivSmod()) {
103       setOperationAction(ISD::SDIV, VT, Custom);
104       setOperationAction(ISD::SREM, VT, Custom);
105     }
106     setOperationAction(ISD::MULHU, VT, Expand);
107     setOperationAction(ISD::MULHS, VT, Expand);
108     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
109     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
110     setOperationAction(ISD::ROTR, VT, Expand);
111     setOperationAction(ISD::ROTL, VT, Expand);
112     setOperationAction(ISD::SHL_PARTS, VT, Expand);
113     setOperationAction(ISD::SRL_PARTS, VT, Expand);
114     setOperationAction(ISD::SRA_PARTS, VT, Expand);
115     setOperationAction(ISD::CTPOP, VT, Expand);
116 
117     setOperationAction(ISD::SETCC, VT, Expand);
118     setOperationAction(ISD::SELECT, VT, Expand);
119     setOperationAction(ISD::SELECT_CC, VT, Custom);
120   }
121 
122   if (STI.getHasAlu32()) {
123     setOperationAction(ISD::BSWAP, MVT::i32, Promote);
124     setOperationAction(ISD::BR_CC, MVT::i32,
125                        STI.getHasJmp32() ? Custom : Promote);
126   }
127 
128   setOperationAction(ISD::CTTZ, MVT::i64, Custom);
129   setOperationAction(ISD::CTLZ, MVT::i64, Custom);
130   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
131   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
132 
133   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
134   if (!STI.hasMovsx()) {
135     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
136     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
137     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
138   }
139 
140   // Extended load operations for i1 types must be promoted
141   for (MVT VT : MVT::integer_valuetypes()) {
142     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
143     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
144     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
145 
146     if (!STI.hasLdsx()) {
147       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
148       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
149       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
150     }
151   }
152 
153   setBooleanContents(ZeroOrOneBooleanContent);
154   setMaxAtomicSizeInBitsSupported(64);
155 
156   // Function alignments
157   setMinFunctionAlignment(Align(8));
158   setPrefFunctionAlignment(Align(8));
159 
160   if (BPFExpandMemcpyInOrder) {
    // At this stage LLVM's generic code will try to expand memcpy into
    // load/store pairs, which happens before quite a few IR optimization
    // passes; the loads and stores could therefore be moved apart from each
    // other, which confuses the memcpy pattern matchers inside kernel eBPF
    // JIT compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs are not touched and stay in order. Hence, we set
    // MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
    // code path, and ask LLVM to use the target expander
    // EmitTargetCodeForMemcpy.
172     MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
173     MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
174     MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
175     MaxLoadsPerMemcmp = 0;
176   } else {
    // Inline memcpy() so that the kernel sees an explicit copy.
178     unsigned CommonMaxStores =
179       STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
180 
181     MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
182     MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
183     MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
184     MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
185   }
186 
187   // CPU/Feature control
188   HasAlu32 = STI.getHasAlu32();
189   HasJmp32 = STI.getHasJmp32();
190   HasJmpExt = STI.getHasJmpExt();
191   HasMovsx = STI.hasMovsx();
192 }
193 
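// BPF never folds a constant offset into a global address operand; keep the
// address computation explicit (LowerGlobalAddress below also rejects
// non-zero offsets).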
194 bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
195   return false;
196 }
197 
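// Truncating a wider integer to a narrower one is free on BPF: the narrower
// value simply lives in the low bits (or the 32-bit subregister) of the same
// register.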
198 bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
199   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
200     return false;
201   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
202   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
203   return NumBits1 > NumBits2;
204 }
205 
206 bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
207   if (!VT1.isInteger() || !VT2.isInteger())
208     return false;
209   unsigned NumBits1 = VT1.getSizeInBits();
210   unsigned NumBits2 = VT2.getSizeInBits();
211   return NumBits1 > NumBits2;
212 }
213 
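// With alu32 enabled, a 32-bit ALU operation writes its result into a W
// (32-bit) register and the upper 32 bits of the full register are zeroed,
// so an i32 -> i64 zero-extension is free.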
214 bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
215   if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
216     return false;
217   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
218   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
219   return NumBits1 == 32 && NumBits2 == 64;
220 }
221 
222 bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
223   if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
224     return false;
225   unsigned NumBits1 = VT1.getSizeInBits();
226   unsigned NumBits2 = VT2.getSizeInBits();
227   return NumBits1 == 32 && NumBits2 == 64;
228 }
229 
230 bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
231   EVT VT1 = Val.getValueType();
232   if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
233     MVT MT1 = VT1.getSimpleVT().SimpleTy;
234     MVT MT2 = VT2.getSimpleVT().SimpleTy;
235     if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
236         (MT2 == MVT::i32 || MT2 == MVT::i64))
237       return true;
238   }
239   return TargetLoweringBase::isZExtFree(Val, VT2);
240 }
241 
242 BPFTargetLowering::ConstraintType
243 BPFTargetLowering::getConstraintType(StringRef Constraint) const {
244   if (Constraint.size() == 1) {
245     switch (Constraint[0]) {
246     default:
247       break;
248     case 'w':
249       return C_RegisterClass;
250     }
251   }
252 
253   return TargetLowering::getConstraintType(Constraint);
254 }
255 
256 std::pair<unsigned, const TargetRegisterClass *>
257 BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
258                                                 StringRef Constraint,
259                                                 MVT VT) const {
260   if (Constraint.size() == 1) {
261     // GCC Constraint Letters
262     switch (Constraint[0]) {
263     case 'r': // GENERAL_REGS
264       return std::make_pair(0U, &BPF::GPRRegClass);
265     case 'w':
266       if (HasAlu32)
267         return std::make_pair(0U, &BPF::GPR32RegClass);
268       break;
269     default:
270       break;
271     }
272   }
273 
274   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
275 }
276 
277 void BPFTargetLowering::ReplaceNodeResults(
278   SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
279   const char *Msg;
280   uint32_t Opcode = N->getOpcode();
281   switch (Opcode) {
282   default:
283     report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
284   case ISD::ATOMIC_LOAD_ADD:
285   case ISD::ATOMIC_LOAD_AND:
286   case ISD::ATOMIC_LOAD_OR:
287   case ISD::ATOMIC_LOAD_XOR:
288   case ISD::ATOMIC_SWAP:
289   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
290     if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
291       Msg = "unsupported atomic operation, please use 32/64 bit version";
292     else
293       Msg = "unsupported atomic operation, please use 64 bit version";
294     break;
295   }
296 
297   SDLoc DL(N);
298   // We'll still produce a fatal error downstream, but this diagnostic is more
299   // user-friendly.
300   fail(DL, DAG, Msg);
301 }
302 
303 SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
304   switch (Op.getOpcode()) {
305   default:
306     report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
307   case ISD::BR_CC:
308     return LowerBR_CC(Op, DAG);
309   case ISD::GlobalAddress:
310     return LowerGlobalAddress(Op, DAG);
311   case ISD::SELECT_CC:
312     return LowerSELECT_CC(Op, DAG);
313   case ISD::SDIV:
314   case ISD::SREM:
315     return LowerSDIVSREM(Op, DAG);
316   case ISD::DYNAMIC_STACKALLOC:
317     return LowerDYNAMIC_STACKALLOC(Op, DAG);
318   }
319 }
320 
321 // Calling Convention Implementation
322 #include "BPFGenCallingConv.inc"
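// The TableGen-erated include above provides CC_BPF32/CC_BPF64 and
// RetCC_BPF32/RetCC_BPF64, which are used by the argument and return-value
// analysis below.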
323 
324 SDValue BPFTargetLowering::LowerFormalArguments(
325     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
326     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
327     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
328   switch (CallConv) {
329   default:
330     report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
331   case CallingConv::C:
332   case CallingConv::Fast:
333     break;
334   }
335 
336   MachineFunction &MF = DAG.getMachineFunction();
337   MachineRegisterInfo &RegInfo = MF.getRegInfo();
338 
339   // Assign locations to all of the incoming arguments.
340   SmallVector<CCValAssign, 16> ArgLocs;
341   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
342   CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);
343 
344   bool HasMemArgs = false;
345   for (size_t I = 0; I < ArgLocs.size(); ++I) {
346     auto &VA = ArgLocs[I];
347 
348     if (VA.isRegLoc()) {
349       // Arguments passed in registers
350       EVT RegVT = VA.getLocVT();
351       MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
352       switch (SimpleTy) {
353       default: {
354         std::string Str;
355         {
356           raw_string_ostream OS(Str);
357           RegVT.print(OS);
358         }
359         report_fatal_error("unhandled argument type: " + Twine(Str));
360       }
361       case MVT::i32:
362       case MVT::i64:
363         Register VReg = RegInfo.createVirtualRegister(
364             SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
365         RegInfo.addLiveIn(VA.getLocReg(), VReg);
366         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
367 
        // If this is a value that has been promoted to a wider type, insert an
        // assert[sz]ext to capture this, then truncate to the right size.
370         if (VA.getLocInfo() == CCValAssign::SExt)
371           ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
372                                  DAG.getValueType(VA.getValVT()));
373         else if (VA.getLocInfo() == CCValAssign::ZExt)
374           ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
375                                  DAG.getValueType(VA.getValVT()));
376 
377         if (VA.getLocInfo() != CCValAssign::Full)
378           ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
379 
380         InVals.push_back(ArgValue);
381 
382         break;
383       }
384     } else {
385       if (VA.isMemLoc())
386         HasMemArgs = true;
387       else
388         report_fatal_error("unhandled argument location");
389       InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
390     }
391   }
392   if (HasMemArgs)
393     fail(DL, DAG, "stack arguments are not supported");
394   if (IsVarArg)
395     fail(DL, DAG, "variadic functions are not supported");
396   if (MF.getFunction().hasStructRetAttr())
397     fail(DL, DAG, "aggregate returns are not supported");
398 
399   return Chain;
400 }
401 
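// BPF passes function arguments in registers R1-R5, so at most five arguments
// can be lowered; anything beyond that is diagnosed in LowerCall.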
402 const size_t BPFTargetLowering::MaxArgs = 5;
403 
404 SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
405                                      SmallVectorImpl<SDValue> &InVals) const {
406   SelectionDAG &DAG = CLI.DAG;
407   auto &Outs = CLI.Outs;
408   auto &OutVals = CLI.OutVals;
409   auto &Ins = CLI.Ins;
410   SDValue Chain = CLI.Chain;
411   SDValue Callee = CLI.Callee;
412   bool &IsTailCall = CLI.IsTailCall;
413   CallingConv::ID CallConv = CLI.CallConv;
414   bool IsVarArg = CLI.IsVarArg;
415   MachineFunction &MF = DAG.getMachineFunction();
416 
417   // BPF target does not support tail call optimization.
418   IsTailCall = false;
419 
420   switch (CallConv) {
421   default:
422     report_fatal_error("unsupported calling convention: " + Twine(CallConv));
423   case CallingConv::Fast:
424   case CallingConv::C:
425     break;
426   }
427 
428   // Analyze operands of the call, assigning locations to each operand.
429   SmallVector<CCValAssign, 16> ArgLocs;
430   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
431 
432   CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);
433 
434   unsigned NumBytes = CCInfo.getStackSize();
435 
436   if (Outs.size() > MaxArgs)
437     fail(CLI.DL, DAG, "too many arguments", Callee);
438 
439   for (auto &Arg : Outs) {
440     ISD::ArgFlagsTy Flags = Arg.Flags;
441     if (!Flags.isByVal())
442       continue;
443     fail(CLI.DL, DAG, "pass by value not supported", Callee);
444     break;
445   }
446 
447   auto PtrVT = getPointerTy(MF.getDataLayout());
448   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
449 
450   SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;
451 
452   // Walk arg assignments
453   for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
454     CCValAssign &VA = ArgLocs[i];
455     SDValue &Arg = OutVals[i];
456 
457     // Promote the value if needed.
458     switch (VA.getLocInfo()) {
459     default:
460       report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
461     case CCValAssign::Full:
462       break;
463     case CCValAssign::SExt:
464       Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
465       break;
466     case CCValAssign::ZExt:
467       Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
468       break;
469     case CCValAssign::AExt:
470       Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
471       break;
472     }
473 
474     // Push arguments into RegsToPass vector
475     if (VA.isRegLoc())
476       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
477     else
478       report_fatal_error("stack arguments are not supported");
479   }
480 
481   SDValue InGlue;
482 
  // Build a sequence of copy-to-reg nodes chained together with token chain and
  // flag operands which copy the outgoing args into registers.  The InGlue is
  // necessary since all emitted instructions must be stuck together.
486   for (auto &Reg : RegsToPass) {
487     Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
488     InGlue = Chain.getValue(1);
489   }
490 
  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
493   // Likewise ExternalSymbol -> TargetExternalSymbol.
494   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
495     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
496                                         G->getOffset(), 0);
497   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
498     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
499     fail(CLI.DL, DAG,
500          Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
501                "' is not supported."));
502   }
503 
504   // Returns a chain & a flag for retval copy to use.
505   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
506   SmallVector<SDValue, 8> Ops;
507   Ops.push_back(Chain);
508   Ops.push_back(Callee);
509 
510   // Add argument registers to the end of the list so that they are
511   // known live into the call.
512   for (auto &Reg : RegsToPass)
513     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
514 
515   if (InGlue.getNode())
516     Ops.push_back(InGlue);
517 
518   Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
519   InGlue = Chain.getValue(1);
520 
521   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
522 
523   // Create the CALLSEQ_END node.
524   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
525   InGlue = Chain.getValue(1);
526 
527   // Handle result values, copying them out of physregs into vregs that we
528   // return.
529   return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
530                          InVals);
531 }
532 
533 SDValue
534 BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
535                                bool IsVarArg,
536                                const SmallVectorImpl<ISD::OutputArg> &Outs,
537                                const SmallVectorImpl<SDValue> &OutVals,
538                                const SDLoc &DL, SelectionDAG &DAG) const {
539   unsigned Opc = BPFISD::RET_GLUE;
540 
  // CCValAssign - represents the assignment of the return value to a location.
542   SmallVector<CCValAssign, 16> RVLocs;
543   MachineFunction &MF = DAG.getMachineFunction();
544 
545   // CCState - Info about the registers and stack slot.
546   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
547 
548   if (MF.getFunction().getReturnType()->isAggregateType()) {
549     fail(DL, DAG, "aggregate returns are not supported");
550     return DAG.getNode(Opc, DL, MVT::Other, Chain);
551   }
552 
  // Analyze return values.
554   CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
555 
556   SDValue Glue;
557   SmallVector<SDValue, 4> RetOps(1, Chain);
558 
559   // Copy the result values into the output registers.
560   for (size_t i = 0; i != RVLocs.size(); ++i) {
561     CCValAssign &VA = RVLocs[i];
562     if (!VA.isRegLoc())
563       report_fatal_error("stack return values are not supported");
564 
565     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
566 
    // Guarantee that all emitted copies are glued together so that the
    // scheduler cannot pull them apart.
569     Glue = Chain.getValue(1);
570     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
571   }
572 
573   RetOps[0] = Chain; // Update chain.
574 
575   // Add the glue if we have it.
576   if (Glue.getNode())
577     RetOps.push_back(Glue);
578 
579   return DAG.getNode(Opc, DL, MVT::Other, RetOps);
580 }
581 
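// Copy the return value(s) of a call out of the physical result registers.
// Only a single register-sized return value is supported; larger results are
// diagnosed and replaced with zero constants.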
582 SDValue BPFTargetLowering::LowerCallResult(
583     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
584     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
585     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
586 
587   MachineFunction &MF = DAG.getMachineFunction();
588   // Assign locations to each value returned by this call.
589   SmallVector<CCValAssign, 16> RVLocs;
590   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
591 
592   if (Ins.size() > 1) {
593     fail(DL, DAG, "only small returns supported");
594     for (auto &In : Ins)
595       InVals.push_back(DAG.getConstant(0, DL, In.VT));
596     return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
597   }
598 
599   CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
600 
601   // Copy all of the result registers out of their specified physreg.
602   for (auto &Val : RVLocs) {
603     Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
604                                Val.getValVT(), InGlue).getValue(1);
605     InGlue = Chain.getValue(2);
606     InVals.push_back(Chain.getValue(0));
607   }
608 
609   return Chain;
610 }
611 
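// Without the BPF "jmpExt" feature only the >, >=, == and != style conditional
// jumps exist, so rewrite a < or <= comparison by swapping the operands and
// the condition code, e.g. (a < b) becomes (b > a).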
612 static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
613   switch (CC) {
614   default:
615     break;
616   case ISD::SETULT:
617   case ISD::SETULE:
618   case ISD::SETLT:
619   case ISD::SETLE:
620     CC = ISD::getSetCCSwappedOperands(CC);
621     std::swap(LHS, RHS);
622     break;
623   }
624 }
625 
626 SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
627   SDLoc DL(Op);
628   fail(DL, DAG,
629        "unsupported signed division, please convert to unsigned div/mod.");
630   return DAG.getUNDEF(Op->getValueType(0));
631 }
632 
633 SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
634                                                    SelectionDAG &DAG) const {
635   SDLoc DL(Op);
636   fail(DL, DAG, "unsupported dynamic stack allocation");
637   auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
638   return DAG.getMergeValues(Ops, SDLoc());
639 }
640 
641 SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
642   SDValue Chain = Op.getOperand(0);
643   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
644   SDValue LHS = Op.getOperand(2);
645   SDValue RHS = Op.getOperand(3);
646   SDValue Dest = Op.getOperand(4);
647   SDLoc DL(Op);
648 
649   if (!getHasJmpExt())
650     NegateCC(LHS, RHS, CC);
651 
652   return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
653                      DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
654 }
655 
656 SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
657   SDValue LHS = Op.getOperand(0);
658   SDValue RHS = Op.getOperand(1);
659   SDValue TrueV = Op.getOperand(2);
660   SDValue FalseV = Op.getOperand(3);
661   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
662   SDLoc DL(Op);
663 
664   if (!getHasJmpExt())
665     NegateCC(LHS, RHS, CC);
666 
667   SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
668   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
669   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
670 
671   return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
672 }
673 
674 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
675   switch ((BPFISD::NodeType)Opcode) {
676   case BPFISD::FIRST_NUMBER:
677     break;
678   case BPFISD::RET_GLUE:
679     return "BPFISD::RET_GLUE";
680   case BPFISD::CALL:
681     return "BPFISD::CALL";
682   case BPFISD::SELECT_CC:
683     return "BPFISD::SELECT_CC";
684   case BPFISD::BR_CC:
685     return "BPFISD::BR_CC";
686   case BPFISD::Wrapper:
687     return "BPFISD::Wrapper";
688   case BPFISD::MEMCPY:
689     return "BPFISD::MEMCPY";
690   }
691   return nullptr;
692 }
693 
694 SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
695                                               SelectionDAG &DAG) const {
696   auto *N = cast<GlobalAddressSDNode>(Op);
697   if (N->getOffset() != 0)
698     report_fatal_error("invalid offset for global address: " +
699                        Twine(N->getOffset()));
700 
701   SDLoc DL(Op);
702   const GlobalValue *GV = N->getGlobal();
703   SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);
704 
705   return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
706 }
707 
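// Promote the 32-bit value in Reg to a 64-bit register: unsigned values are
// zero-extended with a 32-to-64 move; signed values use MOVSX when available,
// otherwise a shift-left/arithmetic-shift-right by 32 pair.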
708 unsigned
709 BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
710                                  unsigned Reg, bool isSigned) const {
711   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
712   const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
713   int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
714   MachineFunction *F = BB->getParent();
715   DebugLoc DL = MI.getDebugLoc();
716 
717   MachineRegisterInfo &RegInfo = F->getRegInfo();
718 
719   if (!isSigned) {
720     Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
721     BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
722     return PromotedReg0;
723   }
724   Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
725   Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
726   Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
727   if (HasMovsx) {
728     BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
729   } else {
730     BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
731     BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
732       .addReg(PromotedReg0).addImm(32);
733     BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
734       .addReg(PromotedReg1).addImm(32);
735   }
736 
737   return PromotedReg2;
738 }
739 
740 MachineBasicBlock *
741 BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
742                                                      MachineBasicBlock *BB)
743                                                      const {
744   MachineFunction *MF = MI.getParent()->getParent();
745   MachineRegisterInfo &MRI = MF->getRegInfo();
746   MachineInstrBuilder MIB(*MF, MI);
747   unsigned ScratchReg;
748 
  // This function performs custom insertion while lowering BPFISD::MEMCPY,
  // which from memcpy semantics only has two register operands: the copy
  // source address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
  // third scratch register to serve as the destination register of the loads
  // and the source register of the stores.
  //
  // The scratch register carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an undef value isn't a
  // problem, as we are loading memory into it anyway. The Dead flag is needed
  // because the value in the scratch register is not supposed to be used by
  // any other instruction.
764   ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
765   MIB.addReg(ScratchReg,
766              RegState::Define | RegState::Dead | RegState::EarlyClobber);
767 
768   return BB;
769 }
770 
771 MachineBasicBlock *
772 BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
773                                                MachineBasicBlock *BB) const {
774   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
775   DebugLoc DL = MI.getDebugLoc();
776   unsigned Opc = MI.getOpcode();
777   bool isSelectRROp = (Opc == BPF::Select ||
778                        Opc == BPF::Select_64_32 ||
779                        Opc == BPF::Select_32 ||
780                        Opc == BPF::Select_32_64);
781 
782   bool isMemcpyOp = Opc == BPF::MEMCPY;
783 
784 #ifndef NDEBUG
785   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
786                        Opc == BPF::Select_Ri_64_32 ||
787                        Opc == BPF::Select_Ri_32 ||
788                        Opc == BPF::Select_Ri_32_64);
789 
790   if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
791     report_fatal_error("unhandled instruction type: " + Twine(Opc));
792 #endif
793 
794   if (isMemcpyOp)
795     return EmitInstrWithCustomInserterMemcpy(MI, BB);
796 
797   bool is32BitCmp = (Opc == BPF::Select_32 ||
798                      Opc == BPF::Select_32_64 ||
799                      Opc == BPF::Select_Ri_32 ||
800                      Opc == BPF::Select_Ri_32_64);
801 
802   // To "insert" a SELECT instruction, we actually have to insert the diamond
803   // control-flow pattern.  The incoming instruction knows the destination vreg
804   // to set, the condition code register to branch on, the true/false values to
805   // select between, and a branch opcode to use.
806   const BasicBlock *LLVM_BB = BB->getBasicBlock();
807   MachineFunction::iterator I = ++BB->getIterator();
808 
809   // ThisMBB:
810   // ...
811   //  TrueVal = ...
812   //  jmp_XX r1, r2 goto Copy1MBB
813   //  fallthrough --> Copy0MBB
814   MachineBasicBlock *ThisMBB = BB;
815   MachineFunction *F = BB->getParent();
816   MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
817   MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
818 
819   F->insert(I, Copy0MBB);
820   F->insert(I, Copy1MBB);
821   // Update machine-CFG edges by transferring all successors of the current
822   // block to the new block which will contain the Phi node for the select.
823   Copy1MBB->splice(Copy1MBB->begin(), BB,
824                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
825   Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
826   // Next, add the true and fallthrough blocks as its successors.
827   BB->addSuccessor(Copy0MBB);
828   BB->addSuccessor(Copy1MBB);
829 
  // Insert the conditional branch to Copy1MBB.
831   int CC = MI.getOperand(3).getImm();
832   int NewCC;
833   switch (CC) {
834 #define SET_NEWCC(X, Y) \
835   case ISD::X: \
836     if (is32BitCmp && HasJmp32) \
837       NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
838     else \
839       NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
840     break
841   SET_NEWCC(SETGT, JSGT);
842   SET_NEWCC(SETUGT, JUGT);
843   SET_NEWCC(SETGE, JSGE);
844   SET_NEWCC(SETUGE, JUGE);
845   SET_NEWCC(SETEQ, JEQ);
846   SET_NEWCC(SETNE, JNE);
847   SET_NEWCC(SETLT, JSLT);
848   SET_NEWCC(SETULT, JULT);
849   SET_NEWCC(SETLE, JSLE);
850   SET_NEWCC(SETULE, JULE);
851   default:
852     report_fatal_error("unimplemented select CondCode " + Twine(CC));
853   }
854 
855   Register LHS = MI.getOperand(1).getReg();
856   bool isSignedCmp = (CC == ISD::SETGT ||
857                       CC == ISD::SETGE ||
858                       CC == ISD::SETLT ||
859                       CC == ISD::SETLE);
860 
  // Without the jmp32 feature eBPF only has 64-bit comparisons, so any 32-bit
  // comparison needs to be promoted. However, if the 32-bit comparison
  // operands are the destinations of 32-bit ALU operations they are already
  // implicitly zero-extended, and no explicit zero-extension sequence is
  // needed for them.
  //
  // We simply do the extension for all situations in this method, and rely on
  // the BPFMIPeephole pass to remove the ones that turn out to be unnecessary.
868   if (is32BitCmp && !HasJmp32)
869     LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
870 
871   if (isSelectRROp) {
872     Register RHS = MI.getOperand(2).getReg();
873 
874     if (is32BitCmp && !HasJmp32)
875       RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
876 
877     BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
878   } else {
879     int64_t imm32 = MI.getOperand(2).getImm();
880     // Check before we build J*_ri instruction.
881     if (!isInt<32>(imm32))
882       report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
883     BuildMI(BB, DL, TII.get(NewCC))
884         .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
885   }
886 
887   // Copy0MBB:
888   //  %FalseValue = ...
889   //  # fallthrough to Copy1MBB
890   BB = Copy0MBB;
891 
892   // Update machine-CFG edges
893   BB->addSuccessor(Copy1MBB);
894 
895   // Copy1MBB:
896   //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
897   // ...
898   BB = Copy1MBB;
899   BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
900       .addReg(MI.getOperand(5).getReg())
901       .addMBB(Copy0MBB)
902       .addReg(MI.getOperand(4).getReg())
903       .addMBB(ThisMBB);
904 
905   MI.eraseFromParent(); // The pseudo instruction is gone now.
906   return BB;
907 }
908 
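// Comparison results are materialized as i32 when alu32 is available,
// otherwise as i64.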
909 EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
910                                           EVT VT) const {
911   return getHasAlu32() ? MVT::i32 : MVT::i64;
912 }
913 
914 MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
915                                               EVT VT) const {
916   return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
917 }
918 
919 bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
920                                               const AddrMode &AM, Type *Ty,
921                                               unsigned AS,
922                                               Instruction *I) const {
923   // No global is ever allowed as a base.
924   if (AM.BaseGV)
925     return false;
926 
927   switch (AM.Scale) {
928   case 0: // "r+i" or just "i", depending on HasBaseReg.
929     break;
930   case 1:
931     if (!AM.HasBaseReg) // allow "r+i".
932       break;
933     return false; // disallow "r+r" or "r+r+i".
934   default:
935     return false;
936   }
937 
938   return true;
939 }