xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation  ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that BPF uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "BPFISelLowering.h"
15 #include "BPF.h"
16 #include "BPFSubtarget.h"
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
23 #include "llvm/CodeGen/ValueTypes.h"
24 #include "llvm/IR/DIBuilder.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 #include "llvm/IR/DiagnosticPrinter.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/raw_ostream.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "bpf-lower"
36 
37 static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
38   cl::Hidden, cl::init(false),
39   cl::desc("Expand memcpy into load/store pairs in order"));
40 
fail(const SDLoc & DL,SelectionDAG & DAG,const Twine & Msg,SDValue Val={})41 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
42                  SDValue Val = {}) {
43   std::string Str;
44   if (Val) {
45     raw_string_ostream OS(Str);
46     Val->print(OS);
47     OS << ' ';
48   }
49   MachineFunction &MF = DAG.getMachineFunction();
50   DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
51       MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
52 }
53 
BPFTargetLowering(const TargetMachine & TM,const BPFSubtarget & STI)54 BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
55                                      const BPFSubtarget &STI)
56     : TargetLowering(TM) {
57 
58   // Set up the register classes.
59   addRegisterClass(MVT::i64, &BPF::GPRRegClass);
60   if (STI.getHasAlu32())
61     addRegisterClass(MVT::i32, &BPF::GPR32RegClass);
62 
63   // Compute derived properties from the register classes
64   computeRegisterProperties(STI.getRegisterInfo());
65 
66   setStackPointerRegisterToSaveRestore(BPF::R11);
67 
68   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
69   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
70   setOperationAction(ISD::BRIND, MVT::Other, Expand);
71   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
72 
73   setOperationAction(ISD::TRAP, MVT::Other, Custom);
74 
75   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
76 
77   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
78   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
79   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
80 
81   // Set unsupported atomic operations as Custom so
82   // we can emit better error messages than fatal error
83   // from selectiondag.
84   for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
85     if (VT == MVT::i32) {
86       if (STI.getHasAlu32())
87         continue;
88     } else {
89       setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
90     }
91 
92     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
93     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
94     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
95     setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
96     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
97   }
98 
99   for (auto VT : {MVT::i32, MVT::i64}) {
100     setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
101     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
102   }
103 
104   for (auto VT : { MVT::i32, MVT::i64 }) {
105     if (VT == MVT::i32 && !STI.getHasAlu32())
106       continue;
107 
108     setOperationAction(ISD::SDIVREM, VT, Expand);
109     setOperationAction(ISD::UDIVREM, VT, Expand);
110     if (!STI.hasSdivSmod()) {
111       setOperationAction(ISD::SDIV, VT, Custom);
112       setOperationAction(ISD::SREM, VT, Custom);
113     }
114     setOperationAction(ISD::MULHU, VT, Expand);
115     setOperationAction(ISD::MULHS, VT, Expand);
116     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
117     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
118     setOperationAction(ISD::ROTR, VT, Expand);
119     setOperationAction(ISD::ROTL, VT, Expand);
120     setOperationAction(ISD::SHL_PARTS, VT, Expand);
121     setOperationAction(ISD::SRL_PARTS, VT, Expand);
122     setOperationAction(ISD::SRA_PARTS, VT, Expand);
123     setOperationAction(ISD::CTPOP, VT, Expand);
124     setOperationAction(ISD::CTTZ, VT, Expand);
125     setOperationAction(ISD::CTLZ, VT, Expand);
126     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
127     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
128 
129     setOperationAction(ISD::SETCC, VT, Expand);
130     setOperationAction(ISD::SELECT, VT, Expand);
131     setOperationAction(ISD::SELECT_CC, VT, Custom);
132   }
133 
134   if (STI.getHasAlu32()) {
135     setOperationAction(ISD::BSWAP, MVT::i32, Promote);
136     setOperationAction(ISD::BR_CC, MVT::i32,
137                        STI.getHasJmp32() ? Custom : Promote);
138   }
139 
140   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
141   if (!STI.hasMovsx()) {
142     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
143     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
144     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
145   }
146 
147   // Extended load operations for i1 types must be promoted
148   for (MVT VT : MVT::integer_valuetypes()) {
149     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
150     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
151     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
152 
153     if (!STI.hasLdsx()) {
154       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
155       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
156       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
157     }
158   }
159 
160   setBooleanContents(ZeroOrOneBooleanContent);
161   setMaxAtomicSizeInBitsSupported(64);
162 
163   // Function alignments
164   setMinFunctionAlignment(Align(8));
165   setPrefFunctionAlignment(Align(8));
166 
167   if (BPFExpandMemcpyInOrder) {
168     // LLVM generic code will try to expand memcpy into load/store pairs at this
169     // stage which is before quite a few IR optimization passes, therefore the
170     // loads and stores could potentially be moved apart from each other which
171     // will cause trouble to memcpy pattern matcher inside kernel eBPF JIT
172     // compilers.
173     //
174     // When -bpf-expand-memcpy-in-order specified, we want to defer the expand
175     // of memcpy to later stage in IR optimization pipeline so those load/store
176     // pairs won't be touched and could be kept in order. Hence, we set
177     // MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
178     // code path, and ask LLVM to use target expander EmitTargetCodeForMemcpy.
179     MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
180     MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
181     MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
182     MaxLoadsPerMemcmp = 0;
183   } else {
184     // inline memcpy() for kernel to see explicit copy
185     unsigned CommonMaxStores =
186       STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
187 
188     MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
189     MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
190     MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
191     MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
192   }
193 
194   // CPU/Feature control
195   HasAlu32 = STI.getHasAlu32();
196   HasJmp32 = STI.getHasJmp32();
197   HasJmpExt = STI.getHasJmpExt();
198   HasMovsx = STI.hasMovsx();
199 }
200 
isOffsetFoldingLegal(const GlobalAddressSDNode * GA) const201 bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
202   return false;
203 }
204 
isTruncateFree(Type * Ty1,Type * Ty2) const205 bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
206   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
207     return false;
208   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
209   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
210   return NumBits1 > NumBits2;
211 }
212 
isTruncateFree(EVT VT1,EVT VT2) const213 bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
214   if (!VT1.isInteger() || !VT2.isInteger())
215     return false;
216   unsigned NumBits1 = VT1.getSizeInBits();
217   unsigned NumBits2 = VT2.getSizeInBits();
218   return NumBits1 > NumBits2;
219 }
220 
isZExtFree(Type * Ty1,Type * Ty2) const221 bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
222   if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
223     return false;
224   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
225   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
226   return NumBits1 == 32 && NumBits2 == 64;
227 }
228 
isZExtFree(EVT VT1,EVT VT2) const229 bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
230   if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
231     return false;
232   unsigned NumBits1 = VT1.getSizeInBits();
233   unsigned NumBits2 = VT2.getSizeInBits();
234   return NumBits1 == 32 && NumBits2 == 64;
235 }
236 
isZExtFree(SDValue Val,EVT VT2) const237 bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
238   EVT VT1 = Val.getValueType();
239   if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
240     MVT MT1 = VT1.getSimpleVT().SimpleTy;
241     MVT MT2 = VT2.getSimpleVT().SimpleTy;
242     if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
243         (MT2 == MVT::i32 || MT2 == MVT::i64))
244       return true;
245   }
246   return TargetLoweringBase::isZExtFree(Val, VT2);
247 }
248 
249 BPFTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const250 BPFTargetLowering::getConstraintType(StringRef Constraint) const {
251   if (Constraint.size() == 1) {
252     switch (Constraint[0]) {
253     default:
254       break;
255     case 'w':
256       return C_RegisterClass;
257     }
258   }
259 
260   return TargetLowering::getConstraintType(Constraint);
261 }
262 
263 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const264 BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
265                                                 StringRef Constraint,
266                                                 MVT VT) const {
267   if (Constraint.size() == 1) {
268     // GCC Constraint Letters
269     switch (Constraint[0]) {
270     case 'r': // GENERAL_REGS
271       return std::make_pair(0U, &BPF::GPRRegClass);
272     case 'w':
273       if (HasAlu32)
274         return std::make_pair(0U, &BPF::GPR32RegClass);
275       break;
276     default:
277       break;
278     }
279   }
280 
281   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
282 }
283 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const284 void BPFTargetLowering::ReplaceNodeResults(
285   SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
286   const char *Msg;
287   uint32_t Opcode = N->getOpcode();
288   switch (Opcode) {
289   default:
290     report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
291   case ISD::ATOMIC_LOAD_ADD:
292   case ISD::ATOMIC_LOAD_AND:
293   case ISD::ATOMIC_LOAD_OR:
294   case ISD::ATOMIC_LOAD_XOR:
295   case ISD::ATOMIC_SWAP:
296   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
297     if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
298       Msg = "unsupported atomic operation, please use 32/64 bit version";
299     else
300       Msg = "unsupported atomic operation, please use 64 bit version";
301     break;
302   case ISD::ATOMIC_LOAD:
303   case ISD::ATOMIC_STORE:
304     return;
305   }
306 
307   SDLoc DL(N);
308   // We'll still produce a fatal error downstream, but this diagnostic is more
309   // user-friendly.
310   fail(DL, DAG, Msg);
311 }
312 
LowerOperation(SDValue Op,SelectionDAG & DAG) const313 SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
314   switch (Op.getOpcode()) {
315   default:
316     report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
317   case ISD::BR_CC:
318     return LowerBR_CC(Op, DAG);
319   case ISD::GlobalAddress:
320     return LowerGlobalAddress(Op, DAG);
321   case ISD::ConstantPool:
322     return LowerConstantPool(Op, DAG);
323   case ISD::SELECT_CC:
324     return LowerSELECT_CC(Op, DAG);
325   case ISD::SDIV:
326   case ISD::SREM:
327     return LowerSDIVSREM(Op, DAG);
328   case ISD::DYNAMIC_STACKALLOC:
329     return LowerDYNAMIC_STACKALLOC(Op, DAG);
330   case ISD::ATOMIC_LOAD:
331   case ISD::ATOMIC_STORE:
332     return LowerATOMIC_LOAD_STORE(Op, DAG);
333   case ISD::TRAP:
334     return LowerTRAP(Op, DAG);
335   }
336 }
337 
338 // Calling Convention Implementation
339 #include "BPFGenCallingConv.inc"
340 
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const341 SDValue BPFTargetLowering::LowerFormalArguments(
342     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
343     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
344     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
345   switch (CallConv) {
346   default:
347     report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
348   case CallingConv::C:
349   case CallingConv::Fast:
350     break;
351   }
352 
353   MachineFunction &MF = DAG.getMachineFunction();
354   MachineRegisterInfo &RegInfo = MF.getRegInfo();
355 
356   // Assign locations to all of the incoming arguments.
357   SmallVector<CCValAssign, 16> ArgLocs;
358   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
359   CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);
360 
361   bool HasMemArgs = false;
362   for (size_t I = 0; I < ArgLocs.size(); ++I) {
363     auto &VA = ArgLocs[I];
364 
365     if (VA.isRegLoc()) {
366       // Arguments passed in registers
367       EVT RegVT = VA.getLocVT();
368       MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
369       switch (SimpleTy) {
370       default: {
371         std::string Str;
372         {
373           raw_string_ostream OS(Str);
374           RegVT.print(OS);
375         }
376         report_fatal_error("unhandled argument type: " + Twine(Str));
377       }
378       case MVT::i32:
379       case MVT::i64:
380         Register VReg = RegInfo.createVirtualRegister(
381             SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
382         RegInfo.addLiveIn(VA.getLocReg(), VReg);
383         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
384 
385         // If this is an value that has been promoted to wider types, insert an
386         // assert[sz]ext to capture this, then truncate to the right size.
387         if (VA.getLocInfo() == CCValAssign::SExt)
388           ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
389                                  DAG.getValueType(VA.getValVT()));
390         else if (VA.getLocInfo() == CCValAssign::ZExt)
391           ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
392                                  DAG.getValueType(VA.getValVT()));
393 
394         if (VA.getLocInfo() != CCValAssign::Full)
395           ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
396 
397         InVals.push_back(ArgValue);
398 
399         break;
400       }
401     } else {
402       if (VA.isMemLoc())
403         HasMemArgs = true;
404       else
405         report_fatal_error("unhandled argument location");
406       InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
407     }
408   }
409   if (HasMemArgs)
410     fail(DL, DAG, "stack arguments are not supported");
411   if (IsVarArg)
412     fail(DL, DAG, "variadic functions are not supported");
413   if (MF.getFunction().hasStructRetAttr())
414     fail(DL, DAG, "aggregate returns are not supported");
415 
416   return Chain;
417 }
418 
419 const size_t BPFTargetLowering::MaxArgs = 5;
420 
resetRegMaskBit(const TargetRegisterInfo * TRI,uint32_t * RegMask,MCRegister Reg)421 static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
422                             MCRegister Reg) {
423   for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
424     RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
425 }
426 
regMaskFromTemplate(const TargetRegisterInfo * TRI,MachineFunction & MF,const uint32_t * BaseRegMask)427 static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
428                                      MachineFunction &MF,
429                                      const uint32_t *BaseRegMask) {
430   uint32_t *RegMask = MF.allocateRegMask();
431   unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
432   memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
433   return RegMask;
434 }
435 
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const436 SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
437                                      SmallVectorImpl<SDValue> &InVals) const {
438   SelectionDAG &DAG = CLI.DAG;
439   auto &Outs = CLI.Outs;
440   auto &OutVals = CLI.OutVals;
441   auto &Ins = CLI.Ins;
442   SDValue Chain = CLI.Chain;
443   SDValue Callee = CLI.Callee;
444   bool &IsTailCall = CLI.IsTailCall;
445   CallingConv::ID CallConv = CLI.CallConv;
446   bool IsVarArg = CLI.IsVarArg;
447   MachineFunction &MF = DAG.getMachineFunction();
448 
449   // BPF target does not support tail call optimization.
450   IsTailCall = false;
451 
452   switch (CallConv) {
453   default:
454     report_fatal_error("unsupported calling convention: " + Twine(CallConv));
455   case CallingConv::Fast:
456   case CallingConv::C:
457     break;
458   }
459 
460   // Analyze operands of the call, assigning locations to each operand.
461   SmallVector<CCValAssign, 16> ArgLocs;
462   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
463 
464   CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);
465 
466   unsigned NumBytes = CCInfo.getStackSize();
467 
468   if (Outs.size() > MaxArgs)
469     fail(CLI.DL, DAG, "too many arguments", Callee);
470 
471   for (auto &Arg : Outs) {
472     ISD::ArgFlagsTy Flags = Arg.Flags;
473     if (!Flags.isByVal())
474       continue;
475     fail(CLI.DL, DAG, "pass by value not supported", Callee);
476     break;
477   }
478 
479   auto PtrVT = getPointerTy(MF.getDataLayout());
480   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
481 
482   SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;
483 
484   // Walk arg assignments
485   for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
486     CCValAssign &VA = ArgLocs[i];
487     SDValue &Arg = OutVals[i];
488 
489     // Promote the value if needed.
490     switch (VA.getLocInfo()) {
491     default:
492       report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
493     case CCValAssign::Full:
494       break;
495     case CCValAssign::SExt:
496       Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
497       break;
498     case CCValAssign::ZExt:
499       Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
500       break;
501     case CCValAssign::AExt:
502       Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
503       break;
504     }
505 
506     // Push arguments into RegsToPass vector
507     if (VA.isRegLoc())
508       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
509     else
510       report_fatal_error("stack arguments are not supported");
511   }
512 
513   SDValue InGlue;
514 
515   // Build a sequence of copy-to-reg nodes chained together with token chain and
516   // flag operands which copy the outgoing args into registers.  The InGlue in
517   // necessary since all emitted instructions must be stuck together.
518   for (auto &Reg : RegsToPass) {
519     Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
520     InGlue = Chain.getValue(1);
521   }
522 
523   // If the callee is a GlobalAddress node (quite common, every direct call is)
524   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
525   // Likewise ExternalSymbol -> TargetExternalSymbol.
526   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
527     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
528                                         G->getOffset(), 0);
529   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
530     if (StringRef(E->getSymbol()) != BPF_TRAP) {
531       Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
532       fail(CLI.DL, DAG,
533            Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
534                  "' is not supported."));
535     }
536   }
537 
538   // Returns a chain & a flag for retval copy to use.
539   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
540   SmallVector<SDValue, 8> Ops;
541   Ops.push_back(Chain);
542   Ops.push_back(Callee);
543 
544   // Add argument registers to the end of the list so that they are
545   // known live into the call.
546   for (auto &Reg : RegsToPass)
547     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
548 
549   bool HasFastCall =
550       (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
551   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
552   if (HasFastCall) {
553     uint32_t *RegMask = regMaskFromTemplate(
554         TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
555     for (auto const &RegPair : RegsToPass)
556       resetRegMaskBit(TRI, RegMask, RegPair.first);
557     if (!CLI.CB->getType()->isVoidTy())
558       resetRegMaskBit(TRI, RegMask, BPF::R0);
559     Ops.push_back(DAG.getRegisterMask(RegMask));
560   } else {
561     Ops.push_back(
562         DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
563   }
564 
565   if (InGlue.getNode())
566     Ops.push_back(InGlue);
567 
568   Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
569   InGlue = Chain.getValue(1);
570 
571   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
572 
573   // Create the CALLSEQ_END node.
574   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
575   InGlue = Chain.getValue(1);
576 
577   // Handle result values, copying them out of physregs into vregs that we
578   // return.
579   return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
580                          InVals);
581 }
582 
583 SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const584 BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
585                                bool IsVarArg,
586                                const SmallVectorImpl<ISD::OutputArg> &Outs,
587                                const SmallVectorImpl<SDValue> &OutVals,
588                                const SDLoc &DL, SelectionDAG &DAG) const {
589   unsigned Opc = BPFISD::RET_GLUE;
590 
591   // CCValAssign - represent the assignment of the return value to a location
592   SmallVector<CCValAssign, 16> RVLocs;
593   MachineFunction &MF = DAG.getMachineFunction();
594 
595   // CCState - Info about the registers and stack slot.
596   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
597 
598   if (MF.getFunction().getReturnType()->isAggregateType()) {
599     fail(DL, DAG, "aggregate returns are not supported");
600     return DAG.getNode(Opc, DL, MVT::Other, Chain);
601   }
602 
603   // Analize return values.
604   CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
605 
606   SDValue Glue;
607   SmallVector<SDValue, 4> RetOps(1, Chain);
608 
609   // Copy the result values into the output registers.
610   for (size_t i = 0; i != RVLocs.size(); ++i) {
611     CCValAssign &VA = RVLocs[i];
612     if (!VA.isRegLoc())
613       report_fatal_error("stack return values are not supported");
614 
615     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
616 
617     // Guarantee that all emitted copies are stuck together,
618     // avoiding something bad.
619     Glue = Chain.getValue(1);
620     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
621   }
622 
623   RetOps[0] = Chain; // Update chain.
624 
625   // Add the glue if we have it.
626   if (Glue.getNode())
627     RetOps.push_back(Glue);
628 
629   return DAG.getNode(Opc, DL, MVT::Other, RetOps);
630 }
631 
LowerCallResult(SDValue Chain,SDValue InGlue,CallingConv::ID CallConv,bool IsVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const632 SDValue BPFTargetLowering::LowerCallResult(
633     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
634     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
635     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
636 
637   MachineFunction &MF = DAG.getMachineFunction();
638   // Assign locations to each value returned by this call.
639   SmallVector<CCValAssign, 16> RVLocs;
640   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
641 
642   if (Ins.size() > 1) {
643     fail(DL, DAG, "only small returns supported");
644     for (auto &In : Ins)
645       InVals.push_back(DAG.getConstant(0, DL, In.VT));
646     return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
647   }
648 
649   CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
650 
651   // Copy all of the result registers out of their specified physreg.
652   for (auto &Val : RVLocs) {
653     Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
654                                Val.getValVT(), InGlue).getValue(1);
655     InGlue = Chain.getValue(2);
656     InVals.push_back(Chain.getValue(0));
657   }
658 
659   return Chain;
660 }
661 
NegateCC(SDValue & LHS,SDValue & RHS,ISD::CondCode & CC)662 static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
663   switch (CC) {
664   default:
665     break;
666   case ISD::SETULT:
667   case ISD::SETULE:
668   case ISD::SETLT:
669   case ISD::SETLE:
670     CC = ISD::getSetCCSwappedOperands(CC);
671     std::swap(LHS, RHS);
672     break;
673   }
674 }
675 
LowerSDIVSREM(SDValue Op,SelectionDAG & DAG) const676 SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
677   SDLoc DL(Op);
678   fail(DL, DAG,
679        "unsupported signed division, please convert to unsigned div/mod.");
680   return DAG.getUNDEF(Op->getValueType(0));
681 }
682 
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const683 SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
684                                                    SelectionDAG &DAG) const {
685   SDLoc DL(Op);
686   fail(DL, DAG, "unsupported dynamic stack allocation");
687   auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
688   return DAG.getMergeValues(Ops, SDLoc());
689 }
690 
LowerBR_CC(SDValue Op,SelectionDAG & DAG) const691 SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
692   SDValue Chain = Op.getOperand(0);
693   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
694   SDValue LHS = Op.getOperand(2);
695   SDValue RHS = Op.getOperand(3);
696   SDValue Dest = Op.getOperand(4);
697   SDLoc DL(Op);
698 
699   if (!getHasJmpExt())
700     NegateCC(LHS, RHS, CC);
701 
702   return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
703                      DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
704 }
705 
LowerSELECT_CC(SDValue Op,SelectionDAG & DAG) const706 SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
707   SDValue LHS = Op.getOperand(0);
708   SDValue RHS = Op.getOperand(1);
709   SDValue TrueV = Op.getOperand(2);
710   SDValue FalseV = Op.getOperand(3);
711   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
712   SDLoc DL(Op);
713 
714   if (!getHasJmpExt())
715     NegateCC(LHS, RHS, CC);
716 
717   SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
718   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
719 
720   return DAG.getNode(BPFISD::SELECT_CC, DL, Op.getValueType(), Ops);
721 }
722 
LowerATOMIC_LOAD_STORE(SDValue Op,SelectionDAG & DAG) const723 SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
724                                                   SelectionDAG &DAG) const {
725   SDNode *N = Op.getNode();
726   SDLoc DL(N);
727 
728   if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
729       AtomicOrdering::SequentiallyConsistent)
730     fail(DL, DAG,
731          "sequentially consistent (seq_cst) "
732          "atomic load/store is not supported");
733 
734   return Op;
735 }
736 
createBPFUnreachable(Module * M)737 static Function *createBPFUnreachable(Module *M) {
738   if (auto *Fn = M->getFunction(BPF_TRAP))
739     return Fn;
740 
741   FunctionType *FT = FunctionType::get(Type::getVoidTy(M->getContext()), false);
742   Function *NewF =
743       Function::Create(FT, GlobalValue::ExternalWeakLinkage, BPF_TRAP, M);
744   NewF->setDSOLocal(true);
745   NewF->setCallingConv(CallingConv::C);
746   NewF->setSection(".ksyms");
747 
748   if (M->debug_compile_units().empty())
749     return NewF;
750 
751   DIBuilder DBuilder(*M);
752   DITypeRefArray ParamTypes =
753       DBuilder.getOrCreateTypeArray({nullptr /*void return*/});
754   DISubroutineType *FuncType = DBuilder.createSubroutineType(ParamTypes);
755   DICompileUnit *CU = *M->debug_compile_units_begin();
756   DISubprogram *SP =
757       DBuilder.createFunction(CU, BPF_TRAP, BPF_TRAP, nullptr, 0, FuncType, 0,
758                               DINode::FlagZero, DISubprogram::SPFlagZero);
759   NewF->setSubprogram(SP);
760   return NewF;
761 }
762 
LowerTRAP(SDValue Op,SelectionDAG & DAG) const763 SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const {
764   MachineFunction &MF = DAG.getMachineFunction();
765   TargetLowering::CallLoweringInfo CLI(DAG);
766   SmallVector<SDValue> InVals;
767   SDNode *N = Op.getNode();
768   SDLoc DL(N);
769 
770   Function *Fn = createBPFUnreachable(MF.getFunction().getParent());
771   auto PtrVT = getPointerTy(MF.getDataLayout());
772   CLI.Callee = DAG.getTargetGlobalAddress(Fn, DL, PtrVT);
773   CLI.Chain = N->getOperand(0);
774   CLI.IsTailCall = false;
775   CLI.CallConv = CallingConv::C;
776   CLI.IsVarArg = false;
777   CLI.DL = DL;
778   CLI.NoMerge = false;
779   CLI.DoesNotReturn = true;
780   return LowerCall(CLI, InVals);
781 }
782 
getTargetNodeName(unsigned Opcode) const783 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
784   switch ((BPFISD::NodeType)Opcode) {
785   case BPFISD::FIRST_NUMBER:
786     break;
787   case BPFISD::RET_GLUE:
788     return "BPFISD::RET_GLUE";
789   case BPFISD::CALL:
790     return "BPFISD::CALL";
791   case BPFISD::SELECT_CC:
792     return "BPFISD::SELECT_CC";
793   case BPFISD::BR_CC:
794     return "BPFISD::BR_CC";
795   case BPFISD::Wrapper:
796     return "BPFISD::Wrapper";
797   case BPFISD::MEMCPY:
798     return "BPFISD::MEMCPY";
799   }
800   return nullptr;
801 }
802 
getTargetNode(GlobalAddressSDNode * N,const SDLoc & DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)803 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
804                              SelectionDAG &DAG, unsigned Flags) {
805   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
806 }
807 
getTargetNode(ConstantPoolSDNode * N,const SDLoc & DL,EVT Ty,SelectionDAG & DAG,unsigned Flags)808 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
809                              SelectionDAG &DAG, unsigned Flags) {
810   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
811                                    N->getOffset(), Flags);
812 }
813 
814 template <class NodeTy>
getAddr(NodeTy * N,SelectionDAG & DAG,unsigned Flags) const815 SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
816                                    unsigned Flags) const {
817   SDLoc DL(N);
818 
819   SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);
820 
821   return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
822 }
823 
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const824 SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
825                                               SelectionDAG &DAG) const {
826   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
827   if (N->getOffset() != 0)
828     report_fatal_error("invalid offset for global address: " +
829                        Twine(N->getOffset()));
830   return getAddr(N, DAG);
831 }
832 
LowerConstantPool(SDValue Op,SelectionDAG & DAG) const833 SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
834                                              SelectionDAG &DAG) const {
835   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
836 
837   return getAddr(N, DAG);
838 }
839 
840 unsigned
EmitSubregExt(MachineInstr & MI,MachineBasicBlock * BB,unsigned Reg,bool isSigned) const841 BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
842                                  unsigned Reg, bool isSigned) const {
843   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
844   const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
845   int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
846   MachineFunction *F = BB->getParent();
847   DebugLoc DL = MI.getDebugLoc();
848 
849   MachineRegisterInfo &RegInfo = F->getRegInfo();
850 
851   if (!isSigned) {
852     Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
853     BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
854     return PromotedReg0;
855   }
856   Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
857   Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
858   Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
859   if (HasMovsx) {
860     BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
861   } else {
862     BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
863     BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
864       .addReg(PromotedReg0).addImm(32);
865     BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
866       .addReg(PromotedReg1).addImm(32);
867   }
868 
869   return PromotedReg2;
870 }
871 
872 MachineBasicBlock *
EmitInstrWithCustomInserterMemcpy(MachineInstr & MI,MachineBasicBlock * BB) const873 BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
874                                                      MachineBasicBlock *BB)
875                                                      const {
876   MachineFunction *MF = MI.getParent()->getParent();
877   MachineRegisterInfo &MRI = MF->getRegInfo();
878   MachineInstrBuilder MIB(*MF, MI);
879   unsigned ScratchReg;
880 
881   // This function does custom insertion during lowering BPFISD::MEMCPY which
882   // only has two register operands from memcpy semantics, the copy source
883   // address and the copy destination address.
884   //
885   // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
886   // a third scratch register to serve as the destination register of load and
887   // source register of store.
888   //
889   // The scratch register here is with the Define | Dead | EarlyClobber flags.
890   // The EarlyClobber flag has the semantic property that the operand it is
891   // attached to is clobbered before the rest of the inputs are read. Hence it
892   // must be unique among the operands to the instruction. The Define flag is
893   // needed to coerce the machine verifier that an Undef value isn't a problem
894   // as we anyway is loading memory into it. The Dead flag is needed as the
895   // value in scratch isn't supposed to be used by any other instruction.
896   ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
897   MIB.addReg(ScratchReg,
898              RegState::Define | RegState::Dead | RegState::EarlyClobber);
899 
900   return BB;
901 }
902 
903 MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr & MI,MachineBasicBlock * BB) const904 BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
905                                                MachineBasicBlock *BB) const {
906   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
907   DebugLoc DL = MI.getDebugLoc();
908   unsigned Opc = MI.getOpcode();
909   bool isSelectRROp = (Opc == BPF::Select ||
910                        Opc == BPF::Select_64_32 ||
911                        Opc == BPF::Select_32 ||
912                        Opc == BPF::Select_32_64);
913 
914   bool isMemcpyOp = Opc == BPF::MEMCPY;
915 
916 #ifndef NDEBUG
917   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
918                        Opc == BPF::Select_Ri_64_32 ||
919                        Opc == BPF::Select_Ri_32 ||
920                        Opc == BPF::Select_Ri_32_64);
921 
922   if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
923     report_fatal_error("unhandled instruction type: " + Twine(Opc));
924 #endif
925 
926   if (isMemcpyOp)
927     return EmitInstrWithCustomInserterMemcpy(MI, BB);
928 
929   bool is32BitCmp = (Opc == BPF::Select_32 ||
930                      Opc == BPF::Select_32_64 ||
931                      Opc == BPF::Select_Ri_32 ||
932                      Opc == BPF::Select_Ri_32_64);
933 
934   // To "insert" a SELECT instruction, we actually have to insert the diamond
935   // control-flow pattern.  The incoming instruction knows the destination vreg
936   // to set, the condition code register to branch on, the true/false values to
937   // select between, and a branch opcode to use.
938   const BasicBlock *LLVM_BB = BB->getBasicBlock();
939   MachineFunction::iterator I = ++BB->getIterator();
940 
941   // ThisMBB:
942   // ...
943   //  TrueVal = ...
944   //  jmp_XX r1, r2 goto Copy1MBB
945   //  fallthrough --> Copy0MBB
946   MachineBasicBlock *ThisMBB = BB;
947   MachineFunction *F = BB->getParent();
948   MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
949   MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
950 
951   F->insert(I, Copy0MBB);
952   F->insert(I, Copy1MBB);
953   // Update machine-CFG edges by transferring all successors of the current
954   // block to the new block which will contain the Phi node for the select.
955   Copy1MBB->splice(Copy1MBB->begin(), BB,
956                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
957   Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
958   // Next, add the true and fallthrough blocks as its successors.
959   BB->addSuccessor(Copy0MBB);
960   BB->addSuccessor(Copy1MBB);
961 
962   // Insert Branch if Flag
963   int CC = MI.getOperand(3).getImm();
964   int NewCC;
965   switch (CC) {
966 #define SET_NEWCC(X, Y) \
967   case ISD::X: \
968     if (is32BitCmp && HasJmp32) \
969       NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
970     else \
971       NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
972     break
973   SET_NEWCC(SETGT, JSGT);
974   SET_NEWCC(SETUGT, JUGT);
975   SET_NEWCC(SETGE, JSGE);
976   SET_NEWCC(SETUGE, JUGE);
977   SET_NEWCC(SETEQ, JEQ);
978   SET_NEWCC(SETNE, JNE);
979   SET_NEWCC(SETLT, JSLT);
980   SET_NEWCC(SETULT, JULT);
981   SET_NEWCC(SETLE, JSLE);
982   SET_NEWCC(SETULE, JULE);
983   default:
984     report_fatal_error("unimplemented select CondCode " + Twine(CC));
985   }
986 
987   Register LHS = MI.getOperand(1).getReg();
988   bool isSignedCmp = (CC == ISD::SETGT ||
989                       CC == ISD::SETGE ||
990                       CC == ISD::SETLT ||
991                       CC == ISD::SETLE);
992 
993   // eBPF at the moment only has 64-bit comparison. Any 32-bit comparison need
994   // to be promoted, however if the 32-bit comparison operands are destination
995   // registers then they are implicitly zero-extended already, there is no
996   // need of explicit zero-extend sequence for them.
997   //
998   // We simply do extension for all situations in this method, but we will
999   // try to remove those unnecessary in BPFMIPeephole pass.
1000   if (is32BitCmp && !HasJmp32)
1001     LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
1002 
1003   if (isSelectRROp) {
1004     Register RHS = MI.getOperand(2).getReg();
1005 
1006     if (is32BitCmp && !HasJmp32)
1007       RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
1008 
1009     BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
1010   } else {
1011     int64_t imm32 = MI.getOperand(2).getImm();
1012     // Check before we build J*_ri instruction.
1013     if (!isInt<32>(imm32))
1014       report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
1015     BuildMI(BB, DL, TII.get(NewCC))
1016         .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
1017   }
1018 
1019   // Copy0MBB:
1020   //  %FalseValue = ...
1021   //  # fallthrough to Copy1MBB
1022   BB = Copy0MBB;
1023 
1024   // Update machine-CFG edges
1025   BB->addSuccessor(Copy1MBB);
1026 
1027   // Copy1MBB:
1028   //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
1029   // ...
1030   BB = Copy1MBB;
1031   BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
1032       .addReg(MI.getOperand(5).getReg())
1033       .addMBB(Copy0MBB)
1034       .addReg(MI.getOperand(4).getReg())
1035       .addMBB(ThisMBB);
1036 
1037   MI.eraseFromParent(); // The pseudo instruction is gone now.
1038   return BB;
1039 }
1040 
getSetCCResultType(const DataLayout &,LLVMContext &,EVT VT) const1041 EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
1042                                           EVT VT) const {
1043   return getHasAlu32() ? MVT::i32 : MVT::i64;
1044 }
1045 
getScalarShiftAmountTy(const DataLayout & DL,EVT VT) const1046 MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1047                                               EVT VT) const {
1048   return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
1049 }
1050 
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const1051 bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1052                                               const AddrMode &AM, Type *Ty,
1053                                               unsigned AS,
1054                                               Instruction *I) const {
1055   // No global is ever allowed as a base.
1056   if (AM.BaseGV)
1057     return false;
1058 
1059   switch (AM.Scale) {
1060   case 0: // "r+i" or just "i", depending on HasBaseReg.
1061     break;
1062   case 1:
1063     if (!AM.HasBaseReg) // allow "r+i".
1064       break;
1065     return false; // disallow "r+r" or "r+r+i".
1066   default:
1067     return false;
1068   }
1069 
1070   return true;
1071 }
1072