//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better error
  // messages than the fatal errors SelectionDAG would otherwise produce.
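  // Note the asymmetry below: 32-bit atomic add is available even without
  // alu32, so it is not marked Custom for i32; the remaining unsupported
  // widths reach ReplaceNodeResults, which emits a readable diagnostic.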
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // would confuse the memcpy pattern matcher inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs are not touched and stay in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // inline memcpy() for kernel to see explicit copy
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();
}

bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }
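
  // Anything not recognized above falls back to the generic constraint
  // handling.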
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert an
        // assert[sz]ext to capture this, then truncate to the right size.
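        // (AssertSext/AssertZext record the original value type, letting
        // later combines rely on the upper bits already being correct.)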
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
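    // The location info records how the calling convention widened a narrow
    // argument to its register width; emit the matching extension so the
    // value placed in the register has the expected upper bits.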
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into RegsToPass vector
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represent the assignment of the return value to a location
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Glue the copies together so that nothing can be scheduled in between
    // them.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physreg.
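  // Each copy consumes the glue produced by the previous node, keeping the
  // copies pinned right after the call.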
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

// Without the jump-extension feature, BPF only has "greater-than" style
// conditional jumps, so rewrite '<'/'<=' comparisons by swapping the operands
// and using the swapped condition code.
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_GLUE:
    return "BPFISD::RET_GLUE";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));

  SDLoc DL(Op);
  const GlobalValue *GV = N->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
  } else {
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
        .addReg(PromotedReg0).addImm(32);
    BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
        .addReg(PromotedReg1).addImm(32);
  }

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
  // third scratch register to serve as the destination register of the loads
  // and the source register of the stores.
  //
  // The scratch register is added with the Define | Dead | EarlyClobber
  // flags. The EarlyClobber flag has the semantic property that the operand
  // it is attached to is clobbered before the rest of the inputs are read.
  // Hence it must be unique among the operands to the instruction. The Define
  // flag is needed to convince the machine verifier that an Undef value isn't
  // a problem, as we are loading memory into the register anyway. The Dead
  // flag is needed because the value in the scratch register isn't supposed
  // to be used by any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  // ...
  //  TrueVal = ...
  //  jmp_XX r1, r2 goto Copy1MBB
  //  fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert Branch if Flag
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // Without jmp32, eBPF only has 64-bit comparisons, so any 32-bit comparison
  // needs to be promoted. However, if the 32-bit comparison operands are
  // destination registers, they are already implicitly zero-extended and no
  // explicit zero-extend sequence is needed for them.
  //
  // We simply do the extension in all cases in this method and rely on the
  // BPFMIPeephole pass to remove the unnecessary ones.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}