//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Set unsupported atomic operations as Custom so
  // we can emit better error messages than a fatal error
  // from SelectionDAG.
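  // Note the asymmetry in the loop below: 32-bit ATOMIC_LOAD_ADD is left to
  // the normal instruction-selection path even without alu32, so only i8/i16
  // get the Custom diagnostic for it, while the remaining i32 atomics get it
  // whenever alu32 is disabled.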
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // would confuse the memcpy pattern matchers inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use target expander
    // EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // inline memcpy() for kernel to see explicit copy
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert an
        // assert[sz]ext to capture this, then truncate to the right size.
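        // The Assert[SZ]ext node records the original, narrower value type so
        // later DAG combines know the upper bits of the wider register already
        // hold the sign/zero extension.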
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
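    // This mirrors the Assert[SZ]ext handling on the formal-argument side:
    // sub-register-sized values are widened here to the width the calling
    // convention assigned to the location.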
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into RegsToPass vector
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represent the assignment of the return value to a location
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physreg.
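  // Each CopyFromReg node produces (value, chain, glue); the chain and glue
  // are threaded through the loop so consecutive copies stay glued together,
  // and the value itself is what we hand back in InVals.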
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_GLUE:
    return "BPFISD::RET_GLUE";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                   unsigned Flags) const {
  SDLoc DL(N);

  SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

SDValue
BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                      SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));
  return getAddr(N, DAG);
}

SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    // A single sign-extending move suffices; return its result directly so the
    // returned register is always defined.
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  } else {
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
        .addReg(PromotedReg0).addImm(32);
    BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
        .addReg(PromotedReg1).addImm(32);
  }

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
  // a third scratch register to serve as the destination register of the
  // loads and the source register of the stores.
  //
  // The scratch register carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an Undef value isn't a
  // problem, as we are loading memory into it anyway. The Dead flag is needed
  // because the value in the scratch register isn't supposed to be used by
  // any other instruction.
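  // All the custom inserter does here is append that scratch operand; the
  // expansion of the MEMCPY pseudo into actual load/store pairs happens
  // later, once this operand is in place.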
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  // ...
  //  TrueVal = ...
  //  jmp_XX r1, r2 goto Copy1MBB
  //  fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert Branch if Flag
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // eBPF at the moment only has 64-bit comparison.
  // Any 32-bit comparison needs to be promoted; however, if the 32-bit
  // comparison operands are destination registers, then they are implicitly
  // zero-extended already and there is no need for an explicit zero-extend
  // sequence for them.
  //
  // We simply do the extension for all situations in this method, but try to
  // remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}