//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder(
    "bpf-expand-memcpy-in-order", cl::Hidden, cl::init(false),
    cl::desc("Expand memcpy into load/store pairs in order"));

// Report a lowering failure as a DiagnosticInfoUnsupported diagnostic rather
// than aborting, so that compilation can continue and report further errors.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg,
                 SDValue Val) {
  MachineFunction &MF = DAG.getMachineFunction();
  std::string Str;
  raw_string_ostream OS(Str);
  OS << Msg;
  Val->print(OS);
  OS.flush();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
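  // BPF general-purpose registers are 64-bit; the alu32 feature additionally
  // exposes their low 32 bits as subregisters, which is what makes i32 a
  // legal type below.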
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  for (auto VT : { MVT::i32, MVT::i64 }) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments.
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // causes trouble for the memcpy pattern matcher inside kernel eBPF JIT
    // compilers.
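    //
    // For example, if a later pass hoists an unrelated instruction in between
    // the expanded load/store pairs, the copy is no longer the contiguous
    // sequence the JIT's pattern matcher expects.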
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
  } else {
    // Inline memcpy() so the kernel can see the explicit copy.
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
  }

  // CPU/Feature control.
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1)
    // GCC constraint letters.
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    default:
      break;
    }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    report_fatal_error("Unsupported dynamic stack allocation");
  default:
    llvm_unreachable("unimplemented operand");
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"
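
// BPF passes the first five arguments in registers R1-R5 and returns the
// result in R0; there is no on-stack argument passing, which is why the
// lowering below rejects functions and calls with too many arguments.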
SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  for (auto &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Arguments passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        errs() << "LowerFormalArguments Unhandled argument type: "
               << RegVT.getEVTString() << '\n';
        llvm_unreachable(nullptr);
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert
        // an assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      fail(DL, DAG, "defined with too many args");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }

  if (IsVarArg || MF.getFunction().hasStructRetAttr()) {
    fail(DL, DAG, "functions with VarArgs or StructRet are not supported");
  }

  return Chain;
}

const unsigned BPFTargetLowering::MaxArgs = 5;

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
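
  // When alu32 is enabled, the CC_BPF32 table can assign 32-bit values to
  // 32-bit subregisters; CC_BPF64 promotes everything to 64-bit registers.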
  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many args to ", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;

    fail(CLI.DL, DAG, "pass by value not supported ", Callee);
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk the argument assignments.
  for (unsigned i = 0,
                e = std::min(static_cast<unsigned>(ArgLocs.size()), MaxArgs);
       i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      llvm_unreachable("call arg pass bug");
  }

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InFlag
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG, Twine("A call to built-in function '"
                            + StringRef(E->getSymbol())
                            + "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true),
      DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL);
  InFlag = Chain.getValue(1);
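
  // At this point the DAG for a direct call looks roughly like:
  //   callseq_start -> CopyToReg(R1..R5) -> BPFISD::CALL -> callseq_end
  // with glue threading the pieces together so they cannot be scheduled
  // apart.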
  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_FLAG;

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "only integer returns supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);

    // Guarantee that all emitted copies are stuck together,
    // so they cannot be scheduled apart from the return.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() >= 2) {
    fail(DL, DAG, "only small returns supported");
    for (unsigned i = 0, e = Ins.size(); i != e; ++i)
      InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
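
  // A BPF call produces at most one register-sized result (in R0), so the
  // loop below runs at most once; larger returns were rejected above.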
  // Copy all of the result registers out of their specified physregs.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

// Swap the operands and condition code so the comparison can be emitted with
// the jump instructions available before the jmp-ext feature (which added the
// "less than" family of jumps).
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_FLAG:
    return "BPFISD::RET_FLAG";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "Invalid offset for global address");

  SDLoc DL(Op);
  const GlobalValue *GV = N->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}
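
// Promote a 32-bit subregister value to 64 bits. Zero extension is a single
// MOV_32_64 (32-bit moves already clear the upper 32 bits); sign extension
// additionally shifts left and arithmetic-shifts right by 32, i.e. roughly:
//   r = mov32 w; r <<= 32; r s>>= 32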
unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }

  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
      .addReg(PromotedReg0).addImm(32);
  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
      .addReg(PromotedReg1).addImm(32);

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
  // a third scratch register to serve as the destination register of the load
  // and the source register of the store.
  //
  // The scratch register here carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an Undef value isn't a
  // problem, as we are loading memory into it anyway. The Dead flag is needed
  // as the value in the scratch register isn't supposed to be used by any
  // other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
         "Unexpected instr type to insert");
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);
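
  // The lowering below builds a control-flow diamond, sketched roughly as:
  //
  //     ThisMBB
  //      |    \
  //      |   Copy0MBB
  //      |    /
  //     Copy1MBB
  //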
  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values
  // to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  //  ...
  //   TrueVal = ...
  //   jmp_XX r1, r2 goto Copy1MBB
  //   fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert the branch on the condition code.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // Without the jmp32 feature, eBPF only has 64-bit comparisons, so any
  // 32-bit comparison needs to be promoted. However, if the 32-bit comparison
  // operands are destination registers, they are implicitly zero-extended
  // already and there is no need for an explicit zero-extend sequence.
  //
  // We simply do the extension in all situations in this method, and we will
  // try to remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Sanity check before we build a J*_ri instruction.
    assert(isInt<32>(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //   %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}