//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::f32: {
    // Allocate stack space as follows:
    //      0      4
    //      +------+------+
    //      | empty| float|
    //      +------+------+
    // Use align=8 for the dummy area so that the beginning of these two areas
    // is 8-byte aligned.
    State.AllocateStack(4, Align(8)); // for empty area
    // Use align=4 for the value so that it lands immediately after the dummy
    // area.
    unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
  default:
    return false;
  }
}

#include "VEGenCallingConv.inc"

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = RetCC_VE;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return values to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, RetCC_VE);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);
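  // Illustrative only (assuming a single i64 value returned in %s0): the
  // loop below builds a glued sequence roughly like
  //   t1: ch,glue = CopyToReg t0, Register:i64 %s0, ...
  //   t2: ch = VEISD::RET_FLAG t1, Register:i64 %s0, t1:1
  // The glue operand keeps each copy and the final return stuck together so
  // nothing that clobbers the return registers can be scheduled in between.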
  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign- or zero-extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    assert(!VA.needsCustom() && "Unexpected custom lowering");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offsets computed
  // by CC_VE are correct now.
  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // Get the high bits for i32 struct elements.
      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
                          DAG.getConstant(32, DL, MVT::i32));

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      default:
        break;
      }
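      // Sketch (assuming an i32 parameter that CC_VE assigns to a 64-bit
      // register with SExt): the value arrives here as
      //   t1: i64,ch = CopyFromReg t0, Register:i64 %0
      //   t2: i64 = AssertSext t1, i32
      // and the truncate below then yields the i32 value; later combines can
      // rely on the AssertSext instead of emitting a fresh sign extension.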
      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp+176.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate the offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the 176 bytes of register save area.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name for global register variable");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // The VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offsets computed
  // by CC_VE are correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
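  // Note on the constants above (see VEFrameLowering.cpp for the
  // authoritative frame layout): the first 176 bytes of a VE stack frame
  // hold the register save and linkage areas, so outgoing stack arguments
  // start at %sp+176 here and appear at %fp+176 in the callee. The 64-byte
  // preserved area reserved first corresponds to the eight argument
  // registers %s0-%s7, so register-assigned and stack-assigned arguments
  // share one consistent offset numbering.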
  // VE requires arguments of varargs and unprototyped functions to be passed
  // BOTH in registers and on the stack.
  bool UseBoth = CLI.IsVarArg;

  // Analyze the operands again if they must be stored BOTH in registers and
  // on the stack.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn the GlobalAddress/ExternalSymbol node into a value node containing
  // the address of the callee here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
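    // For example (sketch): an i32 argument assigned a 64-bit location with
    // LocInfo SExt becomes
    //   t2: i64 = sign_extend t1
    // so that the value fills the full 64-bit register or 8-byte stack slot
    // expected by the VE ABI.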
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp+176 in the callee frame,
    // %sp+176 in ours.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
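  // At this point the call sequence looks roughly like (illustrative only):
  //   callseq_start
  //     -> stores of stack-passed arguments (joined by a TokenFactor)
  //     -> CopyToReg %s12, <callee address>        (glued)
  //     -> CopyToReg <arg regs>, <arg values>      (glued)
  //     -> VEISD::CALL [register mask]             (glued)
  //   callseq_end
  // What remains is to read the results back out of their physregs.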
  // Set the inreg flag manually for codegen-generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);

  // Copy all of the result registers out of their specified physregs.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32}', two consecutive i32 members can
    // reside in the high and low bits of the same register. Reuse the
    // previous CopyFromReg node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
                       DAG.getConstant(32, DL, MVT::i32));

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      MachineMemOperand::Flags,
                                                      bool *Fast) const {
  if (Fast) {
    // Misaligned accesses are always fast on VE.
    *Fast = true;
  }
  return true;
}
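// Background for hasAndNot() below (summary of VE immediate encodings; see
// the VE ISA manual for the authoritative definition): "simm7" is a signed
// 7-bit immediate in [-64, 63], and "mimm" is a 64-bit mask written (m)0 or
// (m)1 in VE assembly, where (m)0 is m zeros followed by ones and (m)1 is m
// ones followed by zeros. For example, the (32)0 appearing in the PIC
// sequences later in this file is the low-32-bit mask 0x00000000ffffffff.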
bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y where ~X & Y is
  // computed: only simm7 works for X, and only mimm works for Y. However,
  // this function is used to check whether a single immediate value is OK as
  // both X and Y. Generating an additional instruction to materialize an
  // immediate value is no good, since the purpose of this function is to
  // convert a series of 3 instructions into another series of 3 instructions
  // with better parallelism. Therefore, we return false for all immediate
  // values for now.
  // FIXME: Change the hasAndNot function to take two operands so it works
  // correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's OK for generic registers.
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  // f64 values live in the same 64-bit scalar registers as i64.
  addRegisterClass(MVT::f64, &VE::I64RegClass);

  /// Load & Store {
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      // Turn FP extload into load/fpextend.
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);

      // Turn FP truncstore into trunc + store.
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have i1 extending loads or i1 truncating stores.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access memory with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  /// } Stack
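  // Legend for the operation actions below (standard LLVM semantics): Legal
  // leaves the node to the isel patterns, Promote performs the operation in
  // a wider type first, and Expand has legalization rewrite the node into
  // other operations, e.g.
  //   (srem i64 %a, %b) -> (sub %a, (mul (sdiv %a, %b), %b))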
  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // Use isel patterns for i32 and i64.
    setOperationAction(ISD::BSWAP, IntVT, Legal);
    setOperationAction(ISD::CTLZ, IntVT, Legal);
    setOperationAction(ISD::CTPOP, IntVT, Legal);

    // Use isel patterns for i64, promote i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so have LLVM expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(RET_FLAG)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  llvm_unreachable("Unhandled address SDNode");
}
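// For reference (illustrative; actual registers are chosen by the register
// allocator): the Hi/Lo pair produced by makeHiLoPair() below is selected
// into VE's absolute 64-bit addressing sequence, along the lines of
//   lea     %s0, sym@lo           ; low 32 bits, sign-extended
//   and     %s0, %s0, (32)0       ; clear the upper 32 bits
//   lea.sl  %s0, sym@hi(, %s0)    ; add the high 32 bits shifted left by 32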
// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a GOT load for every variable!
  if (isPositionIndependent()) {
    // GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
    // function has calls.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MFI.setHasCalls(true);
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for PIC code with local linkage:
      //   lea %s35, %gotoff_lo(.LCPI0_0)
      //   and %s35, %s35, (32)0
      //   lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
      //   adds.l %s35, %s15, %s35               ; %s15 is GOT
      // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for PIC code without local linkage:
    //   lea %s35, %got_lo(.LCPI0_0)
    //   and %s35, %s35, (32)0
    //   lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
    //   adds.l %s35, %s15, %s35                 ; %s15 is GOT
    //   ld %s35, (,%s35)
    // FIXME: use lea.sl %s35, %got_hi(.LCPI0_0)(%s35, %s15)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}
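// TLS lowering. VEISD::GETTLSADDR below models a call into the TLS runtime:
// the address of the thread-local object comes back in %s0, so the node is
// wrapped in callseq_start/callseq_end, carries the C calling convention's
// call-preserved register mask, and is followed by a CopyFromReg from SX0.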
SDValue
VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
                             DAG.getIntPtrConstant(0, dl, true),
                             Chain.getValue(1), dl);
  Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return LowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need the frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;
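  // Every variadic argument occupies one 8-byte slot on VE, so NextPtr
  // always advances by 8. An f32, however, lives in the second 4 bytes of
  // its slot (see the diagram in allocateFloat() above), so for f32 the load
  // address itself is additionally bumped by 4.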
  if (VT == MVT::f32) {
    // float --> needs special handling as below.
    //      0      4
    //      +------+------+
    //      | empty| float|
    //      +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate the following code:
  //   (void)__ve_grow_stack(size);
  //   ret = GETSTACKTOP; // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    // Round the returned address up to the requested alignment:
    //   Result = (Result + Alignment - 1) & ~(Alignment - 1)
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  }
}
/// } Custom Lower