//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::f32: {
    // Allocate stack like below
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Use align=8 for the dummy area to align the beginning of these 2 areas.
    State.AllocateStack(4, Align(8)); // for empty area
    // Use align=4 for the value to place it just after the dummy area.
    unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
  default:
    return false;
  }
}

#include "VEGenCallingConv.inc"

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = RetCC_VE;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, RetCC_VE);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
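    // CC_VE records the required widening in LocInfo: Full needs no
    // extension, while SExt/ZExt/AExt select the matching extend node below.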
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    assert(!VA.needsCustom() && "Unexpected custom lowering");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // Get the high bits for i32 struct elements.
      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
                          DAG.getConstant(32, DL, MVT::i32));

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp+176.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s8.
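  // Because the caller stores every variable argument to the parameter area
  // as well (see the second CC_VE2 analysis in LowerCall), the callee can read
  // all of them from memory starting at %fp+176.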
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the 176 bytes of register save area.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);

  // VE requires arguments to be passed in both registers and on the stack for
  // varargs or unprototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs to get the address of the callee function in a register,
  // so prepare to copy it to SX12 here.
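  // %s12 is the "outer" register in the VE ABI (see getRegisterByName above);
  // the glued CopyToReg emitted later moves the callee address into it.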
  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn GlobalAddress/ExternalSymbol node into a value node
  // containing the address of them here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp+176 in the callee frame,
    // %sp+176 in ours.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
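  // Operand order: the chain, one register operand per glued copy made above,
  // the call-preserved register mask, and finally the glue that binds the
  // copies to the call node.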
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32}', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // previous CopyFromReg node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
                       DAG.getConstant(32, DL, MVT::i32));

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      MachineMemOperand::Flags,
                                                      bool *Fast) const {
  if (Fast) {
    // It's fast anytime on VE.
    *Fast = true;
  }
  return true;
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y where ~X & Y.
  // Only simm7 works for X, and only mimm works for Y on VE. However, this
  // function is used to check whether an immediate value is OK for an and-not
  // instruction as both X and Y. Generating an additional instruction to
  // retrieve an immediate value is no good since the purpose of this
  // function is to convert a series of 3 instructions to another series of
  // 3 instructions with better parallelism. Therefore, we return false
  // for all immediate values now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  // correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);

  /// Load & Store {
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      // Turn FP extload into load/fpextend.
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);

      // Turn FP truncstore into trunc + store.
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have an i1 sign-extending load.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access with 8 bytes alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
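  // Expanding these uses the target-independent lowering: VE's va_list is a
  // plain pointer, so va_copy becomes a pointer copy and va_end emits nothing.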
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  /// } Stack

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // Use isel patterns for i32 and i64.
    setOperationAction(ISD::BSWAP, IntVT, Legal);
    setOperationAction(ISD::CTLZ, IntVT, Legal);
    setOperationAction(ISD::CTPOP, IntVT, Legal);

    // Use isel patterns for i64; promote i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so expand them by llvm.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(RET_FLAG)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
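// The Hi/Lo halves are ultimately selected to a lea.sl/lea pair, as the PIC
// sequences documented in makeAddress below illustrate.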
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
    // function has calls.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MFI.setHasCalls(true);
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local linkage PIC code.
      //     lea %s35, %gotoff_lo(.LCPI0_0)
      //     and %s35, %s35, (32)0
      //     lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
      //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
      // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local linkage PIC code.
    //     lea %s35, %got_lo(.LCPI0_0)
    //     and %s35, %s35, (32)0
    //     lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
    //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
    //     ld     %s35, (,%s35)
    // FIXME: use lea.sl %s35, %got_hi(.LCPI0_0)(%s35, %s15)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
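  // The sequence behaves like a real call: it clobbers the usual
  // call-clobbered registers (hence the preserved-register mask below) and
  // returns the TLS address in %s0.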
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
                             DAG.getIntPtrConstant(0, dl, true),
                             Chain.getValue(1), dl);
  Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return LowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f32) {
    // float --> need special handling like below.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
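  // For f32, VAList was advanced by 4 above, so this load reads the upper half
  // of the 8-byte slot, where the value actually lives.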
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate the following code:
  //   (void)__ve_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  }
}
/// } Custom Lower