xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
10b57cec5SDimitry Andric //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the ARMSelectionDAGInfo class.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "ARMTargetMachine.h"
14fe6060f1SDimitry Andric #include "ARMTargetTransformInfo.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
160b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
17fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h"
180b57cec5SDimitry Andric using namespace llvm;
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #define DEBUG_TYPE "arm-selectiondag-info"
210b57cec5SDimitry Andric 
22fe6060f1SDimitry Andric cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
23fe6060f1SDimitry Andric     "arm-memtransfer-tploop", cl::Hidden,
24fe6060f1SDimitry Andric     cl::desc("Control conversion of memcpy to "
25fe6060f1SDimitry Andric              "Tail predicated loops (WLSTP)"),
26fe6060f1SDimitry Andric     cl::init(TPLoop::ForceDisabled),
27fe6060f1SDimitry Andric     cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled",
28fe6060f1SDimitry Andric                           "Don't convert memcpy to TP loop."),
29fe6060f1SDimitry Andric                clEnumValN(TPLoop::ForceEnabled, "force-enabled",
30fe6060f1SDimitry Andric                           "Always convert memcpy to TP loop."),
31fe6060f1SDimitry Andric                clEnumValN(TPLoop::Allow, "allow",
32fe6060f1SDimitry Andric                           "Allow (may be subject to certain conditions) "
33fe6060f1SDimitry Andric                           "conversion of memcpy to TP loop.")));
34fe6060f1SDimitry Andric 
350b57cec5SDimitry Andric // Emit, if possible, a specialized version of the given Libcall. Typically this
360b57cec5SDimitry Andric // means selecting the appropriately aligned version, but we also convert memset
370b57cec5SDimitry Andric // of 0 into memclr.
EmitSpecializedLibcall(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,RTLIB::Libcall LC) const380b57cec5SDimitry Andric SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
390b57cec5SDimitry Andric     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
400b57cec5SDimitry Andric     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
410b57cec5SDimitry Andric   const ARMSubtarget &Subtarget =
420b57cec5SDimitry Andric       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
430b57cec5SDimitry Andric   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   // Only use a specialized AEABI function if the default version of this
460b57cec5SDimitry Andric   // Libcall is an AEABI function.
470b57cec5SDimitry Andric   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
480b57cec5SDimitry Andric     return SDValue();
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
510b57cec5SDimitry Andric   // able to translate memset to memclr and use the value to index the function
520b57cec5SDimitry Andric   // name array.
530b57cec5SDimitry Andric   enum {
540b57cec5SDimitry Andric     AEABI_MEMCPY = 0,
550b57cec5SDimitry Andric     AEABI_MEMMOVE,
560b57cec5SDimitry Andric     AEABI_MEMSET,
570b57cec5SDimitry Andric     AEABI_MEMCLR
580b57cec5SDimitry Andric   } AEABILibcall;
590b57cec5SDimitry Andric   switch (LC) {
600b57cec5SDimitry Andric   case RTLIB::MEMCPY:
610b57cec5SDimitry Andric     AEABILibcall = AEABI_MEMCPY;
620b57cec5SDimitry Andric     break;
630b57cec5SDimitry Andric   case RTLIB::MEMMOVE:
640b57cec5SDimitry Andric     AEABILibcall = AEABI_MEMMOVE;
650b57cec5SDimitry Andric     break;
660b57cec5SDimitry Andric   case RTLIB::MEMSET:
670b57cec5SDimitry Andric     AEABILibcall = AEABI_MEMSET;
68*06c3fb27SDimitry Andric     if (isNullConstant(Src))
690b57cec5SDimitry Andric       AEABILibcall = AEABI_MEMCLR;
700b57cec5SDimitry Andric     break;
710b57cec5SDimitry Andric   default:
720b57cec5SDimitry Andric     return SDValue();
730b57cec5SDimitry Andric   }
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric   // Choose the most-aligned libcall variant that we can
760b57cec5SDimitry Andric   enum {
770b57cec5SDimitry Andric     ALIGN1 = 0,
780b57cec5SDimitry Andric     ALIGN4,
790b57cec5SDimitry Andric     ALIGN8
800b57cec5SDimitry Andric   } AlignVariant;
810b57cec5SDimitry Andric   if ((Align & 7) == 0)
820b57cec5SDimitry Andric     AlignVariant = ALIGN8;
830b57cec5SDimitry Andric   else if ((Align & 3) == 0)
840b57cec5SDimitry Andric     AlignVariant = ALIGN4;
850b57cec5SDimitry Andric   else
860b57cec5SDimitry Andric     AlignVariant = ALIGN1;
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric   TargetLowering::ArgListTy Args;
890b57cec5SDimitry Andric   TargetLowering::ArgListEntry Entry;
900b57cec5SDimitry Andric   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
910b57cec5SDimitry Andric   Entry.Node = Dst;
920b57cec5SDimitry Andric   Args.push_back(Entry);
930b57cec5SDimitry Andric   if (AEABILibcall == AEABI_MEMCLR) {
940b57cec5SDimitry Andric     Entry.Node = Size;
950b57cec5SDimitry Andric     Args.push_back(Entry);
960b57cec5SDimitry Andric   } else if (AEABILibcall == AEABI_MEMSET) {
970b57cec5SDimitry Andric     // Adjust parameters for memset, EABI uses format (ptr, size, value),
980b57cec5SDimitry Andric     // GNU library uses (ptr, value, size)
990b57cec5SDimitry Andric     // See RTABI section 4.3.4
1000b57cec5SDimitry Andric     Entry.Node = Size;
1010b57cec5SDimitry Andric     Args.push_back(Entry);
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric     // Extend or truncate the argument to be an i32 value for the call.
1040b57cec5SDimitry Andric     if (Src.getValueType().bitsGT(MVT::i32))
1050b57cec5SDimitry Andric       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
1060b57cec5SDimitry Andric     else if (Src.getValueType().bitsLT(MVT::i32))
1070b57cec5SDimitry Andric       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric     Entry.Node = Src;
1100b57cec5SDimitry Andric     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
1110b57cec5SDimitry Andric     Entry.IsSExt = false;
1120b57cec5SDimitry Andric     Args.push_back(Entry);
1130b57cec5SDimitry Andric   } else {
1140b57cec5SDimitry Andric     Entry.Node = Src;
1150b57cec5SDimitry Andric     Args.push_back(Entry);
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric     Entry.Node = Size;
1180b57cec5SDimitry Andric     Args.push_back(Entry);
1190b57cec5SDimitry Andric   }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   char const *FunctionNames[4][3] = {
1220b57cec5SDimitry Andric     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
1230b57cec5SDimitry Andric     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
1240b57cec5SDimitry Andric     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
1250b57cec5SDimitry Andric     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
1260b57cec5SDimitry Andric   };
1270b57cec5SDimitry Andric   TargetLowering::CallLoweringInfo CLI(DAG);
1280b57cec5SDimitry Andric   CLI.setDebugLoc(dl)
1290b57cec5SDimitry Andric       .setChain(Chain)
1300b57cec5SDimitry Andric       .setLibCallee(
1310b57cec5SDimitry Andric           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
1320b57cec5SDimitry Andric           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
1330b57cec5SDimitry Andric                                 TLI->getPointerTy(DAG.getDataLayout())),
1340b57cec5SDimitry Andric           std::move(Args))
1350b57cec5SDimitry Andric       .setDiscardResult();
1360b57cec5SDimitry Andric   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   return CallResult.second;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric 
shouldGenerateInlineTPLoop(const ARMSubtarget & Subtarget,const SelectionDAG & DAG,ConstantSDNode * ConstantSize,Align Alignment,bool IsMemcpy)141fe6060f1SDimitry Andric static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
142fe6060f1SDimitry Andric                                        const SelectionDAG &DAG,
143fe6060f1SDimitry Andric                                        ConstantSDNode *ConstantSize,
144fe6060f1SDimitry Andric                                        Align Alignment, bool IsMemcpy) {
145fe6060f1SDimitry Andric   auto &F = DAG.getMachineFunction().getFunction();
146fe6060f1SDimitry Andric   if (!EnableMemtransferTPLoop)
147fe6060f1SDimitry Andric     return false;
148fe6060f1SDimitry Andric   if (EnableMemtransferTPLoop == TPLoop::ForceEnabled)
149fe6060f1SDimitry Andric     return true;
150fe6060f1SDimitry Andric   // Do not generate inline TP loop if optimizations is disabled,
151fe6060f1SDimitry Andric   // or if optimization for size (-Os or -Oz) is on.
152fe6060f1SDimitry Andric   if (F.hasOptNone() || F.hasOptSize())
153fe6060f1SDimitry Andric     return false;
154fe6060f1SDimitry Andric   // If cli option is unset, for memset always generate inline TP.
155fe6060f1SDimitry Andric   // For memcpy, check some conditions
156fe6060f1SDimitry Andric   if (!IsMemcpy)
157fe6060f1SDimitry Andric     return true;
158fe6060f1SDimitry Andric   if (!ConstantSize && Alignment >= Align(4))
159fe6060f1SDimitry Andric     return true;
160fe6060f1SDimitry Andric   if (ConstantSize &&
161fe6060f1SDimitry Andric       ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
162fe6060f1SDimitry Andric       ConstantSize->getZExtValue() <
163fe6060f1SDimitry Andric           Subtarget.getMaxMemcpyTPInlineSizeThreshold())
164fe6060f1SDimitry Andric     return true;
165fe6060f1SDimitry Andric   return false;
166fe6060f1SDimitry Andric }
167fe6060f1SDimitry Andric 
EmitTargetCodeForMemcpy(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const1680b57cec5SDimitry Andric SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
1690b57cec5SDimitry Andric     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
1705ffd83dbSDimitry Andric     SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
1710b57cec5SDimitry Andric     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
1720b57cec5SDimitry Andric   const ARMSubtarget &Subtarget =
1730b57cec5SDimitry Andric       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
174fe6060f1SDimitry Andric   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
175fe6060f1SDimitry Andric 
176fe6060f1SDimitry Andric   if (Subtarget.hasMVEIntegerOps() &&
177fe6060f1SDimitry Andric       shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
178fe6060f1SDimitry Andric     return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
179fe6060f1SDimitry Andric                        DAG.getZExtOrTrunc(Size, dl, MVT::i32));
180fe6060f1SDimitry Andric 
1810b57cec5SDimitry Andric   // Do repeated 4-byte loads and stores. To be improved.
1820b57cec5SDimitry Andric   // This requires 4-byte alignment.
1835ffd83dbSDimitry Andric   if (Alignment < Align(4))
1840b57cec5SDimitry Andric     return SDValue();
1850b57cec5SDimitry Andric   // This requires the copy size to be a constant, preferably
1860b57cec5SDimitry Andric   // within a subtarget-specific limit.
1870b57cec5SDimitry Andric   if (!ConstantSize)
1885ffd83dbSDimitry Andric     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
1895ffd83dbSDimitry Andric                                   Alignment.value(), RTLIB::MEMCPY);
1900b57cec5SDimitry Andric   uint64_t SizeVal = ConstantSize->getZExtValue();
1910b57cec5SDimitry Andric   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
1925ffd83dbSDimitry Andric     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
1935ffd83dbSDimitry Andric                                   Alignment.value(), RTLIB::MEMCPY);
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   unsigned BytesLeft = SizeVal & 3;
1960b57cec5SDimitry Andric   unsigned NumMemOps = SizeVal >> 2;
1970b57cec5SDimitry Andric   unsigned EmittedNumMemOps = 0;
1980b57cec5SDimitry Andric   EVT VT = MVT::i32;
1990b57cec5SDimitry Andric   unsigned VTSize = 4;
2000b57cec5SDimitry Andric   unsigned i = 0;
2010b57cec5SDimitry Andric   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
2020b57cec5SDimitry Andric   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
2030b57cec5SDimitry Andric   SDValue TFOps[6];
2040b57cec5SDimitry Andric   SDValue Loads[6];
2050b57cec5SDimitry Andric   uint64_t SrcOff = 0, DstOff = 0;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
2080b57cec5SDimitry Andric   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
2090b57cec5SDimitry Andric   // pressure on the general purpose registers. However this seems harder to map
2100b57cec5SDimitry Andric   // onto the register allocator's view of the world.
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric   // The number of MEMCPY pseudo-instructions to emit. We use up to
2130b57cec5SDimitry Andric   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
2140b57cec5SDimitry Andric   // later on. This is a lower bound on the number of MEMCPY operations we must
2150b57cec5SDimitry Andric   // emit.
2160b57cec5SDimitry Andric   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric   // Code size optimisation: do not inline memcpy if expansion results in
2190b57cec5SDimitry Andric   // more instructions than the libary call.
2200b57cec5SDimitry Andric   if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
2210b57cec5SDimitry Andric     return SDValue();
2220b57cec5SDimitry Andric   }
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
2270b57cec5SDimitry Andric     // Evenly distribute registers among MEMCPY operations to reduce register
2280b57cec5SDimitry Andric     // pressure.
2290b57cec5SDimitry Andric     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
2300b57cec5SDimitry Andric     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
2330b57cec5SDimitry Andric                       DAG.getConstant(NumRegs, dl, MVT::i32));
2340b57cec5SDimitry Andric     Src = Dst.getValue(1);
2350b57cec5SDimitry Andric     Chain = Dst.getValue(2);
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
2380b57cec5SDimitry Andric     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric     EmittedNumMemOps = NextEmittedNumMemOps;
2410b57cec5SDimitry Andric   }
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   if (BytesLeft == 0)
2440b57cec5SDimitry Andric     return Chain;
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   // Issue loads / stores for the trailing (1 - 3) bytes.
2470b57cec5SDimitry Andric   auto getRemainingValueType = [](unsigned BytesLeft) {
2480b57cec5SDimitry Andric     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
2490b57cec5SDimitry Andric   };
2500b57cec5SDimitry Andric   auto getRemainingSize = [](unsigned BytesLeft) {
2510b57cec5SDimitry Andric     return (BytesLeft >= 2) ? 2 : 1;
2520b57cec5SDimitry Andric   };
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   unsigned BytesLeftSave = BytesLeft;
2550b57cec5SDimitry Andric   i = 0;
2560b57cec5SDimitry Andric   while (BytesLeft) {
2570b57cec5SDimitry Andric     VT = getRemainingValueType(BytesLeft);
2580b57cec5SDimitry Andric     VTSize = getRemainingSize(BytesLeft);
2590b57cec5SDimitry Andric     Loads[i] = DAG.getLoad(VT, dl, Chain,
2600b57cec5SDimitry Andric                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
2610b57cec5SDimitry Andric                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
2620b57cec5SDimitry Andric                            SrcPtrInfo.getWithOffset(SrcOff));
2630b57cec5SDimitry Andric     TFOps[i] = Loads[i].getValue(1);
2640b57cec5SDimitry Andric     ++i;
2650b57cec5SDimitry Andric     SrcOff += VTSize;
2660b57cec5SDimitry Andric     BytesLeft -= VTSize;
2670b57cec5SDimitry Andric   }
268bdd1243dSDimitry Andric   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric   i = 0;
2710b57cec5SDimitry Andric   BytesLeft = BytesLeftSave;
2720b57cec5SDimitry Andric   while (BytesLeft) {
2730b57cec5SDimitry Andric     VT = getRemainingValueType(BytesLeft);
2740b57cec5SDimitry Andric     VTSize = getRemainingSize(BytesLeft);
2750b57cec5SDimitry Andric     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
2760b57cec5SDimitry Andric                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
2770b57cec5SDimitry Andric                                         DAG.getConstant(DstOff, dl, MVT::i32)),
2780b57cec5SDimitry Andric                             DstPtrInfo.getWithOffset(DstOff));
2790b57cec5SDimitry Andric     ++i;
2800b57cec5SDimitry Andric     DstOff += VTSize;
2810b57cec5SDimitry Andric     BytesLeft -= VTSize;
2820b57cec5SDimitry Andric   }
283bdd1243dSDimitry Andric   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric 
EmitTargetCodeForMemmove(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const2860b57cec5SDimitry Andric SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
2870b57cec5SDimitry Andric     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
2885ffd83dbSDimitry Andric     SDValue Size, Align Alignment, bool isVolatile,
2890b57cec5SDimitry Andric     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
2905ffd83dbSDimitry Andric   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
2915ffd83dbSDimitry Andric                                 Alignment.value(), RTLIB::MEMMOVE);
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric 
EmitTargetCodeForMemset(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo) const2940b57cec5SDimitry Andric SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
2950b57cec5SDimitry Andric     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
29681ad6265SDimitry Andric     SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
2970b57cec5SDimitry Andric     MachinePointerInfo DstPtrInfo) const {
298fe6060f1SDimitry Andric 
299fe6060f1SDimitry Andric   const ARMSubtarget &Subtarget =
300fe6060f1SDimitry Andric       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
301fe6060f1SDimitry Andric 
302fe6060f1SDimitry Andric   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
303fe6060f1SDimitry Andric 
304fe6060f1SDimitry Andric   // Generate TP loop for llvm.memset
305fe6060f1SDimitry Andric   if (Subtarget.hasMVEIntegerOps() &&
306fe6060f1SDimitry Andric       shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
307fe6060f1SDimitry Andric                                  false)) {
308fe6060f1SDimitry Andric     Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
309fe6060f1SDimitry Andric                                   DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
310fe6060f1SDimitry Andric     return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
311fe6060f1SDimitry Andric                        DAG.getZExtOrTrunc(Size, dl, MVT::i32));
312fe6060f1SDimitry Andric   }
313fe6060f1SDimitry Andric 
31481ad6265SDimitry Andric   if (!AlwaysInline)
3155ffd83dbSDimitry Andric     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
3165ffd83dbSDimitry Andric                                   Alignment.value(), RTLIB::MEMSET);
31781ad6265SDimitry Andric 
31881ad6265SDimitry Andric   return SDValue();
3190b57cec5SDimitry Andric }
320