xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ARMSelectionDAGInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARMTargetMachine.h"
14 #include "llvm/CodeGen/SelectionDAG.h"
15 #include "llvm/IR/DerivedTypes.h"
16 using namespace llvm;
17 
18 #define DEBUG_TYPE "arm-selectiondag-info"
19 
20 // Emit, if possible, a specialized version of the given Libcall. Typically this
21 // means selecting the appropriately aligned version, but we also convert memset
22 // of 0 into memclr.
23 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
24     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
25     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
26   const ARMSubtarget &Subtarget =
27       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
28   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
29 
30   // Only use a specialized AEABI function if the default version of this
31   // Libcall is an AEABI function.
32   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
33     return SDValue();
34 
35   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
36   // able to translate memset to memclr and use the value to index the function
37   // name array.
38   enum {
39     AEABI_MEMCPY = 0,
40     AEABI_MEMMOVE,
41     AEABI_MEMSET,
42     AEABI_MEMCLR
43   } AEABILibcall;
44   switch (LC) {
45   case RTLIB::MEMCPY:
46     AEABILibcall = AEABI_MEMCPY;
47     break;
48   case RTLIB::MEMMOVE:
49     AEABILibcall = AEABI_MEMMOVE;
50     break;
51   case RTLIB::MEMSET:
52     AEABILibcall = AEABI_MEMSET;
53     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
54       if (ConstantSrc->getZExtValue() == 0)
55         AEABILibcall = AEABI_MEMCLR;
56     break;
57   default:
58     return SDValue();
59   }
60 
61   // Choose the most-aligned libcall variant that we can
62   enum {
63     ALIGN1 = 0,
64     ALIGN4,
65     ALIGN8
66   } AlignVariant;
67   if ((Align & 7) == 0)
68     AlignVariant = ALIGN8;
69   else if ((Align & 3) == 0)
70     AlignVariant = ALIGN4;
71   else
72     AlignVariant = ALIGN1;
73 
74   TargetLowering::ArgListTy Args;
75   TargetLowering::ArgListEntry Entry;
76   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
77   Entry.Node = Dst;
78   Args.push_back(Entry);
79   if (AEABILibcall == AEABI_MEMCLR) {
80     Entry.Node = Size;
81     Args.push_back(Entry);
82   } else if (AEABILibcall == AEABI_MEMSET) {
83     // Adjust parameters for memset, EABI uses format (ptr, size, value),
84     // GNU library uses (ptr, value, size)
85     // See RTABI section 4.3.4
86     Entry.Node = Size;
87     Args.push_back(Entry);
88 
89     // Extend or truncate the argument to be an i32 value for the call.
90     if (Src.getValueType().bitsGT(MVT::i32))
91       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
92     else if (Src.getValueType().bitsLT(MVT::i32))
93       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
94 
95     Entry.Node = Src;
96     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
97     Entry.IsSExt = false;
98     Args.push_back(Entry);
99   } else {
100     Entry.Node = Src;
101     Args.push_back(Entry);
102 
103     Entry.Node = Size;
104     Args.push_back(Entry);
105   }
106 
107   char const *FunctionNames[4][3] = {
108     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
109     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
110     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
111     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
112   };
113   TargetLowering::CallLoweringInfo CLI(DAG);
114   CLI.setDebugLoc(dl)
115       .setChain(Chain)
116       .setLibCallee(
117           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
118           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
119                                 TLI->getPointerTy(DAG.getDataLayout())),
120           std::move(Args))
121       .setDiscardResult();
122   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
123 
124   return CallResult.second;
125 }
126 
127 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
128     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
129     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
130     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
131   const ARMSubtarget &Subtarget =
132       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
133   // Do repeated 4-byte loads and stores. To be improved.
134   // This requires 4-byte alignment.
135   if ((Align & 3) != 0)
136     return SDValue();
137   // This requires the copy size to be a constant, preferably
138   // within a subtarget-specific limit.
139   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
140   if (!ConstantSize)
141     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
142                                   RTLIB::MEMCPY);
143   uint64_t SizeVal = ConstantSize->getZExtValue();
144   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
145     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
146                                   RTLIB::MEMCPY);
147 
148   unsigned BytesLeft = SizeVal & 3;
149   unsigned NumMemOps = SizeVal >> 2;
150   unsigned EmittedNumMemOps = 0;
151   EVT VT = MVT::i32;
152   unsigned VTSize = 4;
153   unsigned i = 0;
154   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
155   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
156   SDValue TFOps[6];
157   SDValue Loads[6];
158   uint64_t SrcOff = 0, DstOff = 0;
159 
160   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
161   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
162   // pressure on the general purpose registers. However this seems harder to map
163   // onto the register allocator's view of the world.
164 
165   // The number of MEMCPY pseudo-instructions to emit. We use up to
166   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
167   // later on. This is a lower bound on the number of MEMCPY operations we must
168   // emit.
169   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
170 
171   // Code size optimisation: do not inline memcpy if expansion results in
172   // more instructions than the libary call.
173   if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
174     return SDValue();
175   }
176 
177   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
178 
179   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
180     // Evenly distribute registers among MEMCPY operations to reduce register
181     // pressure.
182     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
183     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
184 
185     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
186                       DAG.getConstant(NumRegs, dl, MVT::i32));
187     Src = Dst.getValue(1);
188     Chain = Dst.getValue(2);
189 
190     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
191     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
192 
193     EmittedNumMemOps = NextEmittedNumMemOps;
194   }
195 
196   if (BytesLeft == 0)
197     return Chain;
198 
199   // Issue loads / stores for the trailing (1 - 3) bytes.
200   auto getRemainingValueType = [](unsigned BytesLeft) {
201     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
202   };
203   auto getRemainingSize = [](unsigned BytesLeft) {
204     return (BytesLeft >= 2) ? 2 : 1;
205   };
206 
207   unsigned BytesLeftSave = BytesLeft;
208   i = 0;
209   while (BytesLeft) {
210     VT = getRemainingValueType(BytesLeft);
211     VTSize = getRemainingSize(BytesLeft);
212     Loads[i] = DAG.getLoad(VT, dl, Chain,
213                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
214                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
215                            SrcPtrInfo.getWithOffset(SrcOff));
216     TFOps[i] = Loads[i].getValue(1);
217     ++i;
218     SrcOff += VTSize;
219     BytesLeft -= VTSize;
220   }
221   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
222                       makeArrayRef(TFOps, i));
223 
224   i = 0;
225   BytesLeft = BytesLeftSave;
226   while (BytesLeft) {
227     VT = getRemainingValueType(BytesLeft);
228     VTSize = getRemainingSize(BytesLeft);
229     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
230                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
231                                         DAG.getConstant(DstOff, dl, MVT::i32)),
232                             DstPtrInfo.getWithOffset(DstOff));
233     ++i;
234     DstOff += VTSize;
235     BytesLeft -= VTSize;
236   }
237   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
238                      makeArrayRef(TFOps, i));
239 }
240 
241 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
242     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
243     SDValue Size, unsigned Align, bool isVolatile,
244     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
245   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
246                                 RTLIB::MEMMOVE);
247 }
248 
249 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
250     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
251     SDValue Size, unsigned Align, bool isVolatile,
252     MachinePointerInfo DstPtrInfo) const {
253   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
254                                 RTLIB::MEMSET);
255 }
256