1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the X86SelectionDAGInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "X86SelectionDAGInfo.h"
14 #include "X86ISelLowering.h"
15 #include "X86InstrInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/IR/DerivedTypes.h"
22
23 using namespace llvm;
24
25 #define DEBUG_TYPE "x86-selectiondag-info"
26
27 static cl::opt<bool>
28 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
29 cl::desc("Use fast short rep mov in memcpy lowering"));
30
isBaseRegConflictPossible(SelectionDAG & DAG,ArrayRef<MCPhysReg> ClobberSet) const31 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
32 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
33 // We cannot use TRI->hasBasePointer() until *after* we select all basic
34 // blocks. Legalization may introduce new stack temporaries with large
35 // alignment requirements. Fall back to generic code if there are any
36 // dynamic stack adjustments (hopefully rare) and the base pointer would
37 // conflict if we had to use it.
38 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
39 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
40 return false;
41
42 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
43 DAG.getSubtarget().getRegisterInfo());
44 return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
45 }
46
EmitTargetCodeForMemset(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Val,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo) const47 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
48 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
49 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
50 MachinePointerInfo DstPtrInfo) const {
51 // If to a segment-relative address space, use the default lowering.
52 if (DstPtrInfo.getAddrSpace() >= 256)
53 return SDValue();
54
55 // If the base register might conflict with our physical registers, bail out.
56 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
57 X86::ECX, X86::EAX, X86::EDI};
58 if (isBaseRegConflictPossible(DAG, ClobberSet))
59 return SDValue();
60
61 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
62 const X86Subtarget &Subtarget =
63 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
64
65 // If not DWORD aligned or size is more than the threshold, call the library.
66 // The libc version is likely to be faster for these cases. It can use the
67 // address value and run time information about the CPU.
68 if (Alignment < Align(4) || !ConstantSize ||
69 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
70 return SDValue();
71
72 uint64_t SizeVal = ConstantSize->getZExtValue();
73 SDValue InGlue;
74 EVT AVT;
75 SDValue Count;
76 unsigned BytesLeft = 0;
77 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
78 unsigned ValReg;
79 uint64_t Val = ValC->getZExtValue() & 255;
80
81 // If the value is a constant, then we can potentially use larger sets.
82 if (Alignment >= Align(4)) {
83 // DWORD aligned
84 AVT = MVT::i32;
85 ValReg = X86::EAX;
86 Val = (Val << 8) | Val;
87 Val = (Val << 16) | Val;
88 if (Subtarget.is64Bit() && Alignment >= Align(8)) { // QWORD aligned
89 AVT = MVT::i64;
90 ValReg = X86::RAX;
91 Val = (Val << 32) | Val;
92 }
93 } else if (Alignment == Align(2)) {
94 // WORD aligned
95 AVT = MVT::i16;
96 ValReg = X86::AX;
97 Val = (Val << 8) | Val;
98 } else {
99 // Byte aligned
100 AVT = MVT::i8;
101 ValReg = X86::AL;
102 Count = DAG.getIntPtrConstant(SizeVal, dl);
103 }
104
105 if (AVT.bitsGT(MVT::i8)) {
106 unsigned UBytes = AVT.getSizeInBits() / 8;
107 Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
108 BytesLeft = SizeVal % UBytes;
109 }
110
111 Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
112 InGlue);
113 InGlue = Chain.getValue(1);
114 } else {
115 AVT = MVT::i8;
116 Count = DAG.getIntPtrConstant(SizeVal, dl);
117 Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InGlue);
118 InGlue = Chain.getValue(1);
119 }
120
121 bool Use64BitRegs = Subtarget.isTarget64BitLP64();
122 Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
123 Count, InGlue);
124 InGlue = Chain.getValue(1);
125 Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
126 Dst, InGlue);
127 InGlue = Chain.getValue(1);
128
129 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
130 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
131 SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
132
133 /// RepStos can process the whole length.
134 if (BytesLeft == 0)
135 return RepStos;
136
137 // Handle the last 1 - 7 bytes.
138 SmallVector<SDValue, 4> Results;
139 Results.push_back(RepStos);
140 unsigned Offset = SizeVal - BytesLeft;
141 EVT AddrVT = Dst.getValueType();
142 EVT SizeVT = Size.getValueType();
143
144 Results.push_back(
145 DAG.getMemset(Chain, dl,
146 DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
147 DAG.getConstant(Offset, dl, AddrVT)),
148 Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
149 isVolatile, AlwaysInline,
150 /* CI */ nullptr, DstPtrInfo.getWithOffset(Offset)));
151
152 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
153 }
154
155 /// Emit a single REP MOVS{B,W,D,Q} instruction.
emitRepmovs(const X86Subtarget & Subtarget,SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,MVT AVT)156 static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
157 const SDLoc &dl, SDValue Chain, SDValue Dst,
158 SDValue Src, SDValue Size, MVT AVT) {
159 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
160 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
161 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
162 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
163
164 SDValue InGlue;
165 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
166 InGlue = Chain.getValue(1);
167 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
168 InGlue = Chain.getValue(1);
169 Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
170 InGlue = Chain.getValue(1);
171
172 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
173 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
174 return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
175 }
176
177 /// Emit a single REP MOVSB instruction for a particular constant size.
emitRepmovsB(const X86Subtarget & Subtarget,SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,uint64_t Size)178 static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
179 const SDLoc &dl, SDValue Chain, SDValue Dst,
180 SDValue Src, uint64_t Size) {
181 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
182 DAG.getIntPtrConstant(Size, dl), MVT::i8);
183 }
184
185 /// Returns the best type to use with repmovs depending on alignment.
getOptimalRepmovsType(const X86Subtarget & Subtarget,Align Alignment)186 static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
187 Align Alignment) {
188 uint64_t Align = Alignment.value();
189 assert((Align != 0) && "Align is normalized");
190 assert(isPowerOf2_64(Align) && "Align is a power of 2");
191 switch (Align) {
192 case 1:
193 return MVT::i8;
194 case 2:
195 return MVT::i16;
196 case 4:
197 return MVT::i32;
198 default:
199 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
200 }
201 }
202
203 /// Returns a REP MOVS instruction, possibly with a few load/stores to implement
204 /// a constant size memory copy. In some cases where we know REP MOVS is
205 /// inefficient we return an empty SDValue so the calling code can either
206 /// generate a load/store sequence or call the runtime memcpy function.
emitConstantSizeRepmov(SelectionDAG & DAG,const X86Subtarget & Subtarget,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,uint64_t Size,EVT SizeVT,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo)207 static SDValue emitConstantSizeRepmov(
208 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
209 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
210 Align Alignment, bool isVolatile, bool AlwaysInline,
211 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
212
213 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
214 /// efficient.
215 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
216 return SDValue();
217
218 /// If we have enhanced repmovs we use it.
219 if (Subtarget.hasERMSB())
220 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
221
222 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
223 /// We assume runtime memcpy will do a better job for unaligned copies when
224 /// ERMS is not present.
225 if (!AlwaysInline && (Alignment.value() & 3) != 0)
226 return SDValue();
227
228 const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
229 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
230 const uint64_t BlockCount = Size / BlockBytes;
231 const uint64_t BytesLeft = Size % BlockBytes;
232 SDValue RepMovs =
233 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
234 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
235
236 /// RepMov can process the whole length.
237 if (BytesLeft == 0)
238 return RepMovs;
239
240 assert(BytesLeft && "We have leftover at this point");
241
242 /// In case we optimize for size we use repmovsb even if it's less efficient
243 /// so we can save the loads/stores of the leftover.
244 if (DAG.getMachineFunction().getFunction().hasMinSize())
245 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
246
247 // Handle the last 1 - 7 bytes.
248 SmallVector<SDValue, 4> Results;
249 Results.push_back(RepMovs);
250 unsigned Offset = Size - BytesLeft;
251 EVT DstVT = Dst.getValueType();
252 EVT SrcVT = Src.getValueType();
253 Results.push_back(DAG.getMemcpy(
254 Chain, dl,
255 DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
256 DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
257 DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
258 /*AlwaysInline*/ true, /*CI=*/nullptr, std::nullopt,
259 DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
260 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
261 }
262
EmitTargetCodeForMemcpy(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const263 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
264 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
265 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
266 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
267 // If to a segment-relative address space, use the default lowering.
268 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
269 return SDValue();
270
271 // If the base registers conflict with our physical registers, use the default
272 // lowering.
273 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
274 X86::ECX, X86::ESI, X86::EDI};
275 if (isBaseRegConflictPossible(DAG, ClobberSet))
276 return SDValue();
277
278 const X86Subtarget &Subtarget =
279 DAG.getMachineFunction().getSubtarget<X86Subtarget>();
280
281 // If enabled and available, use fast short rep mov.
282 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
283 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
284
285 /// Handle constant sizes,
286 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
287 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
288 ConstantSize->getZExtValue(),
289 Size.getValueType(), Alignment, isVolatile,
290 AlwaysInline, DstPtrInfo, SrcPtrInfo);
291
292 return SDValue();
293 }
294