//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two-address hints for register "
                                     "allocation"));

extern cl::opt<bool> X86EnableAPXForRelocation;

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();
  IsUEFI64 = Is64Bit && TT.isUEFI();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit PIC mode,
  // calls through the PLT require the GOT pointer to be in EBX.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  // Keep using the non-rex2 register class when the APX feature (EGPR/NDD/NF)
  // is not enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

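  // Walk RC and then its superclasses in the order TableGen emits them, and
  // return the first class that is legal for this subtarget and does not
  // change the spill size.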
  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || IsUEFI64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the no_caller_saved_registers attribute is present, use the X86_INTR
  // calling convention because it has the CSR list we need.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the no_callee_saved_registers attribute is specified, override the
  // CSRs normally specified by the calling convention and use the empty set
  // instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()
                 ? CSR_64_CXX_TLS_Darwin_PE_SaveList
                 : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList
                       : CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList
                       : CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList
                     : CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64 || IsUEFI64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask
                       : CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask
                       : CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask
                     : CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t *
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though
    // their super-registers are old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
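  // Note that BitVector::set(I, E) sets the half-open range [I, E), so the
  // single call below covers R16 through R31 together with all their aliases,
  // which are enumerated contiguously at the end of the register file (see
  // the static_assert in getNumSupportedRegs).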
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve low half pair registers in case they are used by RA aggressively.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512, and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed whenever it is in use.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS shows up as live-out after branch folding. Keep an
  // assert to track this, and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

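  // The live-out mask packs one bit per physical register, 32 registers per
  // uint32_t word: register Reg corresponds to bit (Reg % 32) of word
  // (Reg / 32).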
  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // If we have a virtual register to reference arguments, we don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
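  // An X86 memory reference consists of five operands after the destination:
  // base register, scale immediate, index register, displacement, and segment
  // register. The LEA is a plain copy of its base only when the scale is 1,
  // there is no index register, the displacement is zero, and no segment
  // override is present.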
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which will zero extend the upper
  // 32 bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine the base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32 bits. It saves one byte per LEA in code since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with the base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;
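  // Note: SPAdj is the stack pointer adjustment in effect at this instruction
  // (e.g. from call frame setup), so it only applies when the object is
  // addressed relative to the stack pointer.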

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNri64_ImpCall:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

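    // Pick the first register of the tail-call class whose aliases are not
    // used by the return or tail-call instruction itself. RIP and the stack
    // pointer can appear in the class but are never usable as scratch
    // registers, so they are excluded explicitly.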
    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

Register
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

Register
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

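// Recover the (row, col) shape of a tile virtual register from its defining
// instruction, caching the result in the VirtRegMap so later queries are
// cheap. Shapes propagate through COPYs by recursing on the source register.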
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
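    // Both tiles of the pair share the row operand (MO1); their column
    // operands are MO2 and MO3, giving the shapes (MO1 x MO2) and
    // (MO1 x MO3).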
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

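// Return true if a virtual tile register with shape VirtShape can be hinted
// into a physical register whose resident shape is PhysShape: multi-tile
// shapes must match element-wise, while a single-tile virtual register may
// match any one element of a multi-tile physical register.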
static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint a subreg of a multi-tile reg to a single tile reg.
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
  }

  // Note: Currently we have no requirement for the case of
  // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
  return false;
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two-address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
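    // Illustrative sketch: for an NDD instruction of the form
    // "Op0 = add Op1, Op2", hinting Op0 toward Op1's physical register (or
    // Op2's, when the operation is commutable) lets the longer EVEX-encoded
    // form later be compressed to the shorter legacy two-operand encoding.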
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet.
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}

const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
    const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return RC;
  case X86::GR8RegClassID:
    return &X86::GR8_NOREX2RegClass;
  case X86::GR16RegClassID:
    return &X86::GR16_NOREX2RegClass;
  case X86::GR32RegClassID:
    return &X86::GR32_NOREX2RegClass;
  case X86::GR64RegClassID:
    return &X86::GR64_NOREX2RegClass;
  case X86::GR32_NOSPRegClassID:
    return &X86::GR32_NOREX2_NOSPRegClass;
  case X86::GR64_NOSPRegClassID:
    return &X86::GR64_NOREX2_NOSPRegClass;
  }
}

bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return false;
  case X86::GR8_NOREX2RegClassID:
  case X86::GR16_NOREX2RegClassID:
  case X86::GR32_NOREX2RegClassID:
  case X86::GR64_NOREX2RegClassID:
  case X86::GR32_NOREX2_NOSPRegClassID:
  case X86::GR64_NOREX2_NOSPRegClassID:
  case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
    return true;
  }
}