//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer.  These registers must
  // not conflict with any ABI requirements.  For example, in 32-bit mode PIC
  // requires the GOT pointer in the EBX register before function calls via
  // the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

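// Map a register to the number used by Win64 SEH unwind info, which is
// simply the register's hardware encoding value.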
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
  return getEncodingValue(i);
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX.  This class is only used after
  // extracting sub_8bit_hi sub-registers.  The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

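// EFLAGS (the CCR register class) cannot be copied directly; route such
// cross-class copies through a general-purpose register of the native width.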
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

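// Rough register-pressure limits for a few register classes; when a frame
// pointer is reserved, one fewer GPR is available.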
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

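// Select the callee-saved register list for this function based on its
// calling convention and the available vector extensions. The CSR_* save
// lists are generated by TableGen (see X86CallingConv.td).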
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the "no_caller_saved_registers" attribute is present, use the X86_INTR
  // calling convention because it has the right CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

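// Return the mask of registers preserved across a call with the given
// calling convention; this mirrors getCalleeSavedRegs() in bit-mask form for
// use at call sites.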
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

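// Build the set of registers the register allocator must never use: the
// control/status registers, the stack and instruction pointers (plus the
// frame and base pointers when they are in use), the segment registers, the
// x87 stack, and any registers that do not exist in the current mode or on
// the current subtarget.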
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported "
        "with this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are legacy 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

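// Return true if Reg (or one of its sub- or super-registers) may carry a
// function argument under this function's calling convention.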
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed whenever it is being used for the frame.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. We add
  // an assert to track this, and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clear other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

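// The stack pointer cannot be used to address frame objects when the frame
// contains variable-sized objects or opaque SP adjustments (e.g. inline asm
// that moves SP by an unknown amount).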
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // If we have a virtual register to reference arguments, we don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer.  MS inline asm can
  // reference locals while also adjusting the stack pointer.  When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer.  If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

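// CATCHRET and CLEANUPRET terminate EH funclets rather than returning from
// the parent function.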
static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location.  On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is still 32 bits. This saves one byte per lea since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference.  Replace the
  // FrameIndex with base register.  Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum+3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
      (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

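// Find a caller-saved GPR that is not used by the return or tail-call
// instruction at MBBI and is therefore dead at that point. Returns 0 if no
// such register is found or if the function calls eh.return.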
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

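// Recover the AMX tile shape (row/column operands) for a virtual tile
// register, either from the VirtRegMap cache or by walking back to the
// instruction that defines it.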
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
}

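// For AMX tile registers, prefer physical registers whose already-assigned
// virtual register has the same tile shape as VirtReg; other register
// classes fall back to the default hinting.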
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  unsigned ID = RC.getID();
  if (ID != X86::TILERegClassID)
    return BaseImplRetVal;

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}
1125