//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

extern cl::opt<bool> X86EnableAPXForRelocation;

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();
  IsUEFI64 = Is64Bit && TT.isUEFI();

  // Use a callee-saved register as the base pointer.  These registers must
  // not conflict with any ABI requirements.  For example, in 32-bit PIC mode,
  // calls through the PLT require the GOT pointer to be in EBX.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}
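// Illustrative (derived from the logic above, not an exhaustive list): an
// x86_64-linux target gets SlotSize = 8 with RSP/RBP/RBX as the stack, frame
// and base pointers; x32 keeps SlotSize = 8 but uses ESP/EBP/EBX; i386 gets
// SlotSize = 4 with ESP/EBP/ESI.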

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX.  This class is only used after
  // extracting sub_8bit_hi sub-registers.  The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  // Keep using the non-rex2 register class when the APX feature (EGPR/NDD/NF)
  // is not enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}
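// Illustrative: a GR32 sub-class such as GR32_ABCD can always inflate to GR32
// since the spill size (32 bits) is unchanged, whereas without VLX a 128-bit
// vector class stops inflating at VR128 rather than VR128X.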

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we have a
      // frame, it is fine to use it for address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || IsUEFI64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}
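// Illustrative: in a 64-bit function with a frame pointer, the GR64 limit
// above evaluates to 12 - 1 = 11, since reserving the frame pointer removes
// one general purpose register from the allocatable set.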

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the no_caller_saved_registers attribute, use the
  // X86_INTR calling convention, which has the appropriate CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the no_callee_saved_registers attribute is specified, override the
  // CSRs normally specified by the calling convention and use the empty set
  // instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64 || IsUEFI64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}
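// Illustrative (following the switch and fallback logic above): a plain C
// function on 64-bit SysV targets gets CSR_64_SaveList, the same function
// calling eh.return gets CSR_64EHRet_SaveList, and a Win64 function without
// SSE gets CSR_Win64_NoSSE_SaveList.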

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though
    // their super-registers are legacy 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve the low-half tile pair registers in case RA uses them
  // aggressively.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}
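// Illustrative: on a 32-bit target the code above additionally reserves
// R8-R15 and XMM8-XMM15 (with all their aliases) on top of the
// always-reserved stack/instruction pointers, segment registers and x87
// stack, because those registers only exist in 64-bit mode.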

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}
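// Illustrative: a subtarget with AVX-512 but neither AMX nor APX reports
// X86::K6_K7 + 1 supported registers, so the trailing TMM and R16-R31
// ranges of the enumeration need not be considered at all.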

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed whenever it is in use.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // A virtual register is used to reference the arguments, so no base
  // pointer is needed.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer.  MS inline asm can
  // reference locals while also adjusting the stack pointer.  When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer.  If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location.  On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is still 32 bits. This saves one byte per LEA since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference.  Replace the
  // FrameIndex with the base register and fold FIOffset into the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
      (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}
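// Illustrative example (MI syntax abridged): with the frame pointer as the
// base register and an FIOffset of -8, a memory reference such as
//   MOV32mr %stack.0, 1, $noreg, 0, $noreg, $eax
// is rewritten by the code above into
//   MOV32mr $ebp, 1, $noreg, -8, $noreg, $eax
// i.e. the frame-index operand becomes the base register and FIOffset is
// folded into the displacement.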

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNri64_ImpCall:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

Register
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

Register
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}
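// Illustrative: for a def such as %t:tile = PTILELOADDV %row, %col, ... the
// shape recorded for %t is ShapeT(%row, %col), while a COPY simply inherits
// the shape of its source register.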

static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint a subreg of a multi-tile register to a single tile register.
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
  }

  // Note: currently we have no requirement for the case of
  // VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum.
  return false;
}
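// Illustrative: a single-tile virtual register of shape 16x64 can be hinted
// onto a physical tile pair whose recorded shapes are {16x64, 16x32}, since
// one of the pair's sub-shapes matches, but not onto a pair with shapes
// {8x32, 8x16}.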

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}

const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
    const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return RC;
  case X86::GR8RegClassID:
    return &X86::GR8_NOREX2RegClass;
  case X86::GR16RegClassID:
    return &X86::GR16_NOREX2RegClass;
  case X86::GR32RegClassID:
    return &X86::GR32_NOREX2RegClass;
  case X86::GR64RegClassID:
    return &X86::GR64_NOREX2RegClass;
  case X86::GR32_NOSPRegClassID:
    return &X86::GR32_NOREX2_NOSPRegClass;
  case X86::GR64_NOSPRegClassID:
    return &X86::GR64_NOREX2_NOSPRegClass;
  }
}

bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return false;
  case X86::GR8_NOREX2RegClassID:
  case X86::GR16_NOREX2RegClassID:
  case X86::GR32_NOREX2RegClassID:
  case X86::GR64_NOREX2RegClassID:
  case X86::GR32_NOREX2_NOSPRegClassID:
  case X86::GR64_NOREX2_NOSPRegClassID:
  case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
    return true;
  }
}
1284