//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex stack frames"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires the GOT address to be kept in EBX for calls made via the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

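// Map a register to its SEH (structured exception handling) register number,
// which for X86 is simply the register's hardware encoding.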
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
  return getEncodingValue(i);
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

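// GPRs that may be used to hold a tail-call target address. Callee-saved
// registers are excluded because they are restored before the tail-call jump.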
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

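// Copies to or from the condition-code register class (EFLAGS) cannot be done
// directly; route them through a general-purpose register of the native width.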
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

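// Per-class register pressure limits; one GPR is subtracted when a frame
// pointer is reserved.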
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the "no_caller_saved_registers" attribute, use the
  // X86_INTR calling convention because it has the CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

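// Registers saved by copying into virtual registers rather than being spilled,
// used for the split-CSR scheme of the CXX_FAST_TLS calling convention.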
const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
          "Stack realignment in presence of dynamic allocas is not supported "
          "with this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  //   Registers before AVX512,
  //   AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  //   AMX registers (TMM)
  //   APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

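// Return true if Reg (or a register aliasing it) can be used to pass a call
// argument under the current target and calling convention.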
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // Don't use the frame pointer if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

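// The stack pointer cannot be used to address locals if the frame contains
// variable-sized objects or opaque stack adjustments (e.g. from inline asm).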
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // If we have a virtual register to reference the arguments, we don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

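// Return true if MI is an EH funclet return (catchret/cleanupret).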
static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

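// Variant of eliminateFrameIndex where the caller has already computed the
// base register and frame-index offset: rewrite the frame-index operand to
// BaseReg and fold FIOffset into the displacement (or, for stackmaps and
// patchpoints, into their offset operand).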
void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32 bits. It saves one byte per lea in code since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

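// Find a caller-saved register that is not used by the return-like instruction
// at MBBI and is therefore free at that point, so it can serve as a scratch
// register when adjusting the stack. Returns 0 if no such register is found.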
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

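// Determine the AMX tile shape (row and column operands) of a virtual tile
// register by following COPYs back to the defining tile instruction, caching
// the result in the VirtRegMap.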
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
}

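// Provide allocation hints for AMX tile registers: prefer physical tile
// registers that are either unassigned or whose currently assigned virtual
// register has the same shape as VirtReg. Other register classes fall back to
// the default hints.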
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  unsigned ID = RC.getID();
  if (ID != X86::TILERegClassID)
    return BaseImplRetVal;

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}