xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// PowerPC.
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPC.h"
14 #include "PPCInstrInfo.h"
15 #include "PPCMachineFunctionInfo.h"
16 #include "PPCRegisterBankInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
20 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
21 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
22 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23 #include "llvm/CodeGen/MachineConstantPool.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/IR/IntrinsicsPowerPC.h"
26 #include "llvm/Support/Debug.h"
27 
28 #define DEBUG_TYPE "ppc-gisel"
29 
30 using namespace llvm;
31 
32 namespace {
33 
34 #define GET_GLOBALISEL_PREDICATE_BITSET
35 #include "PPCGenGlobalISel.inc"
36 #undef GET_GLOBALISEL_PREDICATE_BITSET
37 
38 class PPCInstructionSelector : public InstructionSelector {
39 public:
40   PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI,
41                          const PPCRegisterBankInfo &RBI);
42 
43   bool select(MachineInstr &I) override;
44   static const char *getName() { return DEBUG_TYPE; }
45 
46 private:
47   /// tblgen generated 'select' implementation that is used as the initial
48   /// selector for the patterns that do not require complex C++.
49   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
50 
51   bool selectFPToInt(MachineInstr &I, MachineBasicBlock &MBB,
52                   MachineRegisterInfo &MRI) const;
53   bool selectIntToFP(MachineInstr &I, MachineBasicBlock &MBB,
54                   MachineRegisterInfo &MRI) const;
55 
56   bool selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
57                   MachineRegisterInfo &MRI) const;
58   bool selectConstantPool(MachineInstr &I, MachineBasicBlock &MBB,
59                           MachineRegisterInfo &MRI) const;
60 
61   std::optional<bool> selectI64ImmDirect(MachineInstr &I,
62                                          MachineBasicBlock &MBB,
63                                          MachineRegisterInfo &MRI, Register Reg,
64                                          uint64_t Imm) const;
65   bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB,
66                     MachineRegisterInfo &MRI) const;
67 
68   const PPCTargetMachine &TM;
69   const PPCSubtarget &STI;
70   const PPCInstrInfo &TII;
71   const PPCRegisterInfo &TRI;
72   const PPCRegisterBankInfo &RBI;
73 
74 #define GET_GLOBALISEL_PREDICATES_DECL
75 #include "PPCGenGlobalISel.inc"
76 #undef GET_GLOBALISEL_PREDICATES_DECL
77 
78 #define GET_GLOBALISEL_TEMPORARIES_DECL
79 #include "PPCGenGlobalISel.inc"
80 #undef GET_GLOBALISEL_TEMPORARIES_DECL
81 };
82 
83 } // end anonymous namespace
84 
85 #define GET_GLOBALISEL_IMPL
86 #include "PPCGenGlobalISel.inc"
87 #undef GET_GLOBALISEL_IMPL
88 
89 PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
90                                                const PPCSubtarget &STI,
91                                                const PPCRegisterBankInfo &RBI)
92     : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
93       RBI(RBI),
94 #define GET_GLOBALISEL_PREDICATES_INIT
95 #include "PPCGenGlobalISel.inc"
96 #undef GET_GLOBALISEL_PREDICATES_INIT
97 #define GET_GLOBALISEL_TEMPORARIES_INIT
98 #include "PPCGenGlobalISel.inc"
99 #undef GET_GLOBALISEL_TEMPORARIES_INIT
100 {
101 }
102 
103 static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) {
104   if (RB->getID() == PPC::GPRRegBankID) {
105     if (Ty.getSizeInBits() == 64)
106       return &PPC::G8RCRegClass;
107     if (Ty.getSizeInBits() <= 32)
108       return &PPC::GPRCRegClass;
109   }
110   if (RB->getID() == PPC::FPRRegBankID) {
111     if (Ty.getSizeInBits() == 32)
112       return &PPC::F4RCRegClass;
113     if (Ty.getSizeInBits() == 64)
114       return &PPC::F8RCRegClass;
115   }
116   if (RB->getID() == PPC::VECRegBankID) {
117     if (Ty.getSizeInBits() == 128)
118       return &PPC::VSRCRegClass;
119   }
120   if (RB->getID() == PPC::CRRegBankID) {
121     if (Ty.getSizeInBits() == 1)
122       return &PPC::CRBITRCRegClass;
123     if (Ty.getSizeInBits() == 4)
124       return &PPC::CRRCRegClass;
125   }
126 
127   llvm_unreachable("Unknown RegBank!");
128 }
129 
130 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
131                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
132                        const RegisterBankInfo &RBI) {
133   Register DstReg = I.getOperand(0).getReg();
134 
135   if (DstReg.isPhysical())
136     return true;
137 
138   const RegisterBank *DstRegBank = RBI.getRegBank(DstReg, MRI, TRI);
139   const TargetRegisterClass *DstRC =
140       getRegClass(MRI.getType(DstReg), DstRegBank);
141 
142   // No need to constrain SrcReg. It will get constrained when we hit another of
143   // its use or its defs.
144   // Copies do not have constraints.
145   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
146     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
147                       << " operand\n");
148     return false;
149   }
150 
151   return true;
152 }
153 
154 static unsigned selectLoadStoreOp(unsigned GenericOpc, unsigned RegBankID,
155                                   unsigned OpSize) {
156   const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
157   switch (RegBankID) {
158   case PPC::GPRRegBankID:
159     switch (OpSize) {
160     case 32:
161       return IsStore ? PPC::STW : PPC::LWZ;
162     case 64:
163       return IsStore ? PPC::STD : PPC::LD;
164     default:
165       llvm_unreachable("Unexpected size!");
166     }
167     break;
168   case PPC::FPRRegBankID:
169     switch (OpSize) {
170     case 32:
171       return IsStore ? PPC::STFS : PPC::LFS;
172     case 64:
173       return IsStore ? PPC::STFD : PPC::LFD;
174     default:
175       llvm_unreachable("Unexpected size!");
176     }
177     break;
178   default:
179     llvm_unreachable("Unexpected register bank!");
180   }
181   return GenericOpc;
182 }
183 
184 bool PPCInstructionSelector::selectIntToFP(MachineInstr &I,
185                                            MachineBasicBlock &MBB,
186                                            MachineRegisterInfo &MRI) const {
187   if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT())
188     return false;
189 
190   const DebugLoc &DbgLoc = I.getDebugLoc();
191   const Register DstReg = I.getOperand(0).getReg();
192   const Register SrcReg = I.getOperand(1).getReg();
193 
194   Register MoveReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
195 
196   // For now, only handle the case for 64 bit integer.
197   BuildMI(MBB, I, DbgLoc, TII.get(PPC::MTVSRD), MoveReg).addReg(SrcReg);
198 
199   bool IsSingle = MRI.getType(DstReg).getSizeInBits() == 32;
200   bool IsSigned = I.getOpcode() == TargetOpcode::G_SITOFP;
201   unsigned ConvOp = IsSingle ? (IsSigned ? PPC::XSCVSXDSP : PPC::XSCVUXDSP)
202                              : (IsSigned ? PPC::XSCVSXDDP : PPC::XSCVUXDDP);
203 
204   MachineInstr *MI =
205       BuildMI(MBB, I, DbgLoc, TII.get(ConvOp), DstReg).addReg(MoveReg);
206 
207   I.eraseFromParent();
208   return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
209 }
210 
211 bool PPCInstructionSelector::selectFPToInt(MachineInstr &I,
212                                            MachineBasicBlock &MBB,
213                                            MachineRegisterInfo &MRI) const {
214   if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT())
215     return false;
216 
217   const DebugLoc &DbgLoc = I.getDebugLoc();
218   const Register DstReg = I.getOperand(0).getReg();
219   const Register SrcReg = I.getOperand(1).getReg();
220 
221   Register CopyReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
222   BuildMI(MBB, I, DbgLoc, TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
223 
224   Register ConvReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
225 
226   bool IsSigned = I.getOpcode() == TargetOpcode::G_FPTOSI;
227 
228   // single-precision is stored as double-precision on PPC in registers, so
229   // always use double-precision convertions.
230   unsigned ConvOp = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
231 
232   BuildMI(MBB, I, DbgLoc, TII.get(ConvOp), ConvReg).addReg(CopyReg);
233 
234   MachineInstr *MI =
235       BuildMI(MBB, I, DbgLoc, TII.get(PPC::MFVSRD), DstReg).addReg(ConvReg);
236 
237   I.eraseFromParent();
238   return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
239 }
240 
241 bool PPCInstructionSelector::selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
242                                         MachineRegisterInfo &MRI) const {
243   const Register DstReg = I.getOperand(0).getReg();
244   const LLT DstTy = MRI.getType(DstReg);
245   const RegisterBank *DstRegBank = RBI.getRegBank(DstReg, MRI, TRI);
246 
247   const Register SrcReg = I.getOperand(1).getReg();
248 
249   assert(DstTy.getSizeInBits() == 64 && "Unexpected dest size!");
250   assert(MRI.getType(SrcReg).getSizeInBits() == 32 && "Unexpected src size!");
251 
252   Register ImpDefReg =
253       MRI.createVirtualRegister(getRegClass(DstTy, DstRegBank));
254   BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
255           ImpDefReg);
256 
257   Register NewDefReg =
258       MRI.createVirtualRegister(getRegClass(DstTy, DstRegBank));
259   BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::INSERT_SUBREG),
260           NewDefReg)
261       .addReg(ImpDefReg)
262       .addReg(SrcReg)
263       .addImm(PPC::sub_32);
264 
265   MachineInstr *MI =
266       BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), DstReg)
267           .addReg(NewDefReg)
268           .addImm(0)
269           .addImm(32);
270 
271   I.eraseFromParent();
272   return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
273 }
274 
275 // For any 32 < Num < 64, check if the Imm contains at least Num consecutive
276 // zeros and return the number of bits by the left of these consecutive zeros.
277 static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
278   uint32_t HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
279   uint32_t LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
280   if ((HiTZ + LoLZ) >= Num)
281     return (32 + HiTZ);
282   return 0;
283 }
284 
285 // Direct materialization of 64-bit constants by enumerated patterns.
286 // Similar to PPCISelDAGToDAG::selectI64ImmDirect().
287 std::optional<bool> PPCInstructionSelector::selectI64ImmDirect(MachineInstr &I,
288                                                 MachineBasicBlock &MBB,
289                                                 MachineRegisterInfo &MRI,
290                                                 Register Reg,
291                                                 uint64_t Imm) const {
292   unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
293   unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
294   unsigned TO = llvm::countr_one<uint64_t>(Imm);
295   unsigned LO = llvm::countl_one<uint64_t>(Imm);
296   uint32_t Hi32 = Hi_32(Imm);
297   uint32_t Lo32 = Lo_32(Imm);
298   uint32_t Shift = 0;
299 
300   // Following patterns use 1 instructions to materialize the Imm.
301 
302   // 1-1) Patterns : {zeros}{15-bit valve}
303   //                 {ones}{15-bit valve}
304   if (isInt<16>(Imm))
305     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), Reg)
306         .addImm(Imm)
307         .constrainAllUses(TII, TRI, RBI);
308   // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
309   //                 {ones}{15-bit valve}{16 zeros}
310   if (TZ > 15 && (LZ > 32 || LO > 32))
311     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), Reg)
312         .addImm((Imm >> 16) & 0xffff)
313         .constrainAllUses(TII, TRI, RBI);
314 
315   // Following patterns use 2 instructions to materialize the Imm.
316 
317   assert(LZ < 64 && "Unexpected leading zeros here.");
318   // Count of ones follwing the leading zeros.
319   unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
320   // 2-1) Patterns : {zeros}{31-bit value}
321   //                 {ones}{31-bit value}
322   if (isInt<32>(Imm)) {
323     uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
324     unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
325     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
326     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
327              .addImm((Imm >> 16) & 0xffff)
328              .constrainAllUses(TII, TRI, RBI))
329       return false;
330     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Reg)
331         .addReg(TmpReg, RegState::Kill)
332         .addImm(Imm & 0xffff)
333         .constrainAllUses(TII, TRI, RBI);
334   }
335   // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
336   //                 {zeros}{15-bit value}{zeros}
337   //                 {zeros}{ones}{15-bit value}
338   //                 {ones}{15-bit value}{zeros}
339   // We can take advantage of LI's sign-extension semantics to generate leading
340   // ones, and then use RLDIC to mask off the ones in both sides after rotation.
341   if ((LZ + FO + TZ) > 48) {
342     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
343     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
344              .addImm((Imm >> TZ) & 0xffff)
345              .constrainAllUses(TII, TRI, RBI))
346       return false;
347     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIC), Reg)
348         .addReg(TmpReg, RegState::Kill)
349         .addImm(TZ)
350         .addImm(LZ)
351         .constrainAllUses(TII, TRI, RBI);
352   }
353   // 2-3) Pattern : {zeros}{15-bit value}{ones}
354   // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
355   // therefore we can take advantage of LI's sign-extension semantics, and then
356   // mask them off after rotation.
357   //
358   // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
359   // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
360   // +------------------------+     +------------------------+
361   // 63                      0      63                      0
362   //          Imm                   (Imm >> (48 - LZ) & 0xffff)
363   // +----sext-----|--16-bit--+     +clear-|-----------------+
364   // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
365   // +------------------------+     +------------------------+
366   // 63                      0      63                      0
367   // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
368   if ((LZ + TO) > 48) {
369     // Since the immediates with (LZ > 32) have been handled by previous
370     // patterns, here we have (LZ <= 32) to make sure we will not shift right
371     // the Imm by a negative value.
372     assert(LZ <= 32 && "Unexpected shift value.");
373     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
374     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
375              .addImm(Imm >> (48 - LZ) & 0xffff)
376              .constrainAllUses(TII, TRI, RBI))
377       return false;
378     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
379         .addReg(TmpReg, RegState::Kill)
380         .addImm(48 - LZ)
381         .addImm(LZ)
382         .constrainAllUses(TII, TRI, RBI);
383   }
384   // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
385   //                 {ones}{15-bit value}{ones}
386   // We can take advantage of LI's sign-extension semantics to generate leading
387   // ones, and then use RLDICL to mask off the ones in left sides (if required)
388   // after rotation.
389   //
390   // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
391   // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
392   // +------------------------+     +------------------------+
393   // 63                      0      63                      0
394   //            Imm                    (Imm >> TO) & 0xffff
395   // +----sext-----|--16-bit--+     +LZ|---------------------+
396   // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
397   // +------------------------+     +------------------------+
398   // 63                      0      63                      0
399   // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
400   if ((LZ + FO + TO) > 48) {
401     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
402     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
403              .addImm((Imm >> TO) & 0xffff)
404              .constrainAllUses(TII, TRI, RBI))
405       return false;
406     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
407         .addReg(TmpReg, RegState::Kill)
408         .addImm(TO)
409         .addImm(LZ)
410         .constrainAllUses(TII, TRI, RBI);
411   }
412   // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
413   // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
414   // value, we can use LI for Lo16 without generating leading ones then add the
415   // Hi16(in Lo32).
416   if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
417     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
418     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
419              .addImm(Lo32 & 0xffff)
420              .constrainAllUses(TII, TRI, RBI))
421       return false;
422     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORIS8), Reg)
423         .addReg(TmpReg, RegState::Kill)
424         .addImm(Lo32 >> 16)
425         .constrainAllUses(TII, TRI, RBI);
426   }
427   // 2-6) Patterns : {******}{49 zeros}{******}
428   //                 {******}{49 ones}{******}
429   // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
430   // bits remain on both sides. Rotate right the Imm to construct an int<16>
431   // value, use LI for int<16> value and then use RLDICL without mask to rotate
432   // it back.
433   //
434   // 1) findContiguousZerosAtLeast(Imm, 49)
435   // +------|--zeros-|------+     +---ones--||---15 bit--+
436   // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
437   // +----------------------+     +----------------------+
438   // 63                    0      63                    0
439   //
440   // 2) findContiguousZerosAtLeast(~Imm, 49)
441   // +------|--ones--|------+     +---ones--||---15 bit--+
442   // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
443   // +----------------------+     +----------------------+
444   // 63                    0      63                    0
445   if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
446       (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
447     uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
448     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
449     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
450              .addImm(RotImm & 0xffff)
451              .constrainAllUses(TII, TRI, RBI))
452       return false;
453     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
454         .addReg(TmpReg, RegState::Kill)
455         .addImm(Shift)
456         .addImm(0)
457         .constrainAllUses(TII, TRI, RBI);
458   }
459 
460   // Following patterns use 3 instructions to materialize the Imm.
461 
462   // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
463   //                 {zeros}{31-bit value}{zeros}
464   //                 {zeros}{ones}{31-bit value}
465   //                 {ones}{31-bit value}{zeros}
466   // We can take advantage of LIS's sign-extension semantics to generate leading
467   // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
468   // ones in both sides after rotation.
469   if ((LZ + FO + TZ) > 32) {
470     uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
471     unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
472     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
473     Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
474     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
475              .addImm(ImmHi16)
476              .constrainAllUses(TII, TRI, RBI))
477       return false;
478     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
479              .addReg(TmpReg, RegState::Kill)
480              .addImm((Imm >> TZ) & 0xffff)
481              .constrainAllUses(TII, TRI, RBI))
482       return false;
483     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIC), Reg)
484         .addReg(Tmp2Reg, RegState::Kill)
485         .addImm(TZ)
486         .addImm(LZ)
487         .constrainAllUses(TII, TRI, RBI);
488   }
489   // 3-2) Pattern : {zeros}{31-bit value}{ones}
490   // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
491   // value, therefore we can take advantage of LIS's sign-extension semantics,
492   // add the remaining bits with ORI, and then mask them off after rotation.
493   // This is similar to Pattern 2-3, please refer to the diagram there.
494   if ((LZ + TO) > 32) {
495     // Since the immediates with (LZ > 32) have been handled by previous
496     // patterns, here we have (LZ <= 32) to make sure we will not shift right
497     // the Imm by a negative value.
498     assert(LZ <= 32 && "Unexpected shift value.");
499     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
500     Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
501     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), TmpReg)
502             .addImm((Imm >> (48 - LZ)) & 0xffff)
503             .constrainAllUses(TII, TRI, RBI))
504       return false;
505     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
506              .addReg(TmpReg, RegState::Kill)
507              .addImm((Imm >> (32 - LZ)) & 0xffff)
508              .constrainAllUses(TII, TRI, RBI))
509       return false;
510     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
511         .addReg(Tmp2Reg, RegState::Kill)
512         .addImm(32 - LZ)
513         .addImm(LZ)
514         .constrainAllUses(TII, TRI, RBI);
515   }
516   // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
517   //                 {ones}{31-bit value}{ones}
518   // We can take advantage of LIS's sign-extension semantics to generate leading
519   // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
520   // ones in left sides (if required) after rotation.
521   // This is similar to Pattern 2-4, please refer to the diagram there.
522   if ((LZ + FO + TO) > 32) {
523     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
524     Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
525     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), TmpReg)
526              .addImm((Imm >> (TO + 16)) & 0xffff)
527              .constrainAllUses(TII, TRI, RBI))
528       return false;
529     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
530              .addReg(TmpReg, RegState::Kill)
531              .addImm((Imm >> TO) & 0xffff)
532              .constrainAllUses(TII, TRI, RBI))
533       return false;
534     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
535         .addReg(Tmp2Reg, RegState::Kill)
536         .addImm(TO)
537         .addImm(LZ)
538         .constrainAllUses(TII, TRI, RBI);
539   }
540   // 3-4) Patterns : High word == Low word
541   if (Hi32 == Lo32) {
542     // Handle the first 32 bits.
543     uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
544     unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
545     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
546     Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
547     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
548              .addImm(ImmHi16)
549              .constrainAllUses(TII, TRI, RBI))
550       return false;
551     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
552              .addReg(TmpReg, RegState::Kill)
553              .addImm(Lo32 & 0xffff)
554              .constrainAllUses(TII, TRI, RBI))
555       return false;
556     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIMI), Reg)
557         .addReg(Tmp2Reg)
558         .addReg(Tmp2Reg, RegState::Kill)
559         .addImm(32)
560         .addImm(0)
561         .constrainAllUses(TII, TRI, RBI);
562   }
563   // 3-5) Patterns : {******}{33 zeros}{******}
564   //                 {******}{33 ones}{******}
565   // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
566   // bits remain on both sides. Rotate right the Imm to construct an int<32>
567   // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
568   // rotate it back.
569   // This is similar to Pattern 2-6, please refer to the diagram there.
570   if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
571       (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
572     uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
573     uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
574     unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
575     Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
576     Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
577     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
578              .addImm(ImmHi16)
579              .constrainAllUses(TII, TRI, RBI))
580       return false;
581     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
582              .addReg(TmpReg, RegState::Kill)
583              .addImm(RotImm & 0xffff)
584              .constrainAllUses(TII, TRI, RBI))
585       return false;
586     return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
587         .addReg(Tmp2Reg, RegState::Kill)
588         .addImm(Shift)
589         .addImm(0)
590         .constrainAllUses(TII, TRI, RBI);
591   }
592 
593   // If we end up here then no instructions were inserted.
594   return std::nullopt;
595 }
596 
597 // Derived from PPCISelDAGToDAG::selectI64Imm().
598 // TODO: Add support for prefixed instructions.
599 bool PPCInstructionSelector::selectI64Imm(MachineInstr &I,
600                                           MachineBasicBlock &MBB,
601                                           MachineRegisterInfo &MRI) const {
602   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Unexpected G code");
603 
604   Register DstReg = I.getOperand(0).getReg();
605   int64_t Imm = I.getOperand(1).getCImm()->getValue().getZExtValue();
606   // No more than 3 instructions are used if we can select the i64 immediate
607   // directly.
608   if (std::optional<bool> Res = selectI64ImmDirect(I, MBB, MRI, DstReg, Imm)) {
609     I.eraseFromParent();
610     return *Res;
611   }
612 
613   // Calculate the last bits as required.
614   uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff;
615   uint32_t Lo16 = Lo_32(Imm) & 0xffff;
616 
617   Register Reg =
618       (Hi16 || Lo16) ? MRI.createVirtualRegister(&PPC::G8RCRegClass) : DstReg;
619 
620   // Handle the upper 32 bit value.
621   std::optional<bool> Res =
622       selectI64ImmDirect(I, MBB, MRI, Reg, Imm & 0xffffffff00000000);
623   if (!Res || !*Res)
624     return false;
625 
626   // Add in the last bits as required.
627   if (Hi16) {
628     Register TmpReg =
629         Lo16 ? MRI.createVirtualRegister(&PPC::G8RCRegClass) : DstReg;
630     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORIS8), TmpReg)
631              .addReg(Reg, RegState::Kill)
632              .addImm(Hi16)
633              .constrainAllUses(TII, TRI, RBI))
634       return false;
635     Reg = TmpReg;
636   }
637   if (Lo16) {
638     if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), DstReg)
639              .addReg(Reg, RegState::Kill)
640              .addImm(Lo16)
641              .constrainAllUses(TII, TRI, RBI))
642       return false;
643   }
644   I.eraseFromParent();
645   return true;
646 }
647 
648 bool PPCInstructionSelector::selectConstantPool(
649     MachineInstr &I, MachineBasicBlock &MBB, MachineRegisterInfo &MRI) const {
650   const DebugLoc &DbgLoc = I.getDebugLoc();
651   MachineFunction *MF = MBB.getParent();
652 
653   // TODO: handle 32-bit.
654   // TODO: Enabling floating point constant pool selection on AIX requires
655   // global isel on big endian target enabled first.
656   // See CallLowering::enableBigEndian().
657   if (!STI.isPPC64() || !STI.isLittleEndian())
658     return false;
659 
660   MF->getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
661 
662   const Register DstReg = I.getOperand(0).getReg();
663   unsigned CPI = I.getOperand(1).getIndex();
664 
665   // Address stored in the TOC entry. This is related to code model and the ABI
666   // we are currently using. For now we only handle 64-bit Linux LE. PowerPC
667   // only supports small, medium and large code model.
668   const CodeModel::Model CModel = TM.getCodeModel();
669   assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
670          "PowerPC doesn't support tiny or kernel code models.");
671 
672   const MCRegister TOCReg = STI.getTOCPointerRegister();
673   MachineMemOperand *MMO = MF->getMachineMemOperand(
674       MachinePointerInfo::getGOT(*MF), MachineMemOperand::MOLoad,
675       MRI.getType(DstReg), MF->getDataLayout().getPointerABIAlignment(0));
676 
677   MachineInstr *MI = nullptr;
678   // For now we only handle 64-bit Linux.
679   if (CModel == CodeModel::Small) {
680     // For small code model, generate LDtocCPT(CPI, X2).
681     MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocCPT), DstReg)
682              .addConstantPoolIndex(CPI)
683              .addReg(TOCReg)
684              .addMemOperand(MMO);
685   } else {
686     Register HaAddrReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
687     BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDIStocHA8), HaAddrReg)
688         .addReg(TOCReg)
689         .addConstantPoolIndex(CPI);
690 
691     if (CModel == CodeModel::Large)
692       // For large code model, generate LDtocL(CPI, ADDIStocHA8(X2, CPI))
693       MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocL), DstReg)
694                .addConstantPoolIndex(CPI)
695                .addReg(HaAddrReg)
696                .addMemOperand(MMO);
697     else
698       // For medium code model, generate ADDItocL8(CPI, ADDIStocHA8(X2, CPI))
699       MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL8), DstReg)
700                .addReg(HaAddrReg)
701                .addConstantPoolIndex(CPI);
702   }
703 
704   I.eraseFromParent();
705   return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
706 }
707 
708 bool PPCInstructionSelector::select(MachineInstr &I) {
709   auto &MBB = *I.getParent();
710   auto &MF = *MBB.getParent();
711   auto &MRI = MF.getRegInfo();
712 
713   if (!isPreISelGenericOpcode(I.getOpcode())) {
714     if (I.isCopy())
715       return selectCopy(I, TII, MRI, TRI, RBI);
716 
717     return true;
718   }
719 
720   if (selectImpl(I, *CoverageInfo))
721     return true;
722 
723   unsigned Opcode = I.getOpcode();
724 
725   switch (Opcode) {
726   default:
727     return false;
728   case TargetOpcode::G_LOAD:
729   case TargetOpcode::G_STORE: {
730     GLoadStore &LdSt = cast<GLoadStore>(I);
731     LLT PtrTy = MRI.getType(LdSt.getPointerReg());
732 
733     if (PtrTy != LLT::pointer(0, 64)) {
734       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
735                         << ", expected: " << LLT::pointer(0, 64) << '\n');
736       return false;
737     }
738 
739     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
740       const unsigned NewOpc = selectLoadStoreOp(
741           I.getOpcode(), RBI.getRegBank(LdSt.getReg(0), MRI, TRI)->getID(),
742           LdSt.getMemSizeInBits().getValue());
743 
744       if (NewOpc == I.getOpcode())
745         return nullptr;
746 
747       // For now, simply use DForm with load/store addr as base and 0 as imm.
748       // FIXME: optimize load/store with some specific address patterns.
749       I.setDesc(TII.get(NewOpc));
750       Register AddrReg = I.getOperand(1).getReg();
751       bool IsKill = I.getOperand(1).isKill();
752       I.getOperand(1).ChangeToImmediate(0);
753       I.addOperand(*I.getParent()->getParent(),
754                    MachineOperand::CreateReg(AddrReg, /* isDef */ false,
755                                              /* isImp */ false, IsKill));
756       return &I;
757     };
758 
759     MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
760     if (!LoadStore)
761       return false;
762 
763     return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
764   }
765   case TargetOpcode::G_SITOFP:
766   case TargetOpcode::G_UITOFP:
767     return selectIntToFP(I, MBB, MRI);
768   case TargetOpcode::G_FPTOSI:
769   case TargetOpcode::G_FPTOUI:
770     return selectFPToInt(I, MBB, MRI);
771   // G_SEXT will be selected in tb-gen pattern.
772   case TargetOpcode::G_ZEXT:
773     return selectZExt(I, MBB, MRI);
774   case TargetOpcode::G_CONSTANT:
775     return selectI64Imm(I, MBB, MRI);
776   case TargetOpcode::G_CONSTANT_POOL:
777     return selectConstantPool(I, MBB, MRI);
778   }
779   return false;
780 }
781 
782 namespace llvm {
783 InstructionSelector *
784 createPPCInstructionSelector(const PPCTargetMachine &TM,
785                              const PPCSubtarget &Subtarget,
786                              const PPCRegisterBankInfo &RBI) {
787   return new PPCInstructionSelector(TM, Subtarget, RBI);
788 }
789 } // end namespace llvm
790