xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
// separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both of
// which define GPR3.  A copy is added from GPR3 to the target virtual
// register of the original instruction.  The GETtls[ld]ADDR[32] is really
// a call instruction, so its target register is constrained to be GPR3.
// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
// optimization bug that requires the target register of the addi of
// a local- or general-dynamic TLS access sequence to be GPR3.
17  //
18  // This is done in a late pass so that TLS variable accesses can be
19  // fully commoned by MachineCSE.
20  //
21  //===----------------------------------------------------------------------===//
22  
23  #include "PPC.h"
24  #include "PPCInstrBuilder.h"
25  #include "PPCInstrInfo.h"
26  #include "PPCTargetMachine.h"
27  #include "llvm/CodeGen/LiveIntervals.h"
28  #include "llvm/CodeGen/MachineFrameInfo.h"
29  #include "llvm/CodeGen/MachineFunctionPass.h"
30  #include "llvm/CodeGen/MachineInstrBuilder.h"
31  #include "llvm/InitializePasses.h"
32  #include "llvm/Support/Debug.h"
33  #include "llvm/Support/raw_ostream.h"
34  
35  using namespace llvm;
36  
37  #define DEBUG_TYPE "ppc-tls-dynamic-call"
38  
39  namespace {
40    struct PPCTLSDynamicCall : public MachineFunctionPass {
41      static char ID;
PPCTLSDynamicCall__anonbafe3dec0111::PPCTLSDynamicCall42      PPCTLSDynamicCall() : MachineFunctionPass(ID) {
43        initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
44      }
45  
46      const PPCInstrInfo *TII;
47  
48  protected:
processBlock__anonbafe3dec0111::PPCTLSDynamicCall49      bool processBlock(MachineBasicBlock &MBB) {
50        bool Changed = false;
51        bool NeedFence = true;
52        const PPCSubtarget &Subtarget =
53            MBB.getParent()->getSubtarget<PPCSubtarget>();
54        bool Is64Bit = Subtarget.isPPC64();
55        bool IsAIX = Subtarget.isAIXABI();
56        bool IsLargeModel =
57            Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
58        bool IsPCREL = false;
59        MachineFunction *MF = MBB.getParent();
60        MachineRegisterInfo &RegInfo = MF->getRegInfo();
61  
62        for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
63             I != IE;) {
64          MachineInstr &MI = *I;
65          IsPCREL = isPCREL(MI);
66          // There are a number of slight differences in code generation
67          // when we call .__get_tpointer (32-bit AIX TLS).
68          bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
69          bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 ||
70                               MI.getOpcode() == PPC::TLSLDAIX);
71  
72          if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
73              MI.getOpcode() != PPC::ADDItlsldLADDR &&
74              MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
75              MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
76              MI.getOpcode() != PPC::TLSGDAIX &&
77              MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL &&
78              !IsTLSLDAIXMI) {
79            // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
80            // as scheduling fences, we skip creating fences if we already
81            // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
82            // which causes verification error with -verify-machineinstrs.
83            if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN)
84              NeedFence = false;
85            else if (MI.getOpcode() == PPC::ADJCALLSTACKUP)
86              NeedFence = true;
87  
88            ++I;
89            continue;
90          }
91  
92          LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n    " << MI);
93  
94          Register OutReg = MI.getOperand(0).getReg();
95          Register InReg = PPC::NoRegister;
96          Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
97          Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
98          if (!IsPCREL && !IsTLSTPRelMI)
99            InReg = MI.getOperand(1).getReg();
100          DebugLoc DL = MI.getDebugLoc();
101  
102          unsigned Opc1, Opc2;
103          switch (MI.getOpcode()) {
104          default:
105            llvm_unreachable("Opcode inconsistency error");
106          case PPC::ADDItlsgdLADDR:
107            Opc1 = PPC::ADDItlsgdL;
108            Opc2 = PPC::GETtlsADDR;
109            break;
110          case PPC::ADDItlsldLADDR:
111            Opc1 = PPC::ADDItlsldL;
112            Opc2 = PPC::GETtlsldADDR;
113            break;
114          case PPC::ADDItlsgdLADDR32:
115            Opc1 = PPC::ADDItlsgdL32;
116            Opc2 = PPC::GETtlsADDR32;
117            break;
118          case PPC::ADDItlsldLADDR32:
119            Opc1 = PPC::ADDItlsldL32;
120            Opc2 = PPC::GETtlsldADDR32;
121            break;
122          case PPC::TLSLDAIX:
123            // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set
124            // Opc2 here.
125            Opc2 = PPC::GETtlsMOD32AIX;
126            break;
127          case PPC::TLSLDAIX8:
128            // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set
129            // Opc2 here.
130            Opc2 = PPC::GETtlsMOD64AIX;
131            break;
132          case PPC::TLSGDAIX8:
133            // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only
134            // set Opc2 here.
135            Opc2 = PPC::GETtlsADDR64AIX;
136            break;
137          case PPC::TLSGDAIX:
138            // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only
139            // set Opc2 here.
140            Opc2 = PPC::GETtlsADDR32AIX;
141            break;
142          case PPC::GETtlsTpointer32AIX:
143            // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX
144            // 32-bit mode within PPCAsmPrinter. This instruction does not need
145            // to change, so Opc2 is set to the same instruction opcode.
146            Opc2 = PPC::GETtlsTpointer32AIX;
147            break;
148          case PPC::PADDI8pc:
149            assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
150            Opc1 = PPC::PADDI8pc;
151            Opc2 = MI.getOperand(2).getTargetFlags() ==
152                           PPCII::MO_GOT_TLSGD_PCREL_FLAG
153                       ? PPC::GETtlsADDRPCREL
154                       : PPC::GETtlsldADDRPCREL;
155          }
156  
157          // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
158          // as scheduling fence to avoid it is scheduled before
159          // mflr in the prologue and the address in LR is clobbered (PR25839).
160          // We don't really need to save data to the stack - the clobbered
161          // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
162          // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
163          if (NeedFence) {
164            MBB.getParent()->getFrameInfo().setAdjustsStack(true);
165            BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
166                                                                .addImm(0);
167          }
168  
169          if (IsAIX) {
170            if (IsTLSLDAIXMI) {
171              // The relative order between the node that loads the variable
172              // offset from the TOC, and the .__tls_get_mod node is being tuned
173              // here. It is better to put the variable offset TOC load after the
174              // call, since this node can use clobbers r4/r5.
175              // Search for the pattern of the two nodes that load from the TOC
176              // (either for the variable offset or for the module handle), and
177              // then move the variable offset TOC load right before the node that
178              // uses the OutReg of the .__tls_get_mod node.
179              unsigned LDTocOp =
180                  Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
181                          : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
182              if (!RegInfo.use_empty(OutReg)) {
183                std::set<MachineInstr *> Uses;
184                // Collect all instructions that use the OutReg.
185                for (MachineOperand &MO : RegInfo.use_operands(OutReg))
186                  Uses.insert(MO.getParent());
187                // Find the first user (e.g.: lwax/stfdx) of the OutReg within the
188                // current BB.
189                MachineBasicBlock::iterator UseIter = MBB.begin();
190                for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
191                     ++UseIter)
192                  if (Uses.count(&*UseIter))
193                    break;
194  
195                // Additional handling is required when UserIter (the first user
196                // of OutReg) is pointing to a valid node that loads from the TOC.
197                // Check the pattern and do the movement if the pattern matches.
198                if (UseIter != MBB.end()) {
199                  // Collect all associated nodes that load from the TOC. Use
200                  // hasOneDef() to guard against unexpected scenarios.
201                  std::set<MachineInstr *> LoadFromTocs;
202                  for (MachineOperand &MO : UseIter->operands())
203                    if (MO.isReg() && MO.isUse()) {
204                      Register MOReg = MO.getReg();
205                      if (RegInfo.hasOneDef(MOReg)) {
206                        MachineInstr *Temp =
207                            RegInfo.getOneDef(MOReg)->getParent();
208                        // For the current TLSLDAIX node, get the corresponding
209                        // node that loads from the TOC for the InReg. Otherwise,
210                        // Temp probably pointed to the variable offset TOC load
211                        // we would like to move.
212                        if (Temp == &MI && RegInfo.hasOneDef(InReg))
213                          Temp = RegInfo.getOneDef(InReg)->getParent();
214                        if (Temp->getOpcode() == LDTocOp)
215                          LoadFromTocs.insert(Temp);
216                      } else {
217                        // FIXME: analyze this scenario if there is one.
218                        LoadFromTocs.clear();
219                        break;
220                      }
221                    }
222  
223                  // Check the two nodes that loaded from the TOC: one should be
224                  // "_$TLSML", and the other will be moved before the node that
225                  // uses the OutReg of the .__tls_get_mod node.
226                  if (LoadFromTocs.size() == 2) {
227                    MachineBasicBlock::iterator TLSMLIter = MBB.end();
228                    MachineBasicBlock::iterator OffsetIter = MBB.end();
229                    // Make sure the two nodes that loaded from the TOC are within
230                    // the current BB, and that one of them is from the "_$TLSML"
231                    // pseudo symbol, while the other is from the variable.
232                    for (MachineBasicBlock::iterator I = MBB.begin(),
233                                                     IE = MBB.end();
234                         I != IE; ++I)
235                      if (LoadFromTocs.count(&*I)) {
236                        MachineOperand MO = I->getOperand(1);
237                        if (MO.isGlobal() && MO.getGlobal()->hasName() &&
238                            MO.getGlobal()->getName() == "_$TLSML")
239                          TLSMLIter = I;
240                        else
241                          OffsetIter = I;
242                      }
243                    // Perform the movement when the desired scenario has been
244                    // identified, which should be when both of the iterators are
245                    // valid.
246                    if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
247                      OffsetIter->moveBefore(&*UseIter);
248                  }
249                }
250              }
251              // The module-handle is copied into r3. The copy is followed by
252              // GETtlsMOD32AIX/GETtlsMOD64AIX.
253              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
254                  .addReg(InReg);
255              // The call to .__tls_get_mod.
256              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
257            } else if (!IsTLSTPRelMI) {
258              // The variable offset and region handle (for TLSGD) are copied in
259              // r4 and r3. The copies are followed by
260              // GETtlsADDR32AIX/GETtlsADDR64AIX.
261              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
262                  .addReg(MI.getOperand(1).getReg());
263              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
264                  .addReg(MI.getOperand(2).getReg());
265              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
266            } else
267              // The opcode of GETtlsTpointer32AIX does not change, because later
268              // this instruction will be expanded into a call to .__get_tpointer,
269              // which will return the thread pointer into r3.
270              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
271          } else {
272            MachineInstr *Addi;
273            if (IsPCREL) {
274              Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0);
275            } else {
276              // Expand into two ops built prior to the existing instruction.
277              assert(InReg != PPC::NoRegister && "Operand must be a register");
278              Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg);
279            }
280  
281            Addi->addOperand(MI.getOperand(2));
282  
283            MachineInstr *Call =
284                (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3));
285            if (IsPCREL)
286              Call->addOperand(MI.getOperand(2));
287            else
288              Call->addOperand(MI.getOperand(3));
289          }
290          if (NeedFence)
291            BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
292  
293          BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
294            .addReg(GPR3);
295  
296          // Move past the original instruction and remove it.
297          ++I;
298          MI.removeFromParent();
299  
300          Changed = true;
301        }
302  
303        return Changed;
304      }
305  
306  public:
isPCREL__anonbafe3dec0111::PPCTLSDynamicCall307    bool isPCREL(const MachineInstr &MI) {
308      return (MI.getOpcode() == PPC::PADDI8pc) &&
309             (MI.getOperand(2).getTargetFlags() ==
310                  PPCII::MO_GOT_TLSGD_PCREL_FLAG ||
311              MI.getOperand(2).getTargetFlags() ==
312                  PPCII::MO_GOT_TLSLD_PCREL_FLAG);
313    }
314  
runOnMachineFunction__anonbafe3dec0111::PPCTLSDynamicCall315      bool runOnMachineFunction(MachineFunction &MF) override {
316        TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
317  
318        bool Changed = false;
319  
320        for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
321          if (processBlock(B))
322            Changed = true;
323  
324        return Changed;
325      }
326  
getAnalysisUsage__anonbafe3dec0111::PPCTLSDynamicCall327      void getAnalysisUsage(AnalysisUsage &AU) const override {
328        AU.addRequired<LiveIntervalsWrapperPass>();
329        AU.addRequired<SlotIndexesWrapperPass>();
330        MachineFunctionPass::getAnalysisUsage(AU);
331      }
332    };
333  }
334  
335  INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
336                        "PowerPC TLS Dynamic Call Fixup", false, false)
337  INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
338  INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
339  INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
340                      "PowerPC TLS Dynamic Call Fixup", false, false)
341  
342  char PPCTLSDynamicCall::ID = 0;
343  FunctionPass*
createPPCTLSDynamicCallPass()344  llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
345