1 //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into 10 // separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of 11 // which define GPR3. A copy is added from GPR3 to the target virtual 12 // register of the original instruction. The GETtlsADDR[32] is really 13 // a call instruction, so its target register is constrained to be GPR3. 14 // This is not true of ADDItls[gd]L[32], but there is a legacy linker 15 // optimization bug that requires the target register of the addi of 16 // a local- or general-dynamic TLS access sequence to be GPR3. 17 // 18 // This is done in a late pass so that TLS variable accesses can be 19 // fully commoned by MachineCSE. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #include "PPC.h" 24 #include "PPCInstrInfo.h" 25 #include "PPCTargetMachine.h" 26 #include "llvm/CodeGen/LiveIntervals.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "ppc-tls-dynamic-call" 36 37 namespace { 38 struct PPCTLSDynamicCall : public MachineFunctionPass { 39 static char ID; 40 PPCTLSDynamicCall() : MachineFunctionPass(ID) {} 41 42 const PPCInstrInfo *TII; 43 44 protected: 45 bool processBlock(MachineBasicBlock &MBB) { 46 bool Changed = false; 47 bool NeedFence = true; 48 const PPCSubtarget &Subtarget = 49 MBB.getParent()->getSubtarget<PPCSubtarget>(); 50 bool Is64Bit = Subtarget.isPPC64(); 51 bool IsAIX = Subtarget.isAIXABI(); 52 bool IsLargeModel = 53 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large; 54 bool IsPCREL = false; 55 MachineFunction *MF = MBB.getParent(); 56 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 57 58 for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); 59 I != IE;) { 60 MachineInstr &MI = *I; 61 IsPCREL = isPCREL(MI); 62 // There are a number of slight differences in code generation 63 // when we call .__get_tpointer (32-bit AIX TLS). 64 bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX; 65 bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 || 66 MI.getOpcode() == PPC::TLSLDAIX); 67 68 if (MI.getOpcode() != PPC::ADDItlsgdLADDR && 69 MI.getOpcode() != PPC::ADDItlsldLADDR && 70 MI.getOpcode() != PPC::ADDItlsgdLADDR32 && 71 MI.getOpcode() != PPC::ADDItlsldLADDR32 && 72 MI.getOpcode() != PPC::TLSGDAIX && 73 MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL && 74 !IsTLSLDAIXMI) { 75 // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP 76 // as scheduling fences, we skip creating fences if we already 77 // have existing ADJCALLSTACKDOWN/UP to avoid nesting, 78 // which causes verification error with -verify-machineinstrs. 79 if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) 80 NeedFence = false; 81 else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) 82 NeedFence = true; 83 84 ++I; 85 continue; 86 } 87 88 LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); 89 90 Register OutReg = MI.getOperand(0).getReg(); 91 Register InReg = PPC::NoRegister; 92 Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; 93 Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; 94 if (!IsPCREL && !IsTLSTPRelMI) 95 InReg = MI.getOperand(1).getReg(); 96 DebugLoc DL = MI.getDebugLoc(); 97 98 unsigned Opc1, Opc2; 99 switch (MI.getOpcode()) { 100 default: 101 llvm_unreachable("Opcode inconsistency error"); 102 case PPC::ADDItlsgdLADDR: 103 Opc1 = PPC::ADDItlsgdL; 104 Opc2 = PPC::GETtlsADDR; 105 break; 106 case PPC::ADDItlsldLADDR: 107 Opc1 = PPC::ADDItlsldL; 108 Opc2 = PPC::GETtlsldADDR; 109 break; 110 case PPC::ADDItlsgdLADDR32: 111 Opc1 = PPC::ADDItlsgdL32; 112 Opc2 = PPC::GETtlsADDR32; 113 break; 114 case PPC::ADDItlsldLADDR32: 115 Opc1 = PPC::ADDItlsldL32; 116 Opc2 = PPC::GETtlsldADDR32; 117 break; 118 case PPC::TLSLDAIX: 119 // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set 120 // Opc2 here. 121 Opc2 = PPC::GETtlsMOD32AIX; 122 break; 123 case PPC::TLSLDAIX8: 124 // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set 125 // Opc2 here. 126 Opc2 = PPC::GETtlsMOD64AIX; 127 break; 128 case PPC::TLSGDAIX8: 129 // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only 130 // set Opc2 here. 131 Opc2 = PPC::GETtlsADDR64AIX; 132 break; 133 case PPC::TLSGDAIX: 134 // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only 135 // set Opc2 here. 136 Opc2 = PPC::GETtlsADDR32AIX; 137 break; 138 case PPC::GETtlsTpointer32AIX: 139 // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX 140 // 32-bit mode within PPCAsmPrinter. This instruction does not need 141 // to change, so Opc2 is set to the same instruction opcode. 142 Opc2 = PPC::GETtlsTpointer32AIX; 143 break; 144 case PPC::PADDI8pc: 145 assert(IsPCREL && "Expecting General/Local Dynamic PCRel"); 146 Opc1 = PPC::PADDI8pc; 147 Opc2 = MI.getOperand(2).getTargetFlags() == 148 PPCII::MO_GOT_TLSGD_PCREL_FLAG 149 ? PPC::GETtlsADDRPCREL 150 : PPC::GETtlsldADDRPCREL; 151 } 152 153 // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr 154 // as scheduling fence to avoid it is scheduled before 155 // mflr in the prologue and the address in LR is clobbered (PR25839). 156 // We don't really need to save data to the stack - the clobbered 157 // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) 158 // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). 159 if (NeedFence) { 160 MBB.getParent()->getFrameInfo().setAdjustsStack(true); 161 BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) 162 .addImm(0); 163 } 164 165 if (IsAIX) { 166 if (IsTLSLDAIXMI) { 167 // The relative order between the node that loads the variable 168 // offset from the TOC, and the .__tls_get_mod node is being tuned 169 // here. It is better to put the variable offset TOC load after the 170 // call, since this node can use clobbers r4/r5. 171 // Search for the pattern of the two nodes that load from the TOC 172 // (either for the variable offset or for the module handle), and 173 // then move the variable offset TOC load right before the node that 174 // uses the OutReg of the .__tls_get_mod node. 175 unsigned LDTocOp = 176 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc) 177 : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc); 178 if (!RegInfo.use_empty(OutReg)) { 179 std::set<MachineInstr *> Uses; 180 // Collect all instructions that use the OutReg. 181 for (MachineOperand &MO : RegInfo.use_operands(OutReg)) 182 Uses.insert(MO.getParent()); 183 // Find the first user (e.g.: lwax/stfdx) of the OutReg within the 184 // current BB. 185 MachineBasicBlock::iterator UseIter = MBB.begin(); 186 for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE; 187 ++UseIter) 188 if (Uses.count(&*UseIter)) 189 break; 190 191 // Additional handling is required when UserIter (the first user 192 // of OutReg) is pointing to a valid node that loads from the TOC. 193 // Check the pattern and do the movement if the pattern matches. 194 if (UseIter != MBB.end()) { 195 // Collect all associated nodes that load from the TOC. Use 196 // hasOneDef() to guard against unexpected scenarios. 197 std::set<MachineInstr *> LoadFromTocs; 198 for (MachineOperand &MO : UseIter->operands()) 199 if (MO.isReg() && MO.isUse()) { 200 Register MOReg = MO.getReg(); 201 if (RegInfo.hasOneDef(MOReg)) { 202 MachineInstr *Temp = 203 RegInfo.getOneDef(MOReg)->getParent(); 204 // For the current TLSLDAIX node, get the corresponding 205 // node that loads from the TOC for the InReg. Otherwise, 206 // Temp probably pointed to the variable offset TOC load 207 // we would like to move. 208 if (Temp == &MI && RegInfo.hasOneDef(InReg)) 209 Temp = RegInfo.getOneDef(InReg)->getParent(); 210 if (Temp->getOpcode() == LDTocOp) 211 LoadFromTocs.insert(Temp); 212 } else { 213 // FIXME: analyze this scenario if there is one. 214 LoadFromTocs.clear(); 215 break; 216 } 217 } 218 219 // Check the two nodes that loaded from the TOC: one should be 220 // "_$TLSML", and the other will be moved before the node that 221 // uses the OutReg of the .__tls_get_mod node. 222 if (LoadFromTocs.size() == 2) { 223 MachineBasicBlock::iterator TLSMLIter = MBB.end(); 224 MachineBasicBlock::iterator OffsetIter = MBB.end(); 225 // Make sure the two nodes that loaded from the TOC are within 226 // the current BB, and that one of them is from the "_$TLSML" 227 // pseudo symbol, while the other is from the variable. 228 for (MachineBasicBlock::iterator I = MBB.begin(), 229 IE = MBB.end(); 230 I != IE; ++I) 231 if (LoadFromTocs.count(&*I)) { 232 MachineOperand MO = I->getOperand(1); 233 if (MO.isGlobal() && MO.getGlobal()->hasName() && 234 MO.getGlobal()->getName() == "_$TLSML") 235 TLSMLIter = I; 236 else 237 OffsetIter = I; 238 } 239 // Perform the movement when the desired scenario has been 240 // identified, which should be when both of the iterators are 241 // valid. 242 if (TLSMLIter != MBB.end() && OffsetIter != MBB.end()) 243 OffsetIter->moveBefore(&*UseIter); 244 } 245 } 246 } 247 // The module-handle is copied into r3. The copy is followed by 248 // GETtlsMOD32AIX/GETtlsMOD64AIX. 249 BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) 250 .addReg(InReg); 251 // The call to .__tls_get_mod. 252 BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3); 253 } else if (!IsTLSTPRelMI) { 254 // The variable offset and region handle (for TLSGD) are copied in 255 // r4 and r3. The copies are followed by 256 // GETtlsADDR32AIX/GETtlsADDR64AIX. 257 BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4) 258 .addReg(MI.getOperand(1).getReg()); 259 BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3) 260 .addReg(MI.getOperand(2).getReg()); 261 BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4); 262 } else 263 // The opcode of GETtlsTpointer32AIX does not change, because later 264 // this instruction will be expanded into a call to .__get_tpointer, 265 // which will return the thread pointer into r3. 266 BuildMI(MBB, I, DL, TII->get(Opc2), GPR3); 267 } else { 268 MachineInstr *Addi; 269 if (IsPCREL) { 270 Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0); 271 } else { 272 // Expand into two ops built prior to the existing instruction. 273 assert(InReg != PPC::NoRegister && "Operand must be a register"); 274 Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg); 275 } 276 277 Addi->addOperand(MI.getOperand(2)); 278 279 MachineInstr *Call = 280 (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3)); 281 if (IsPCREL) 282 Call->addOperand(MI.getOperand(2)); 283 else 284 Call->addOperand(MI.getOperand(3)); 285 } 286 if (NeedFence) 287 BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); 288 289 BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) 290 .addReg(GPR3); 291 292 // Move past the original instruction and remove it. 293 ++I; 294 MI.removeFromParent(); 295 296 Changed = true; 297 } 298 299 return Changed; 300 } 301 302 public: 303 bool isPCREL(const MachineInstr &MI) { 304 return (MI.getOpcode() == PPC::PADDI8pc) && 305 (MI.getOperand(2).getTargetFlags() == 306 PPCII::MO_GOT_TLSGD_PCREL_FLAG || 307 MI.getOperand(2).getTargetFlags() == 308 PPCII::MO_GOT_TLSLD_PCREL_FLAG); 309 } 310 311 bool runOnMachineFunction(MachineFunction &MF) override { 312 TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); 313 314 bool Changed = false; 315 316 for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) 317 if (processBlock(B)) 318 Changed = true; 319 320 return Changed; 321 } 322 323 void getAnalysisUsage(AnalysisUsage &AU) const override { 324 AU.addRequired<LiveIntervalsWrapperPass>(); 325 AU.addRequired<SlotIndexesWrapperPass>(); 326 MachineFunctionPass::getAnalysisUsage(AU); 327 } 328 }; 329 } 330 331 INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, 332 "PowerPC TLS Dynamic Call Fixup", false, false) 333 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 334 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) 335 INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE, 336 "PowerPC TLS Dynamic Call Fixup", false, false) 337 338 char PPCTLSDynamicCall::ID = 0; 339 FunctionPass* 340 llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); } 341