1 //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interface to a pass that merges duplicate global 10 // constants together into a single constant that is shared. This is useful 11 // because some passes (ie TraceValues) insert a lot of string constants into 12 // the program, regardless of whether or not an existing string is available. 13 // 14 // Algorithm: ConstantMerge is designed to build up a map of available constants 15 // and eliminate duplicates when it is initialized. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/Transforms/IPO/ConstantMerge.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SmallPtrSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/GlobalValue.h" 28 #include "llvm/IR/GlobalVariable.h" 29 #include "llvm/IR/LLVMContext.h" 30 #include "llvm/IR/Module.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Transforms/IPO.h" 33 #include <algorithm> 34 #include <cassert> 35 #include <utility> 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "constmerge" 40 41 STATISTIC(NumIdenticalMerged, "Number of identical global constants merged"); 42 43 /// Find values that are marked as llvm.used. 44 static void FindUsedValues(GlobalVariable *LLVMUsed, 45 SmallPtrSetImpl<const GlobalValue*> &UsedValues) { 46 if (!LLVMUsed) return; 47 ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); 48 49 for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { 50 Value *Operand = Inits->getOperand(i)->stripPointerCasts(); 51 GlobalValue *GV = cast<GlobalValue>(Operand); 52 UsedValues.insert(GV); 53 } 54 } 55 56 // True if A is better than B. 57 static bool IsBetterCanonical(const GlobalVariable &A, 58 const GlobalVariable &B) { 59 if (!A.hasLocalLinkage() && B.hasLocalLinkage()) 60 return true; 61 62 if (A.hasLocalLinkage() && !B.hasLocalLinkage()) 63 return false; 64 65 return A.hasGlobalUnnamedAddr(); 66 } 67 68 static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) { 69 SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 70 GV->getAllMetadata(MDs); 71 for (const auto &V : MDs) 72 if (V.first != LLVMContext::MD_dbg) 73 return true; 74 return false; 75 } 76 77 static void copyDebugLocMetadata(const GlobalVariable *From, 78 GlobalVariable *To) { 79 SmallVector<DIGlobalVariableExpression *, 1> MDs; 80 From->getDebugInfo(MDs); 81 for (auto *MD : MDs) 82 To->addDebugInfo(MD); 83 } 84 85 static Align getAlign(GlobalVariable *GV) { 86 return GV->getAlign().value_or( 87 GV->getParent()->getDataLayout().getPreferredAlign(GV)); 88 } 89 90 static bool 91 isUnmergeableGlobal(GlobalVariable *GV, 92 const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) { 93 // Only process constants with initializers in the default address space. 94 return !GV->isConstant() || !GV->hasDefinitiveInitializer() || 95 GV->getType()->getAddressSpace() != 0 || GV->hasSection() || 96 // Don't touch thread-local variables. 97 GV->isThreadLocal() || 98 // Don't touch values marked with attribute(used). 99 UsedGlobals.count(GV); 100 } 101 102 enum class CanMerge { No, Yes }; 103 static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) { 104 if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr()) 105 return CanMerge::No; 106 if (hasMetadataOtherThanDebugLoc(Old)) 107 return CanMerge::No; 108 assert(!hasMetadataOtherThanDebugLoc(New)); 109 if (!Old->hasGlobalUnnamedAddr()) 110 New->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 111 return CanMerge::Yes; 112 } 113 114 static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { 115 Constant *NewConstant = New; 116 117 LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @" 118 << New->getName() << "\n"); 119 120 // Bump the alignment if necessary. 121 if (Old->getAlign() || New->getAlign()) 122 New->setAlignment(std::max(getAlign(Old), getAlign(New))); 123 124 copyDebugLocMetadata(Old, New); 125 Old->replaceAllUsesWith(NewConstant); 126 127 // Delete the global value from the module. 128 assert(Old->hasLocalLinkage() && 129 "Refusing to delete an externally visible global variable."); 130 Old->eraseFromParent(); 131 } 132 133 static bool mergeConstants(Module &M) { 134 // Find all the globals that are marked "used". These cannot be merged. 135 SmallPtrSet<const GlobalValue*, 8> UsedGlobals; 136 FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); 137 FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); 138 139 // Map unique constants to globals. 140 DenseMap<Constant *, GlobalVariable *> CMap; 141 142 SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32> 143 SameContentReplacements; 144 145 size_t ChangesMade = 0; 146 size_t OldChangesMade = 0; 147 148 // Iterate constant merging while we are still making progress. Merging two 149 // constants together may allow us to merge other constants together if the 150 // second level constants have initializers which point to the globals that 151 // were just merged. 152 while (true) { 153 // Find the canonical constants others will be merged with. 154 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 155 // If this GV is dead, remove it. 156 GV.removeDeadConstantUsers(); 157 if (GV.use_empty() && GV.hasLocalLinkage()) { 158 GV.eraseFromParent(); 159 ++ChangesMade; 160 continue; 161 } 162 163 if (isUnmergeableGlobal(&GV, UsedGlobals)) 164 continue; 165 166 // This transformation is legal for weak ODR globals in the sense it 167 // doesn't change semantics, but we really don't want to perform it 168 // anyway; it's likely to pessimize code generation, and some tools 169 // (like the Darwin linker in cases involving CFString) don't expect it. 170 if (GV.isWeakForLinker()) 171 continue; 172 173 // Don't touch globals with metadata other then !dbg. 174 if (hasMetadataOtherThanDebugLoc(&GV)) 175 continue; 176 177 Constant *Init = GV.getInitializer(); 178 179 // Check to see if the initializer is already known. 180 GlobalVariable *&Slot = CMap[Init]; 181 182 // If this is the first constant we find or if the old one is local, 183 // replace with the current one. If the current is externally visible 184 // it cannot be replace, but can be the canonical constant we merge with. 185 bool FirstConstantFound = !Slot; 186 if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) { 187 Slot = &GV; 188 LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName() 189 << (FirstConstantFound ? "\n" : " (updated)\n")); 190 } 191 } 192 193 // Identify all globals that can be merged together, filling in the 194 // SameContentReplacements vector. We cannot do the replacement in this pass 195 // because doing so may cause initializers of other globals to be rewritten, 196 // invalidating the Constant* pointers in CMap. 197 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 198 if (isUnmergeableGlobal(&GV, UsedGlobals)) 199 continue; 200 201 // We can only replace constant with local linkage. 202 if (!GV.hasLocalLinkage()) 203 continue; 204 205 Constant *Init = GV.getInitializer(); 206 207 // Check to see if the initializer is already known. 208 auto Found = CMap.find(Init); 209 if (Found == CMap.end()) 210 continue; 211 212 GlobalVariable *Slot = Found->second; 213 if (Slot == &GV) 214 continue; 215 216 if (makeMergeable(&GV, Slot) == CanMerge::No) 217 continue; 218 219 // Make all uses of the duplicate constant use the canonical version. 220 LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @" 221 << Slot->getName() << "\n"); 222 SameContentReplacements.push_back(std::make_pair(&GV, Slot)); 223 } 224 225 // Now that we have figured out which replacements must be made, do them all 226 // now. This avoid invalidating the pointers in CMap, which are unneeded 227 // now. 228 for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) { 229 GlobalVariable *Old = SameContentReplacements[i].first; 230 GlobalVariable *New = SameContentReplacements[i].second; 231 replace(M, Old, New); 232 ++ChangesMade; 233 ++NumIdenticalMerged; 234 } 235 236 if (ChangesMade == OldChangesMade) 237 break; 238 OldChangesMade = ChangesMade; 239 240 SameContentReplacements.clear(); 241 CMap.clear(); 242 } 243 244 return ChangesMade; 245 } 246 247 PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { 248 if (!mergeConstants(M)) 249 return PreservedAnalyses::all(); 250 return PreservedAnalyses::none(); 251 } 252