1 //===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ValueEnumerator class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ValueEnumerator.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/Config/llvm-config.h" 16 #include "llvm/IR/Argument.h" 17 #include "llvm/IR/BasicBlock.h" 18 #include "llvm/IR/Constant.h" 19 #include "llvm/IR/DebugInfoMetadata.h" 20 #include "llvm/IR/DerivedTypes.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/GlobalAlias.h" 23 #include "llvm/IR/GlobalIFunc.h" 24 #include "llvm/IR/GlobalObject.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/GlobalVariable.h" 27 #include "llvm/IR/Instruction.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Metadata.h" 30 #include "llvm/IR/Module.h" 31 #include "llvm/IR/Operator.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Use.h" 34 #include "llvm/IR/User.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/IR/ValueSymbolTable.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/MathExtras.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <algorithm> 43 #include <cstddef> 44 #include <iterator> 45 #include <tuple> 46 47 using namespace llvm; 48 49 namespace { 50 51 struct OrderMap { 52 DenseMap<const Value *, std::pair<unsigned, bool>> IDs; 53 unsigned LastGlobalValueID = 0; 54 55 OrderMap() = default; 56 57 bool isGlobalValue(unsigned ID) const { 58 return ID <= LastGlobalValueID; 59 } 60 61 unsigned size() const { return IDs.size(); } 62 std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; } 63 64 std::pair<unsigned, bool> lookup(const Value *V) const { 65 return IDs.lookup(V); 66 } 67 68 void index(const Value *V) { 69 // Explicitly sequence get-size and insert-value operations to avoid UB. 70 unsigned ID = IDs.size() + 1; 71 IDs[V].first = ID; 72 } 73 }; 74 75 } // end anonymous namespace 76 77 static void orderValue(const Value *V, OrderMap &OM) { 78 if (OM.lookup(V).first) 79 return; 80 81 if (const Constant *C = dyn_cast<Constant>(V)) { 82 if (C->getNumOperands()) { 83 for (const Value *Op : C->operands()) 84 if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op)) 85 orderValue(Op, OM); 86 if (auto *CE = dyn_cast<ConstantExpr>(C)) 87 if (CE->getOpcode() == Instruction::ShuffleVector) 88 orderValue(CE->getShuffleMaskForBitcode(), OM); 89 } 90 } 91 92 // Note: we cannot cache this lookup above, since inserting into the map 93 // changes the map's size, and thus affects the other IDs. 94 OM.index(V); 95 } 96 97 static OrderMap orderModule(const Module &M) { 98 // This needs to match the order used by ValueEnumerator::ValueEnumerator() 99 // and ValueEnumerator::incorporateFunction(). 100 OrderMap OM; 101 102 // Initializers of GlobalValues are processed in 103 // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather 104 // than ValueEnumerator, and match the code in predictValueUseListOrderImpl() 105 // by giving IDs in reverse order. 106 // 107 // Since GlobalValues never reference each other directly (just through 108 // initializers), their relative IDs only matter for determining order of 109 // uses in their initializers. 110 for (const GlobalVariable &G : reverse(M.globals())) 111 orderValue(&G, OM); 112 for (const GlobalAlias &A : reverse(M.aliases())) 113 orderValue(&A, OM); 114 for (const GlobalIFunc &I : reverse(M.ifuncs())) 115 orderValue(&I, OM); 116 for (const Function &F : reverse(M)) 117 orderValue(&F, OM); 118 OM.LastGlobalValueID = OM.size(); 119 120 auto orderConstantValue = [&OM](const Value *V) { 121 if (isa<Constant>(V) || isa<InlineAsm>(V)) 122 orderValue(V, OM); 123 }; 124 125 for (const Function &F : M) { 126 if (F.isDeclaration()) 127 continue; 128 // Here we need to match the union of ValueEnumerator::incorporateFunction() 129 // and WriteFunction(). Basic blocks are implicitly declared before 130 // anything else (by declaring their size). 131 for (const BasicBlock &BB : F) 132 orderValue(&BB, OM); 133 134 // Metadata used by instructions is decoded before the actual instructions, 135 // so visit any constants used by it beforehand. 136 for (const BasicBlock &BB : F) 137 for (const Instruction &I : BB) { 138 auto OrderConstantFromMetadata = [&](Metadata *MD) { 139 if (const auto *VAM = dyn_cast<ValueAsMetadata>(MD)) { 140 orderConstantValue(VAM->getValue()); 141 } else if (const auto *AL = dyn_cast<DIArgList>(MD)) { 142 for (const auto *VAM : AL->getArgs()) 143 orderConstantValue(VAM->getValue()); 144 } 145 }; 146 147 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { 148 OrderConstantFromMetadata(DVR.getRawLocation()); 149 if (DVR.isDbgAssign()) 150 OrderConstantFromMetadata(DVR.getRawAddress()); 151 } 152 153 for (const Value *V : I.operands()) { 154 if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) 155 OrderConstantFromMetadata(MAV->getMetadata()); 156 } 157 } 158 159 for (const Argument &A : F.args()) 160 orderValue(&A, OM); 161 for (const BasicBlock &BB : F) 162 for (const Instruction &I : BB) { 163 for (const Value *Op : I.operands()) 164 orderConstantValue(Op); 165 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) 166 orderValue(SVI->getShuffleMaskForBitcode(), OM); 167 orderValue(&I, OM); 168 } 169 } 170 return OM; 171 } 172 173 static void predictValueUseListOrderImpl(const Value *V, const Function *F, 174 unsigned ID, const OrderMap &OM, 175 UseListOrderStack &Stack) { 176 // Predict use-list order for this one. 177 using Entry = std::pair<const Use *, unsigned>; 178 SmallVector<Entry, 64> List; 179 for (const Use &U : V->uses()) 180 // Check if this user will be serialized. 181 if (OM.lookup(U.getUser()).first) 182 List.push_back(std::make_pair(&U, List.size())); 183 184 if (List.size() < 2) 185 // We may have lost some users. 186 return; 187 188 bool IsGlobalValue = OM.isGlobalValue(ID); 189 llvm::sort(List, [&](const Entry &L, const Entry &R) { 190 const Use *LU = L.first; 191 const Use *RU = R.first; 192 if (LU == RU) 193 return false; 194 195 auto LID = OM.lookup(LU->getUser()).first; 196 auto RID = OM.lookup(RU->getUser()).first; 197 198 // If ID is 4, then expect: 7 6 5 1 2 3. 199 if (LID < RID) { 200 if (RID <= ID) 201 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 202 return true; 203 return false; 204 } 205 if (RID < LID) { 206 if (LID <= ID) 207 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 208 return false; 209 return true; 210 } 211 212 // LID and RID are equal, so we have different operands of the same user. 213 // Assume operands are added in order for all instructions. 214 if (LID <= ID) 215 if (!IsGlobalValue) // GlobalValue uses don't get reversed. 216 return LU->getOperandNo() < RU->getOperandNo(); 217 return LU->getOperandNo() > RU->getOperandNo(); 218 }); 219 220 if (llvm::is_sorted(List, llvm::less_second())) 221 // Order is already correct. 222 return; 223 224 // Store the shuffle. 225 Stack.emplace_back(V, F, List.size()); 226 assert(List.size() == Stack.back().Shuffle.size() && "Wrong size"); 227 for (size_t I = 0, E = List.size(); I != E; ++I) 228 Stack.back().Shuffle[I] = List[I].second; 229 } 230 231 static void predictValueUseListOrder(const Value *V, const Function *F, 232 OrderMap &OM, UseListOrderStack &Stack) { 233 if (!V->hasUseList()) 234 return; 235 236 auto &IDPair = OM[V]; 237 assert(IDPair.first && "Unmapped value"); 238 if (IDPair.second) 239 // Already predicted. 240 return; 241 242 // Do the actual prediction. 243 IDPair.second = true; 244 if (!V->use_empty() && std::next(V->use_begin()) != V->use_end()) 245 predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack); 246 247 // Recursive descent into constants. 248 if (const Constant *C = dyn_cast<Constant>(V)) { 249 if (C->getNumOperands()) { // Visit GlobalValues. 250 for (const Value *Op : C->operands()) 251 if (isa<Constant>(Op)) // Visit GlobalValues. 252 predictValueUseListOrder(Op, F, OM, Stack); 253 if (auto *CE = dyn_cast<ConstantExpr>(C)) 254 if (CE->getOpcode() == Instruction::ShuffleVector) 255 predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM, 256 Stack); 257 } 258 } 259 } 260 261 static UseListOrderStack predictUseListOrder(const Module &M) { 262 OrderMap OM = orderModule(M); 263 264 // Use-list orders need to be serialized after all the users have been added 265 // to a value, or else the shuffles will be incomplete. Store them per 266 // function in a stack. 267 // 268 // Aside from function order, the order of values doesn't matter much here. 269 UseListOrderStack Stack; 270 271 // We want to visit the functions backward now so we can list function-local 272 // constants in the last Function they're used in. Module-level constants 273 // have already been visited above. 274 for (const Function &F : llvm::reverse(M)) { 275 auto PredictValueOrderFromMetadata = [&](Metadata *MD) { 276 if (const auto *VAM = dyn_cast<ValueAsMetadata>(MD)) { 277 predictValueUseListOrder(VAM->getValue(), &F, OM, Stack); 278 } else if (const auto *AL = dyn_cast<DIArgList>(MD)) { 279 for (const auto *VAM : AL->getArgs()) 280 predictValueUseListOrder(VAM->getValue(), &F, OM, Stack); 281 } 282 }; 283 if (F.isDeclaration()) 284 continue; 285 for (const BasicBlock &BB : F) 286 predictValueUseListOrder(&BB, &F, OM, Stack); 287 for (const Argument &A : F.args()) 288 predictValueUseListOrder(&A, &F, OM, Stack); 289 for (const BasicBlock &BB : F) { 290 for (const Instruction &I : BB) { 291 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { 292 PredictValueOrderFromMetadata(DVR.getRawLocation()); 293 if (DVR.isDbgAssign()) 294 PredictValueOrderFromMetadata(DVR.getRawAddress()); 295 } 296 for (const Value *Op : I.operands()) { 297 if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues. 298 predictValueUseListOrder(Op, &F, OM, Stack); 299 if (const auto *MAV = dyn_cast<MetadataAsValue>(Op)) 300 PredictValueOrderFromMetadata(MAV->getMetadata()); 301 } 302 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) 303 predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM, 304 Stack); 305 predictValueUseListOrder(&I, &F, OM, Stack); 306 } 307 } 308 } 309 310 // Visit globals last, since the module-level use-list block will be seen 311 // before the function bodies are processed. 312 for (const GlobalVariable &G : M.globals()) 313 predictValueUseListOrder(&G, nullptr, OM, Stack); 314 for (const Function &F : M) 315 predictValueUseListOrder(&F, nullptr, OM, Stack); 316 for (const GlobalAlias &A : M.aliases()) 317 predictValueUseListOrder(&A, nullptr, OM, Stack); 318 for (const GlobalIFunc &I : M.ifuncs()) 319 predictValueUseListOrder(&I, nullptr, OM, Stack); 320 for (const GlobalVariable &G : M.globals()) 321 if (G.hasInitializer()) 322 predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack); 323 for (const GlobalAlias &A : M.aliases()) 324 predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); 325 for (const GlobalIFunc &I : M.ifuncs()) 326 predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack); 327 for (const Function &F : M) { 328 for (const Use &U : F.operands()) 329 predictValueUseListOrder(U.get(), nullptr, OM, Stack); 330 } 331 332 return Stack; 333 } 334 335 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { 336 return V.first->getType()->isIntOrIntVectorTy(); 337 } 338 339 ValueEnumerator::ValueEnumerator(const Module &M, 340 bool ShouldPreserveUseListOrder) 341 : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { 342 if (ShouldPreserveUseListOrder) 343 UseListOrders = predictUseListOrder(M); 344 345 // Enumerate the global variables. 346 for (const GlobalVariable &GV : M.globals()) { 347 EnumerateValue(&GV); 348 EnumerateType(GV.getValueType()); 349 } 350 351 // Enumerate the functions. 352 for (const Function & F : M) { 353 EnumerateValue(&F); 354 EnumerateType(F.getValueType()); 355 EnumerateAttributes(F.getAttributes()); 356 } 357 358 // Enumerate the aliases. 359 for (const GlobalAlias &GA : M.aliases()) { 360 EnumerateValue(&GA); 361 EnumerateType(GA.getValueType()); 362 } 363 364 // Enumerate the ifuncs. 365 for (const GlobalIFunc &GIF : M.ifuncs()) { 366 EnumerateValue(&GIF); 367 EnumerateType(GIF.getValueType()); 368 } 369 370 // Remember what is the cutoff between globalvalue's and other constants. 371 unsigned FirstConstant = Values.size(); 372 373 // Enumerate the global variable initializers and attributes. 374 for (const GlobalVariable &GV : M.globals()) { 375 if (GV.hasInitializer()) 376 EnumerateValue(GV.getInitializer()); 377 if (GV.hasAttributes()) 378 EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex)); 379 } 380 381 // Enumerate the aliasees. 382 for (const GlobalAlias &GA : M.aliases()) 383 EnumerateValue(GA.getAliasee()); 384 385 // Enumerate the ifunc resolvers. 386 for (const GlobalIFunc &GIF : M.ifuncs()) 387 EnumerateValue(GIF.getResolver()); 388 389 // Enumerate any optional Function data. 390 for (const Function &F : M) 391 for (const Use &U : F.operands()) 392 EnumerateValue(U.get()); 393 394 // Enumerate the metadata type. 395 // 396 // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode 397 // only encodes the metadata type when it's used as a value. 398 EnumerateType(Type::getMetadataTy(M.getContext())); 399 400 // Insert constants and metadata that are named at module level into the slot 401 // pool so that the module symbol table can refer to them... 402 EnumerateValueSymbolTable(M.getValueSymbolTable()); 403 EnumerateNamedMetadata(M); 404 405 SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; 406 for (const GlobalVariable &GV : M.globals()) { 407 MDs.clear(); 408 GV.getAllMetadata(MDs); 409 for (const auto &I : MDs) 410 // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer 411 // to write metadata to the global variable's own metadata block 412 // (PR28134). 413 EnumerateMetadata(nullptr, I.second); 414 } 415 416 // Enumerate types used by function bodies and argument lists. 417 for (const Function &F : M) { 418 for (const Argument &A : F.args()) 419 EnumerateType(A.getType()); 420 421 // Enumerate metadata attached to this function. 422 MDs.clear(); 423 F.getAllMetadata(MDs); 424 for (const auto &I : MDs) 425 EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second); 426 427 for (const BasicBlock &BB : F) 428 for (const Instruction &I : BB) { 429 // Local metadata is enumerated during function-incorporation, but 430 // any ConstantAsMetadata arguments in a DIArgList should be examined 431 // now. 432 auto EnumerateNonLocalValuesFromMetadata = [&](Metadata *MD) { 433 assert(MD && "Metadata unexpectedly null"); 434 if (const auto *AL = dyn_cast<DIArgList>(MD)) { 435 for (const auto *VAM : AL->getArgs()) { 436 if (isa<ConstantAsMetadata>(VAM)) 437 EnumerateMetadata(&F, VAM); 438 } 439 return; 440 } 441 442 if (!isa<LocalAsMetadata>(MD)) 443 EnumerateMetadata(&F, MD); 444 }; 445 446 for (DbgRecord &DR : I.getDbgRecordRange()) { 447 if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) { 448 EnumerateMetadata(&F, DLR->getLabel()); 449 EnumerateMetadata(&F, &*DLR->getDebugLoc()); 450 continue; 451 } 452 // Enumerate non-local location metadata. 453 DbgVariableRecord &DVR = cast<DbgVariableRecord>(DR); 454 EnumerateNonLocalValuesFromMetadata(DVR.getRawLocation()); 455 EnumerateMetadata(&F, DVR.getExpression()); 456 EnumerateMetadata(&F, DVR.getVariable()); 457 EnumerateMetadata(&F, &*DVR.getDebugLoc()); 458 if (DVR.isDbgAssign()) { 459 EnumerateNonLocalValuesFromMetadata(DVR.getRawAddress()); 460 EnumerateMetadata(&F, DVR.getAssignID()); 461 EnumerateMetadata(&F, DVR.getAddressExpression()); 462 } 463 } 464 for (const Use &Op : I.operands()) { 465 auto *MD = dyn_cast<MetadataAsValue>(&Op); 466 if (!MD) { 467 EnumerateOperandType(Op); 468 continue; 469 } 470 471 EnumerateNonLocalValuesFromMetadata(MD->getMetadata()); 472 } 473 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) 474 EnumerateType(SVI->getShuffleMaskForBitcode()->getType()); 475 if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) 476 EnumerateType(GEP->getSourceElementType()); 477 if (auto *AI = dyn_cast<AllocaInst>(&I)) 478 EnumerateType(AI->getAllocatedType()); 479 EnumerateType(I.getType()); 480 if (const auto *Call = dyn_cast<CallBase>(&I)) { 481 EnumerateAttributes(Call->getAttributes()); 482 EnumerateType(Call->getFunctionType()); 483 } 484 485 // Enumerate metadata attached with this instruction. 486 MDs.clear(); 487 I.getAllMetadataOtherThanDebugLoc(MDs); 488 for (const auto &MD : MDs) 489 EnumerateMetadata(&F, MD.second); 490 491 // Don't enumerate the location directly -- it has a special record 492 // type -- but enumerate its operands. 493 if (DILocation *L = I.getDebugLoc()) 494 for (const Metadata *Op : L->operands()) 495 EnumerateMetadata(&F, Op); 496 } 497 } 498 499 // Optimize constant ordering. 500 OptimizeConstants(FirstConstant, Values.size()); 501 502 // Organize metadata ordering. 503 organizeMetadata(); 504 } 505 506 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { 507 InstructionMapType::const_iterator I = InstructionMap.find(Inst); 508 assert(I != InstructionMap.end() && "Instruction is not mapped!"); 509 return I->second; 510 } 511 512 unsigned ValueEnumerator::getComdatID(const Comdat *C) const { 513 unsigned ComdatID = Comdats.idFor(C); 514 assert(ComdatID && "Comdat not found!"); 515 return ComdatID; 516 } 517 518 void ValueEnumerator::setInstructionID(const Instruction *I) { 519 InstructionMap[I] = InstructionCount++; 520 } 521 522 unsigned ValueEnumerator::getValueID(const Value *V) const { 523 if (auto *MD = dyn_cast<MetadataAsValue>(V)) 524 return getMetadataID(MD->getMetadata()); 525 526 ValueMapType::const_iterator I = ValueMap.find(V); 527 assert(I != ValueMap.end() && "Value not in slotcalculator!"); 528 return I->second-1; 529 } 530 531 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 532 LLVM_DUMP_METHOD void ValueEnumerator::dump() const { 533 print(dbgs(), ValueMap, "Default"); 534 dbgs() << '\n'; 535 print(dbgs(), MetadataMap, "MetaData"); 536 dbgs() << '\n'; 537 } 538 #endif 539 540 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, 541 const char *Name) const { 542 OS << "Map Name: " << Name << "\n"; 543 OS << "Size: " << Map.size() << "\n"; 544 for (const auto &I : Map) { 545 const Value *V = I.first; 546 if (V->hasName()) 547 OS << "Value: " << V->getName(); 548 else 549 OS << "Value: [null]\n"; 550 V->print(errs()); 551 errs() << '\n'; 552 553 OS << " Uses(" << V->getNumUses() << "):"; 554 for (const Use &U : V->uses()) { 555 if (&U != &*V->use_begin()) 556 OS << ","; 557 if(U->hasName()) 558 OS << " " << U->getName(); 559 else 560 OS << " [null]"; 561 562 } 563 OS << "\n\n"; 564 } 565 } 566 567 void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map, 568 const char *Name) const { 569 OS << "Map Name: " << Name << "\n"; 570 OS << "Size: " << Map.size() << "\n"; 571 for (const auto &I : Map) { 572 const Metadata *MD = I.first; 573 OS << "Metadata: slot = " << I.second.ID << "\n"; 574 OS << "Metadata: function = " << I.second.F << "\n"; 575 MD->print(OS); 576 OS << "\n"; 577 } 578 } 579 580 /// OptimizeConstants - Reorder constant pool for denser encoding. 581 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { 582 if (CstStart == CstEnd || CstStart+1 == CstEnd) return; 583 584 if (ShouldPreserveUseListOrder) 585 // Optimizing constants makes the use-list order difficult to predict. 586 // Disable it for now when trying to preserve the order. 587 return; 588 589 std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd, 590 [this](const std::pair<const Value *, unsigned> &LHS, 591 const std::pair<const Value *, unsigned> &RHS) { 592 // Sort by plane. 593 if (LHS.first->getType() != RHS.first->getType()) 594 return getTypeID(LHS.first->getType()) < getTypeID(RHS.first->getType()); 595 // Then by frequency. 596 return LHS.second > RHS.second; 597 }); 598 599 // Ensure that integer and vector of integer constants are at the start of the 600 // constant pool. This is important so that GEP structure indices come before 601 // gep constant exprs. 602 std::stable_partition(Values.begin() + CstStart, Values.begin() + CstEnd, 603 isIntOrIntVectorValue); 604 605 // Rebuild the modified portion of ValueMap. 606 for (; CstStart != CstEnd; ++CstStart) 607 ValueMap[Values[CstStart].first] = CstStart+1; 608 } 609 610 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol 611 /// table into the values table. 612 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { 613 for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); 614 VI != VE; ++VI) 615 EnumerateValue(VI->getValue()); 616 } 617 618 /// Insert all of the values referenced by named metadata in the specified 619 /// module. 620 void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { 621 for (const auto &I : M.named_metadata()) 622 EnumerateNamedMDNode(&I); 623 } 624 625 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { 626 for (const MDNode *N : MD->operands()) 627 EnumerateMetadata(nullptr, N); 628 } 629 630 unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { 631 return F ? getValueID(F) + 1 : 0; 632 } 633 634 void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) { 635 EnumerateMetadata(getMetadataFunctionID(F), MD); 636 } 637 638 void ValueEnumerator::EnumerateFunctionLocalMetadata( 639 const Function &F, const LocalAsMetadata *Local) { 640 EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local); 641 } 642 643 void ValueEnumerator::EnumerateFunctionLocalListMetadata( 644 const Function &F, const DIArgList *ArgList) { 645 EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F), ArgList); 646 } 647 648 void ValueEnumerator::dropFunctionFromMetadata( 649 MetadataMapType::value_type &FirstMD) { 650 SmallVector<const MDNode *, 64> Worklist; 651 auto push = [&Worklist](MetadataMapType::value_type &MD) { 652 auto &Entry = MD.second; 653 654 // Nothing to do if this metadata isn't tagged. 655 if (!Entry.F) 656 return; 657 658 // Drop the function tag. 659 Entry.F = 0; 660 661 // If this is has an ID and is an MDNode, then its operands have entries as 662 // well. We need to drop the function from them too. 663 if (Entry.ID) 664 if (auto *N = dyn_cast<MDNode>(MD.first)) 665 Worklist.push_back(N); 666 }; 667 push(FirstMD); 668 while (!Worklist.empty()) 669 for (const Metadata *Op : Worklist.pop_back_val()->operands()) { 670 if (!Op) 671 continue; 672 auto MD = MetadataMap.find(Op); 673 if (MD != MetadataMap.end()) 674 push(*MD); 675 } 676 } 677 678 void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) { 679 // It's vital for reader efficiency that uniqued subgraphs are done in 680 // post-order; it's expensive when their operands have forward references. 681 // If a distinct node is referenced from a uniqued node, it'll be delayed 682 // until the uniqued subgraph has been completely traversed. 683 SmallVector<const MDNode *, 32> DelayedDistinctNodes; 684 685 // Start by enumerating MD, and then work through its transitive operands in 686 // post-order. This requires a depth-first search. 687 SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist; 688 if (const MDNode *N = enumerateMetadataImpl(F, MD)) 689 Worklist.push_back(std::make_pair(N, N->op_begin())); 690 691 while (!Worklist.empty()) { 692 const MDNode *N = Worklist.back().first; 693 694 // Enumerate operands until we hit a new node. We need to traverse these 695 // nodes' operands before visiting the rest of N's operands. 696 MDNode::op_iterator I = std::find_if( 697 Worklist.back().second, N->op_end(), 698 [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); }); 699 if (I != N->op_end()) { 700 auto *Op = cast<MDNode>(*I); 701 Worklist.back().second = ++I; 702 703 // Delay traversing Op if it's a distinct node and N is uniqued. 704 if (Op->isDistinct() && !N->isDistinct()) 705 DelayedDistinctNodes.push_back(Op); 706 else 707 Worklist.push_back(std::make_pair(Op, Op->op_begin())); 708 continue; 709 } 710 711 // All the operands have been visited. Now assign an ID. 712 Worklist.pop_back(); 713 MDs.push_back(N); 714 MetadataMap[N].ID = MDs.size(); 715 716 // Flush out any delayed distinct nodes; these are all the distinct nodes 717 // that are leaves in last uniqued subgraph. 718 if (Worklist.empty() || Worklist.back().first->isDistinct()) { 719 for (const MDNode *N : DelayedDistinctNodes) 720 Worklist.push_back(std::make_pair(N, N->op_begin())); 721 DelayedDistinctNodes.clear(); 722 } 723 } 724 } 725 726 const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F, const Metadata *MD) { 727 if (!MD) 728 return nullptr; 729 730 assert( 731 (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) && 732 "Invalid metadata kind"); 733 734 auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F))); 735 MDIndex &Entry = Insertion.first->second; 736 if (!Insertion.second) { 737 // Already mapped. If F doesn't match the function tag, drop it. 738 if (Entry.hasDifferentFunction(F)) 739 dropFunctionFromMetadata(*Insertion.first); 740 return nullptr; 741 } 742 743 // Don't assign IDs to metadata nodes. 744 if (auto *N = dyn_cast<MDNode>(MD)) 745 return N; 746 747 // Save the metadata. 748 MDs.push_back(MD); 749 Entry.ID = MDs.size(); 750 751 // Enumerate the constant, if any. 752 if (auto *C = dyn_cast<ConstantAsMetadata>(MD)) 753 EnumerateValue(C->getValue()); 754 755 return nullptr; 756 } 757 758 /// EnumerateFunctionLocalMetadata - Incorporate function-local metadata 759 /// information reachable from the metadata. 760 void ValueEnumerator::EnumerateFunctionLocalMetadata( 761 unsigned F, const LocalAsMetadata *Local) { 762 assert(F && "Expected a function"); 763 764 // Check to see if it's already in! 765 MDIndex &Index = MetadataMap[Local]; 766 if (Index.ID) { 767 assert(Index.F == F && "Expected the same function"); 768 return; 769 } 770 771 MDs.push_back(Local); 772 Index.F = F; 773 Index.ID = MDs.size(); 774 775 EnumerateValue(Local->getValue()); 776 } 777 778 /// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata 779 /// information reachable from the metadata. 780 void ValueEnumerator::EnumerateFunctionLocalListMetadata( 781 unsigned F, const DIArgList *ArgList) { 782 assert(F && "Expected a function"); 783 784 // Check to see if it's already in! 785 MDIndex &Index = MetadataMap[ArgList]; 786 if (Index.ID) { 787 assert(Index.F == F && "Expected the same function"); 788 return; 789 } 790 791 for (ValueAsMetadata *VAM : ArgList->getArgs()) { 792 if (isa<LocalAsMetadata>(VAM)) { 793 assert(MetadataMap.count(VAM) && 794 "LocalAsMetadata should be enumerated before DIArgList"); 795 assert(MetadataMap[VAM].F == F && 796 "Expected LocalAsMetadata in the same function"); 797 } else { 798 assert(isa<ConstantAsMetadata>(VAM) && 799 "Expected LocalAsMetadata or ConstantAsMetadata"); 800 assert(ValueMap.count(VAM->getValue()) && 801 "Constant should be enumerated beforeDIArgList"); 802 EnumerateMetadata(F, VAM); 803 } 804 } 805 806 MDs.push_back(ArgList); 807 Index.F = F; 808 Index.ID = MDs.size(); 809 } 810 811 static unsigned getMetadataTypeOrder(const Metadata *MD) { 812 // Strings are emitted in bulk and must come first. 813 if (isa<MDString>(MD)) 814 return 0; 815 816 // ConstantAsMetadata doesn't reference anything. We may as well shuffle it 817 // to the front since we can detect it. 818 auto *N = dyn_cast<MDNode>(MD); 819 if (!N) 820 return 1; 821 822 // The reader is fast forward references for distinct node operands, but slow 823 // when uniqued operands are unresolved. 824 return N->isDistinct() ? 2 : 3; 825 } 826 827 void ValueEnumerator::organizeMetadata() { 828 assert(MetadataMap.size() == MDs.size() && 829 "Metadata map and vector out of sync"); 830 831 if (MDs.empty()) 832 return; 833 834 // Copy out the index information from MetadataMap in order to choose a new 835 // order. 836 SmallVector<MDIndex, 64> Order; 837 Order.reserve(MetadataMap.size()); 838 for (const Metadata *MD : MDs) 839 Order.push_back(MetadataMap.lookup(MD)); 840 841 // Partition: 842 // - by function, then 843 // - by isa<MDString> 844 // and then sort by the original/current ID. Since the IDs are guaranteed to 845 // be unique, the result of llvm::sort will be deterministic. There's no need 846 // for std::stable_sort. 847 llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) { 848 return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < 849 std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID); 850 }); 851 852 // Rebuild MDs, index the metadata ranges for each function in FunctionMDs, 853 // and fix up MetadataMap. 854 std::vector<const Metadata *> OldMDs; 855 MDs.swap(OldMDs); 856 MDs.reserve(OldMDs.size()); 857 for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) { 858 auto *MD = Order[I].get(OldMDs); 859 MDs.push_back(MD); 860 MetadataMap[MD].ID = I + 1; 861 if (isa<MDString>(MD)) 862 ++NumMDStrings; 863 } 864 865 // Return early if there's nothing for the functions. 866 if (MDs.size() == Order.size()) 867 return; 868 869 // Build the function metadata ranges. 870 MDRange R; 871 FunctionMDs.reserve(OldMDs.size()); 872 unsigned PrevF = 0; 873 for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E; 874 ++I) { 875 unsigned F = Order[I].F; 876 if (!PrevF) { 877 PrevF = F; 878 } else if (PrevF != F) { 879 R.Last = FunctionMDs.size(); 880 std::swap(R, FunctionMDInfo[PrevF]); 881 R.First = FunctionMDs.size(); 882 883 ID = MDs.size(); 884 PrevF = F; 885 } 886 887 auto *MD = Order[I].get(OldMDs); 888 FunctionMDs.push_back(MD); 889 MetadataMap[MD].ID = ++ID; 890 if (isa<MDString>(MD)) 891 ++R.NumStrings; 892 } 893 R.Last = FunctionMDs.size(); 894 FunctionMDInfo[PrevF] = R; 895 } 896 897 void ValueEnumerator::incorporateFunctionMetadata(const Function &F) { 898 NumModuleMDs = MDs.size(); 899 900 auto R = FunctionMDInfo.lookup(getValueID(&F) + 1); 901 NumMDStrings = R.NumStrings; 902 MDs.insert(MDs.end(), FunctionMDs.begin() + R.First, 903 FunctionMDs.begin() + R.Last); 904 } 905 906 void ValueEnumerator::EnumerateValue(const Value *V) { 907 assert(!V->getType()->isVoidTy() && "Can't insert void values!"); 908 assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!"); 909 910 // Check to see if it's already in! 911 unsigned &ValueID = ValueMap[V]; 912 if (ValueID) { 913 // Increment use count. 914 Values[ValueID-1].second++; 915 return; 916 } 917 918 if (auto *GO = dyn_cast<GlobalObject>(V)) 919 if (const Comdat *C = GO->getComdat()) 920 Comdats.insert(C); 921 922 // Enumerate the type of this value. 923 EnumerateType(V->getType()); 924 925 if (const Constant *C = dyn_cast<Constant>(V)) { 926 if (isa<GlobalValue>(C)) { 927 // Initializers for globals are handled explicitly elsewhere. 928 } else if (C->getNumOperands()) { 929 // If a constant has operands, enumerate them. This makes sure that if a 930 // constant has uses (for example an array of const ints), that they are 931 // inserted also. 932 933 // We prefer to enumerate them with values before we enumerate the user 934 // itself. This makes it more likely that we can avoid forward references 935 // in the reader. We know that there can be no cycles in the constants 936 // graph that don't go through a global variable. 937 for (const Use &U : C->operands()) 938 if (!isa<BasicBlock>(U)) // Don't enumerate BB operand to BlockAddress. 939 EnumerateValue(U); 940 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 941 if (CE->getOpcode() == Instruction::ShuffleVector) 942 EnumerateValue(CE->getShuffleMaskForBitcode()); 943 if (auto *GEP = dyn_cast<GEPOperator>(CE)) 944 EnumerateType(GEP->getSourceElementType()); 945 } 946 947 // Finally, add the value. Doing this could make the ValueID reference be 948 // dangling, don't reuse it. 949 Values.push_back(std::make_pair(V, 1U)); 950 ValueMap[V] = Values.size(); 951 return; 952 } 953 } 954 955 // Add the value. 956 Values.push_back(std::make_pair(V, 1U)); 957 ValueID = Values.size(); 958 } 959 960 961 void ValueEnumerator::EnumerateType(Type *Ty) { 962 unsigned *TypeID = &TypeMap[Ty]; 963 964 // We've already seen this type. 965 if (*TypeID) 966 return; 967 968 // If it is a non-anonymous struct, mark the type as being visited so that we 969 // don't recursively visit it. This is safe because we allow forward 970 // references of these in the bitcode reader. 971 if (StructType *STy = dyn_cast<StructType>(Ty)) 972 if (!STy->isLiteral()) 973 *TypeID = ~0U; 974 975 // Enumerate all of the subtypes before we enumerate this type. This ensures 976 // that the type will be enumerated in an order that can be directly built. 977 for (Type *SubTy : Ty->subtypes()) 978 EnumerateType(SubTy); 979 980 // Refresh the TypeID pointer in case the table rehashed. 981 TypeID = &TypeMap[Ty]; 982 983 // Check to see if we got the pointer another way. This can happen when 984 // enumerating recursive types that hit the base case deeper than they start. 985 // 986 // If this is actually a struct that we are treating as forward ref'able, 987 // then emit the definition now that all of its contents are available. 988 if (*TypeID && *TypeID != ~0U) 989 return; 990 991 // Add this type now that its contents are all happily enumerated. 992 Types.push_back(Ty); 993 994 *TypeID = Types.size(); 995 } 996 997 // Enumerate the types for the specified value. If the value is a constant, 998 // walk through it, enumerating the types of the constant. 999 void ValueEnumerator::EnumerateOperandType(const Value *V) { 1000 EnumerateType(V->getType()); 1001 1002 assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand"); 1003 1004 const Constant *C = dyn_cast<Constant>(V); 1005 if (!C) 1006 return; 1007 1008 // If this constant is already enumerated, ignore it, we know its type must 1009 // be enumerated. 1010 if (ValueMap.count(C)) 1011 return; 1012 1013 // This constant may have operands, make sure to enumerate the types in 1014 // them. 1015 for (const Value *Op : C->operands()) { 1016 // Don't enumerate basic blocks here, this happens as operands to 1017 // blockaddress. 1018 if (isa<BasicBlock>(Op)) 1019 continue; 1020 1021 EnumerateOperandType(Op); 1022 } 1023 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 1024 if (CE->getOpcode() == Instruction::ShuffleVector) 1025 EnumerateOperandType(CE->getShuffleMaskForBitcode()); 1026 if (CE->getOpcode() == Instruction::GetElementPtr) 1027 EnumerateType(cast<GEPOperator>(CE)->getSourceElementType()); 1028 } 1029 } 1030 1031 void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { 1032 if (PAL.isEmpty()) return; // null is always 0. 1033 1034 // Do a lookup. 1035 unsigned &Entry = AttributeListMap[PAL]; 1036 if (Entry == 0) { 1037 // Never saw this before, add it. 1038 AttributeLists.push_back(PAL); 1039 Entry = AttributeLists.size(); 1040 } 1041 1042 // Do lookups for all attribute groups. 1043 for (unsigned i : PAL.indexes()) { 1044 AttributeSet AS = PAL.getAttributes(i); 1045 if (!AS.hasAttributes()) 1046 continue; 1047 IndexAndAttrSet Pair = {i, AS}; 1048 unsigned &Entry = AttributeGroupMap[Pair]; 1049 if (Entry == 0) { 1050 AttributeGroups.push_back(Pair); 1051 Entry = AttributeGroups.size(); 1052 1053 for (Attribute Attr : AS) { 1054 if (Attr.isTypeAttribute()) 1055 EnumerateType(Attr.getValueAsType()); 1056 } 1057 } 1058 } 1059 } 1060 1061 void ValueEnumerator::incorporateFunction(const Function &F) { 1062 InstructionCount = 0; 1063 NumModuleValues = Values.size(); 1064 1065 // Add global metadata to the function block. This doesn't include 1066 // LocalAsMetadata. 1067 incorporateFunctionMetadata(F); 1068 1069 // Adding function arguments to the value table. 1070 for (const auto &I : F.args()) { 1071 EnumerateValue(&I); 1072 if (I.hasAttribute(Attribute::ByVal)) 1073 EnumerateType(I.getParamByValType()); 1074 else if (I.hasAttribute(Attribute::StructRet)) 1075 EnumerateType(I.getParamStructRetType()); 1076 else if (I.hasAttribute(Attribute::ByRef)) 1077 EnumerateType(I.getParamByRefType()); 1078 } 1079 FirstFuncConstantID = Values.size(); 1080 1081 // Add all function-level constants to the value table. 1082 for (const BasicBlock &BB : F) { 1083 for (const Instruction &I : BB) { 1084 for (const Use &OI : I.operands()) { 1085 if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI)) 1086 EnumerateValue(OI); 1087 } 1088 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) 1089 EnumerateValue(SVI->getShuffleMaskForBitcode()); 1090 } 1091 BasicBlocks.push_back(&BB); 1092 ValueMap[&BB] = BasicBlocks.size(); 1093 } 1094 1095 // Optimize the constant layout. 1096 OptimizeConstants(FirstFuncConstantID, Values.size()); 1097 1098 // Add the function's parameter attributes so they are available for use in 1099 // the function's instruction. 1100 EnumerateAttributes(F.getAttributes()); 1101 1102 FirstInstID = Values.size(); 1103 1104 SmallVector<LocalAsMetadata *, 8> FnLocalMDVector; 1105 SmallVector<DIArgList *, 8> ArgListMDVector; 1106 1107 auto AddFnLocalMetadata = [&](Metadata *MD) { 1108 if (!MD) 1109 return; 1110 if (auto *Local = dyn_cast<LocalAsMetadata>(MD)) { 1111 // Enumerate metadata after the instructions they might refer to. 1112 FnLocalMDVector.push_back(Local); 1113 } else if (auto *ArgList = dyn_cast<DIArgList>(MD)) { 1114 ArgListMDVector.push_back(ArgList); 1115 for (ValueAsMetadata *VMD : ArgList->getArgs()) { 1116 if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) { 1117 // Enumerate metadata after the instructions they might refer 1118 // to. 1119 FnLocalMDVector.push_back(Local); 1120 } 1121 } 1122 } 1123 }; 1124 1125 // Add all of the instructions. 1126 for (const BasicBlock &BB : F) { 1127 for (const Instruction &I : BB) { 1128 for (const Use &OI : I.operands()) { 1129 if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) 1130 AddFnLocalMetadata(MD->getMetadata()); 1131 } 1132 /// RemoveDIs: Add non-instruction function-local metadata uses. 1133 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { 1134 assert(DVR.getRawLocation() && 1135 "DbgVariableRecord location unexpectedly null"); 1136 AddFnLocalMetadata(DVR.getRawLocation()); 1137 if (DVR.isDbgAssign()) { 1138 assert(DVR.getRawAddress() && 1139 "DbgVariableRecord location unexpectedly null"); 1140 AddFnLocalMetadata(DVR.getRawAddress()); 1141 } 1142 } 1143 if (!I.getType()->isVoidTy()) 1144 EnumerateValue(&I); 1145 } 1146 } 1147 1148 // Add all of the function-local metadata. 1149 for (const LocalAsMetadata *Local : FnLocalMDVector) { 1150 // At this point, every local values have been incorporated, we shouldn't 1151 // have a metadata operand that references a value that hasn't been seen. 1152 assert(ValueMap.count(Local->getValue()) && 1153 "Missing value for metadata operand"); 1154 EnumerateFunctionLocalMetadata(F, Local); 1155 } 1156 // DIArgList entries must come after function-local metadata, as it is not 1157 // possible to forward-reference them. 1158 for (const DIArgList *ArgList : ArgListMDVector) 1159 EnumerateFunctionLocalListMetadata(F, ArgList); 1160 } 1161 1162 void ValueEnumerator::purgeFunction() { 1163 /// Remove purged values from the ValueMap. 1164 for (const auto &V : llvm::drop_begin(Values, NumModuleValues)) 1165 ValueMap.erase(V.first); 1166 for (const Metadata *MD : llvm::drop_begin(MDs, NumModuleMDs)) 1167 MetadataMap.erase(MD); 1168 for (const BasicBlock *BB : BasicBlocks) 1169 ValueMap.erase(BB); 1170 1171 Values.resize(NumModuleValues); 1172 MDs.resize(NumModuleMDs); 1173 BasicBlocks.clear(); 1174 NumMDStrings = 0; 1175 } 1176 1177 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, 1178 DenseMap<const BasicBlock*, unsigned> &IDMap) { 1179 unsigned Counter = 0; 1180 for (const BasicBlock &BB : *F) 1181 IDMap[&BB] = ++Counter; 1182 } 1183 1184 /// getGlobalBasicBlockID - This returns the function-specific ID for the 1185 /// specified basic block. This is relatively expensive information, so it 1186 /// should only be used by rare constructs such as address-of-label. 1187 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const { 1188 unsigned &Idx = GlobalBasicBlockIDs[BB]; 1189 if (Idx != 0) 1190 return Idx-1; 1191 1192 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs); 1193 return getGlobalBasicBlockID(BB); 1194 } 1195 1196 uint64_t ValueEnumerator::computeBitsRequiredForTypeIndices() const { 1197 return Log2_32_Ceil(getTypes().size() + 1); 1198 } 1199