//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file adds DWARF discriminators to the IR. Path discriminators are
// used to decide what CFG path was taken inside sub-graphs whose instructions
// share the same line and column number information.
//
// The main user of this is the sample profiler. Instruction samples are
// mapped to line number information. Since a single line may be spread
// out over several basic blocks, discriminators add more precise location
// for the samples.
//
// For example,
//
//   1  #define ASSERT(P)
//   2      if (!(P))
//   3        abort()
//   ...
//   100   while (true) {
//   101     ASSERT (sum < 0);
//   102     ...
//   130   }
//
// when converted to IR, this snippet looks something like:
//
// while.body:                                       ; preds = %entry, %if.end
//   %0 = load i32* %sum, align 4, !dbg !15
//   %cmp = icmp slt i32 %0, 0, !dbg !15
//   br i1 %cmp, label %if.end, label %if.then, !dbg !15
//
// if.then:                                          ; preds = %while.body
//   call void @abort(), !dbg !15
//   br label %if.end, !dbg !15
//
// Notice that all the instructions in blocks 'while.body' and 'if.then'
// have exactly the same debug information. When this program is sampled
// at runtime, the profiler will assume that all these instructions are
// equally frequent. This, in turn, will consider the edge while.body->if.then
// to be frequently taken (which is incorrect).
//
// By adding a discriminator value to the instructions in block 'if.then',
// we can distinguish instructions at line 101 with discriminator 0 from
// the instructions at line 101 with discriminator 1.
490b57cec5SDimitry Andric // 500b57cec5SDimitry Andric // For more details about DWARF discriminators, please visit 510b57cec5SDimitry Andric // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators 520b57cec5SDimitry Andric // 530b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric #include "llvm/Transforms/Utils/AddDiscriminators.h" 560b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 570b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 580b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 590b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 600b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 610b57cec5SDimitry Andric #include "llvm/IR/Function.h" 620b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 630b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 640b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 650b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 660b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 670b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 680b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 690b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 70fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" 710b57cec5SDimitry Andric #include <utility> 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric using namespace llvm; 74fe6060f1SDimitry Andric using namespace sampleprofutil; 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric #define DEBUG_TYPE "add-discriminators" 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric // Command line option to disable discriminator generation even in the 790b57cec5SDimitry Andric // presence of debug information. This is only needed when debugging 800b57cec5SDimitry Andric // debug info generation issues. 
810b57cec5SDimitry Andric static cl::opt<bool> NoDiscriminators( 820b57cec5SDimitry Andric "no-discriminators", cl::init(false), 830b57cec5SDimitry Andric cl::desc("Disable generation of discriminator information.")); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static bool shouldHaveDiscriminator(const Instruction *I) { 860b57cec5SDimitry Andric return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I); 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric /// Assign DWARF discriminators. 900b57cec5SDimitry Andric /// 910b57cec5SDimitry Andric /// To assign discriminators, we examine the boundaries of every 920b57cec5SDimitry Andric /// basic block and its successors. Suppose there is a basic block B1 930b57cec5SDimitry Andric /// with successor B2. The last instruction I1 in B1 and the first 940b57cec5SDimitry Andric /// instruction I2 in B2 are located at the same file and line number. 950b57cec5SDimitry Andric /// This situation is illustrated in the following code snippet: 960b57cec5SDimitry Andric /// 970b57cec5SDimitry Andric /// if (i < 10) x = i; 980b57cec5SDimitry Andric /// 990b57cec5SDimitry Andric /// entry: 1000b57cec5SDimitry Andric /// br i1 %cmp, label %if.then, label %if.end, !dbg !10 1010b57cec5SDimitry Andric /// if.then: 1020b57cec5SDimitry Andric /// %1 = load i32* %i.addr, align 4, !dbg !10 1030b57cec5SDimitry Andric /// store i32 %1, i32* %x, align 4, !dbg !10 1040b57cec5SDimitry Andric /// br label %if.end, !dbg !10 1050b57cec5SDimitry Andric /// if.end: 1060b57cec5SDimitry Andric /// ret void, !dbg !12 1070b57cec5SDimitry Andric /// 1080b57cec5SDimitry Andric /// Notice how the branch instruction in block 'entry' and all the 1090b57cec5SDimitry Andric /// instructions in block 'if.then' have the exact same debug location 1100b57cec5SDimitry Andric /// information (!dbg !10). 
1110b57cec5SDimitry Andric /// 1120b57cec5SDimitry Andric /// To distinguish instructions in block 'entry' from instructions in 1130b57cec5SDimitry Andric /// block 'if.then', we generate a new lexical block for all the 1140b57cec5SDimitry Andric /// instruction in block 'if.then' that share the same file and line 1150b57cec5SDimitry Andric /// location with the last instruction of block 'entry'. 1160b57cec5SDimitry Andric /// 1170b57cec5SDimitry Andric /// This new lexical block will have the same location information as 1180b57cec5SDimitry Andric /// the previous one, but with a new DWARF discriminator value. 1190b57cec5SDimitry Andric /// 1200b57cec5SDimitry Andric /// One of the main uses of this discriminator value is in runtime 1210b57cec5SDimitry Andric /// sample profilers. It allows the profiler to distinguish instructions 1220b57cec5SDimitry Andric /// at location !dbg !10 that execute on different basic blocks. This is 1230b57cec5SDimitry Andric /// important because while the predicate 'if (x < 10)' may have been 1240b57cec5SDimitry Andric /// executed millions of times, the assignment 'x = i' may have only 1250b57cec5SDimitry Andric /// executed a handful of times (meaning that the entry->if.then edge is 1260b57cec5SDimitry Andric /// seldom taken). 1270b57cec5SDimitry Andric /// 1280b57cec5SDimitry Andric /// If we did not have discriminator information, the profiler would 1290b57cec5SDimitry Andric /// assign the same weight to both blocks 'entry' and 'if.then', which 1300b57cec5SDimitry Andric /// in turn will make it conclude that the entry->if.then edge is very 1310b57cec5SDimitry Andric /// hot. 1320b57cec5SDimitry Andric /// 1330b57cec5SDimitry Andric /// To decide where to create new discriminator values, this function 1340b57cec5SDimitry Andric /// traverses the CFG and examines instruction at basic block boundaries. 
1350b57cec5SDimitry Andric /// If the last instruction I1 of a block B1 is at the same file and line 1360b57cec5SDimitry Andric /// location as instruction I2 of successor B2, then it creates a new 1370b57cec5SDimitry Andric /// lexical block for I2 and all the instruction in B2 that share the same 1380b57cec5SDimitry Andric /// file and line location as I2. This new lexical block will have a 1390b57cec5SDimitry Andric /// different discriminator number than I1. 1400b57cec5SDimitry Andric static bool addDiscriminators(Function &F) { 1410b57cec5SDimitry Andric // If the function has debug information, but the user has disabled 1420b57cec5SDimitry Andric // discriminators, do nothing. 1430b57cec5SDimitry Andric // Simlarly, if the function has no debug info, do nothing. 1440b57cec5SDimitry Andric if (NoDiscriminators || !F.getSubprogram()) 1450b57cec5SDimitry Andric return false; 1460b57cec5SDimitry Andric 147fe6060f1SDimitry Andric // Create FSDiscriminatorVariable if flow sensitive discriminators are used. 148fe6060f1SDimitry Andric if (EnableFSDiscriminator) 149fe6060f1SDimitry Andric createFSDiscriminatorVariable(F.getParent()); 150fe6060f1SDimitry Andric 1510b57cec5SDimitry Andric bool Changed = false; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric using Location = std::pair<StringRef, unsigned>; 1540b57cec5SDimitry Andric using BBSet = DenseSet<const BasicBlock *>; 1550b57cec5SDimitry Andric using LocationBBMap = DenseMap<Location, BBSet>; 1560b57cec5SDimitry Andric using LocationDiscriminatorMap = DenseMap<Location, unsigned>; 1570b57cec5SDimitry Andric using LocationSet = DenseSet<Location>; 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric LocationBBMap LBM; 1600b57cec5SDimitry Andric LocationDiscriminatorMap LDM; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric // Traverse all instructions in the function. 
If the source line location 1630b57cec5SDimitry Andric // of the instruction appears in other basic block, assign a new 1640b57cec5SDimitry Andric // discriminator for this instruction. 1650b57cec5SDimitry Andric for (BasicBlock &B : F) { 166*bdd1243dSDimitry Andric for (auto &I : B) { 1670b57cec5SDimitry Andric // Not all intrinsic calls should have a discriminator. 1680b57cec5SDimitry Andric // We want to avoid a non-deterministic assignment of discriminators at 1690b57cec5SDimitry Andric // different debug levels. We still allow discriminators on memory 1700b57cec5SDimitry Andric // intrinsic calls because those can be early expanded by SROA into 1710b57cec5SDimitry Andric // pairs of loads and stores, and the expanded load/store instructions 1720b57cec5SDimitry Andric // should have a valid discriminator. 1730b57cec5SDimitry Andric if (!shouldHaveDiscriminator(&I)) 1740b57cec5SDimitry Andric continue; 1750b57cec5SDimitry Andric const DILocation *DIL = I.getDebugLoc(); 1760b57cec5SDimitry Andric if (!DIL) 1770b57cec5SDimitry Andric continue; 1780b57cec5SDimitry Andric Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); 1790b57cec5SDimitry Andric auto &BBMap = LBM[L]; 1800b57cec5SDimitry Andric auto R = BBMap.insert(&B); 1810b57cec5SDimitry Andric if (BBMap.size() == 1) 1820b57cec5SDimitry Andric continue; 1830b57cec5SDimitry Andric // If we could insert more than one block with the same line+file, a 1840b57cec5SDimitry Andric // discriminator is needed to distinguish both instructions. 1850b57cec5SDimitry Andric // Only the lowest 7 bits are used to represent a discriminator to fit 1860b57cec5SDimitry Andric // it in 1 byte ULEB128 representation. 1870b57cec5SDimitry Andric unsigned Discriminator = R.second ? 
++LDM[L] : LDM[L]; 1880b57cec5SDimitry Andric auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator); 1890b57cec5SDimitry Andric if (!NewDIL) { 1900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not encode discriminator: " 1910b57cec5SDimitry Andric << DIL->getFilename() << ":" << DIL->getLine() << ":" 1920b57cec5SDimitry Andric << DIL->getColumn() << ":" << Discriminator << " " 1930b57cec5SDimitry Andric << I << "\n"); 1940b57cec5SDimitry Andric } else { 19581ad6265SDimitry Andric I.setDebugLoc(*NewDIL); 1960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" 1970b57cec5SDimitry Andric << DIL->getColumn() << ":" << Discriminator << " " << I 1980b57cec5SDimitry Andric << "\n"); 1990b57cec5SDimitry Andric } 2000b57cec5SDimitry Andric Changed = true; 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric // Traverse all instructions and assign new discriminators to call 2050b57cec5SDimitry Andric // instructions with the same lineno that are in the same basic block. 2060b57cec5SDimitry Andric // Sample base profile needs to distinguish different function calls within 2070b57cec5SDimitry Andric // a same source line for correct profile annotation. 2080b57cec5SDimitry Andric for (BasicBlock &B : F) { 2090b57cec5SDimitry Andric LocationSet CallLocations; 210*bdd1243dSDimitry Andric for (auto &I : B) { 2110b57cec5SDimitry Andric // We bypass intrinsic calls for the following two reasons: 212480093f4SDimitry Andric // 1) We want to avoid a non-deterministic assignment of 2130b57cec5SDimitry Andric // discriminators. 2140b57cec5SDimitry Andric // 2) We want to minimize the number of base discriminators used. 
2150b57cec5SDimitry Andric if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I))) 2160b57cec5SDimitry Andric continue; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric DILocation *CurrentDIL = I.getDebugLoc(); 2190b57cec5SDimitry Andric if (!CurrentDIL) 2200b57cec5SDimitry Andric continue; 2210b57cec5SDimitry Andric Location L = 2220b57cec5SDimitry Andric std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine()); 2230b57cec5SDimitry Andric if (!CallLocations.insert(L).second) { 2240b57cec5SDimitry Andric unsigned Discriminator = ++LDM[L]; 2250b57cec5SDimitry Andric auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator); 2260b57cec5SDimitry Andric if (!NewDIL) { 2270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 2280b57cec5SDimitry Andric << "Could not encode discriminator: " 2290b57cec5SDimitry Andric << CurrentDIL->getFilename() << ":" 2300b57cec5SDimitry Andric << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn() 2310b57cec5SDimitry Andric << ":" << Discriminator << " " << I << "\n"); 2320b57cec5SDimitry Andric } else { 23381ad6265SDimitry Andric I.setDebugLoc(*NewDIL); 2340b57cec5SDimitry Andric Changed = true; 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric return Changed; 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric PreservedAnalyses AddDiscriminatorsPass::run(Function &F, 2430b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 2440b57cec5SDimitry Andric if (!addDiscriminators(F)) 2450b57cec5SDimitry Andric return PreservedAnalyses::all(); 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric // FIXME: should be all() 2480b57cec5SDimitry Andric return PreservedAnalyses::none(); 2490b57cec5SDimitry Andric } 250