xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass implements GCOV-style profiling. When this pass is run it emits
// "gcno" files next to the existing source, and instruments the code so that,
// when it runs, it records the edges between blocks that were executed and
// emits a complementary "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Sequence.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/BranchProbabilityInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/EHPersonalities.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/InstIterator.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/ProfileData/InstrProf.h"
32 #include "llvm/Support/CRC.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/Support/Regex.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Instrumentation/CFGMST.h"
40 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
41 #include "llvm/Transforms/Utils/Instrumentation.h"
42 #include "llvm/Transforms/Utils/ModuleUtils.h"
43 #include <algorithm>
44 #include <memory>
45 #include <string>
46 #include <utility>
47 
48 using namespace llvm;
49 namespace endian = llvm::support::endian;
50 
51 #define DEBUG_TYPE "insert-gcov-profiling"
52 
// Constants used when writing the notes file.
enum : uint32_t {
  // Flag value that can be attached to an arc.
  // NOTE(review): presumably marks arcs that are on the spanning tree - confirm.
  GCOV_ARC_ON_TREE = 0x1,

  // Record tags written ahead of the function, block-count, arc and line
  // records respectively.
  GCOV_TAG_FUNCTION = 0x01000000,
  GCOV_TAG_BLOCKS = 0x01410000,
  GCOV_TAG_ARCS = 0x01430000,
  GCOV_TAG_LINES = 0x01450000,
};
61 
62 static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
63                                                cl::init("0000"), cl::Hidden,
64                                                cl::ValueRequired);
65 
66 static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
67                                    cl::desc("Make counter updates atomic"));
68 
69 // Returns the number of words which will be used to represent this string.
wordsOfString(StringRef s)70 static unsigned wordsOfString(StringRef s) {
71   // Length + NUL-terminated string + 0~3 padding NULs.
72   return (s.size() / 4) + 2;
73 }
74 
getDefault()75 GCOVOptions GCOVOptions::getDefault() {
76   GCOVOptions Options;
77   Options.EmitNotes = true;
78   Options.EmitData = true;
79   Options.NoRedZone = false;
80   Options.Atomic = AtomicCounter;
81 
82   if (DefaultGCOVVersion.size() != 4) {
83     reportFatalUsageError(Twine("Invalid -default-gcov-version: ") +
84                           DefaultGCOVVersion);
85   }
86   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
87   return Options;
88 }
89 
90 namespace {
91 class GCOVFunction;
92 
93 class GCOVProfiler {
94 public:
GCOVProfiler()95   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
GCOVProfiler(const GCOVOptions & Opts)96   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
97   bool
98   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
99               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
100               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
101 
write(uint32_t i)102   void write(uint32_t i) {
103     char Bytes[4];
104     endian::write32(Bytes, i, Endian);
105     os->write(Bytes, 4);
106   }
writeString(StringRef s)107   void writeString(StringRef s) {
108     write(wordsOfString(s) - 1);
109     os->write(s.data(), s.size());
110     os->write_zeros(4 - s.size() % 4);
111   }
writeBytes(const char * Bytes,int Size)112   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
113 
114 private:
115   // Create the .gcno files for the Module based on DebugInfo.
116   bool
117   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
118                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
119                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
120                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
121 
122   Function *createInternalFunction(FunctionType *FTy, StringRef Name,
123                                    StringRef MangledType = "");
124 
125   void emitGlobalConstructor(
126       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
127   void emitModuleInitFunctionPtrs(
128       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
129 
130   bool isFunctionInstrumented(const Function &F);
131   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
132   static bool doesFilenameMatchARegex(StringRef Filename,
133                                       std::vector<Regex> &Regexes);
134 
135   // Get pointers to the functions in the runtime library.
136   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
137   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
138   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
139   FunctionCallee getSummaryInfoFunc();
140   FunctionCallee getEndFileFunc();
141 
142   // Add the function to write out all our counters to the global destructor
143   // list.
144   Function *
145   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
146   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
147 
148   bool AddFlushBeforeForkAndExec();
149 
150   enum class GCovFileType { GCNO, GCDA };
151   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
152 
153   GCOVOptions Options;
154   llvm::endianness Endian;
155   raw_ostream *os;
156   int Version = 0;
157 
158   // Checksum, produced by hash of EdgeDestinations
159   SmallVector<uint32_t, 4> FileChecksums;
160 
161   Module *M = nullptr;
162   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
163   LLVMContext *Ctx = nullptr;
164   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
165   std::vector<Regex> FilterRe;
166   std::vector<Regex> ExcludeRe;
167   DenseSet<const BasicBlock *> ExecBlocks;
168   StringMap<bool> InstrumentedFiles;
169 };
170 
// Per-basic-block bookkeeping record used as the BBInfo parameter of CFGMST.
struct BBInfo {
  BBInfo *Group;     // Initially points at this object itself.
  uint32_t Index;    // Index supplied at construction.
  uint32_t Rank = 0;

  BBInfo(unsigned Index) : Group(this), Index(Index) {}

  // Returns a short description of the form "Index=<n>".
  std::string infoString() const {
    std::string Text = "Index=";
    Text += std::to_string(Index);
    return Text;
  }
};
181 
182 struct Edge {
183   // This class implements the CFG edges. Note the CFG can be a multi-graph.
184   // So there might be multiple edges with same SrcBB and DestBB.
185   const BasicBlock *SrcBB;
186   const BasicBlock *DestBB;
187   uint64_t Weight;
188   BasicBlock *Place = nullptr;
189   uint32_t SrcNumber, DstNumber;
190   bool InMST = false;
191   bool Removed = false;
192   bool IsCritical = false;
193 
Edge__anon09913f2a0211::Edge194   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
195       : SrcBB(Src), DestBB(Dest), Weight(W) {}
196 
197   // Return the information string of an edge.
infoString__anon09913f2a0211::Edge198   std::string infoString() const {
199     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
200             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
201         .str();
202   }
203 };
204 }
205 
getFunctionName(const DISubprogram * SP)206 static StringRef getFunctionName(const DISubprogram *SP) {
207   if (!SP->getLinkageName().empty())
208     return SP->getLinkageName();
209   return SP->getName();
210 }
211 
212 /// Extract a filename for a DIScope.
213 ///
214 /// Prefer relative paths in the coverage notes. Clang also may split
215 /// up absolute paths into a directory and filename component. When
216 /// the relative path doesn't exist, reconstruct the absolute path.
getFilename(const DIScope * SP)217 static SmallString<128> getFilename(const DIScope *SP) {
218   SmallString<128> Path;
219   StringRef RelPath = SP->getFilename();
220   if (sys::fs::exists(RelPath))
221     Path = RelPath;
222   else
223     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
224   return Path;
225 }
226 
227 namespace {
228   class GCOVRecord {
229   protected:
230     GCOVProfiler *P;
231 
GCOVRecord(GCOVProfiler * P)232     GCOVRecord(GCOVProfiler *P) : P(P) {}
233 
write(uint32_t i)234     void write(uint32_t i) { P->write(i); }
writeString(StringRef s)235     void writeString(StringRef s) { P->writeString(s); }
writeBytes(const char * Bytes,int Size)236     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
237   };
238 
239   class GCOVFunction;
240   class GCOVBlock;
241 
242   // Constructed only by requesting it from a GCOVBlock, this object stores a
243   // list of line numbers and a single filename, representing lines that belong
244   // to the block.
245   class GCOVLines : public GCOVRecord {
246   public:
getFilename()247     StringRef getFilename() { return Filename; }
248 
addLine(uint32_t Line)249     void addLine(uint32_t Line) {
250       assert(Line != 0 && "Line zero is not a valid real line number.");
251       Lines.push_back(Line);
252     }
253 
length() const254     uint32_t length() const {
255       return 1 + wordsOfString(Filename) + Lines.size();
256     }
257 
writeOut()258     void writeOut() {
259       write(0);
260       writeString(Filename);
261       for (uint32_t L : Lines)
262         write(L);
263     }
264 
GCOVLines(GCOVProfiler * P,StringRef F)265     GCOVLines(GCOVProfiler *P, StringRef F)
266         : GCOVRecord(P), Filename(std::string(F)) {}
267 
268   private:
269     std::string Filename;
270     SmallVector<uint32_t, 32> Lines;
271   };
272 
273 
274   // Represent a basic block in GCOV. Each block has a unique number in the
275   // function, number of lines belonging to each block, and a set of edges to
276   // other blocks.
277   class GCOVBlock : public GCOVRecord {
278    public:
getFile(StringRef Filename)279     GCOVLines &getFile(StringRef Filename) {
280       if (Lines.empty() || Lines.back().getFilename() != Filename)
281         Lines.emplace_back(P, Filename);
282       return Lines.back();
283     }
284 
addEdge(GCOVBlock & Successor,uint32_t Flags)285     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
286       OutEdges.emplace_back(&Successor, Flags);
287     }
288 
writeOut()289     void writeOut() {
290       uint32_t Len = 3;
291 
292       for (auto &L : Lines)
293         Len += L.length();
294 
295       write(GCOV_TAG_LINES);
296       write(Len);
297       write(Number);
298 
299       for (auto &L : Lines)
300         L.writeOut();
301       write(0);
302       write(0);
303     }
304 
GCOVBlock(const GCOVBlock & RHS)305     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
306       // Only allow copy before edges and lines have been added. After that,
307       // there are inter-block pointers (eg: edges) that won't take kindly to
308       // blocks being copied or moved around.
309       assert(Lines.empty());
310       assert(OutEdges.empty());
311     }
312 
313     uint32_t Number;
314     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
315 
316   private:
317     friend class GCOVFunction;
318 
GCOVBlock(GCOVProfiler * P,uint32_t Number)319     GCOVBlock(GCOVProfiler *P, uint32_t Number)
320         : GCOVRecord(P), Number(Number) {}
321 
322     SmallVector<GCOVLines> Lines;
323   };
324 
325   // A function has a unique identifier, a checksum (we leave as zero) and a
326   // set of blocks and a map of edges between blocks. This is the only GCOV
327   // object users can construct, the blocks and lines will be rooted here.
328   class GCOVFunction : public GCOVRecord {
329   public:
GCOVFunction(GCOVProfiler * P,Function * F,const DISubprogram * SP,unsigned EndLine,uint32_t Ident,int Version)330     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
331                  unsigned EndLine, uint32_t Ident, int Version)
332         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
333           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
334       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
335       uint32_t i = 2;
336       for (BasicBlock &BB : *F)
337         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
338 
339       std::string FunctionNameAndLine;
340       raw_string_ostream FNLOS(FunctionNameAndLine);
341       FNLOS << getFunctionName(SP) << SP->getLine();
342       FuncChecksum = hash_value(FunctionNameAndLine);
343     }
344 
getBlock(const BasicBlock * BB)345     GCOVBlock &getBlock(const BasicBlock *BB) {
346       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
347     }
348 
getEntryBlock()349     GCOVBlock &getEntryBlock() { return EntryBlock; }
getReturnBlock()350     GCOVBlock &getReturnBlock() {
351       return ReturnBlock;
352     }
353 
getFuncChecksum() const354     uint32_t getFuncChecksum() const {
355       return FuncChecksum;
356     }
357 
writeOut(uint32_t CfgChecksum)358     void writeOut(uint32_t CfgChecksum) {
359       write(GCOV_TAG_FUNCTION);
360       SmallString<128> Filename = getFilename(SP);
361       uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP));
362       BlockLen += 1 + wordsOfString(Filename) + 4;
363 
364       write(BlockLen);
365       write(Ident);
366       write(FuncChecksum);
367       write(CfgChecksum);
368       writeString(getFunctionName(SP));
369 
370       write(SP->isArtificial()); // artificial
371       writeString(Filename);
372       write(SP->getLine()); // start_line
373       write(0);             // start_column
374       // EndLine is the last line with !dbg. It is not the } line as in GCC,
375       // but good enough.
376       write(EndLine);
377       write(0); // end_column
378 
379       // Emit count of blocks.
380       write(GCOV_TAG_BLOCKS);
381       write(1);
382       write(Blocks.size() + 2);
383       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
384 
385       // Emit edges between blocks.
386       const uint32_t Outgoing = EntryBlock.OutEdges.size();
387       if (Outgoing) {
388         write(GCOV_TAG_ARCS);
389         write(Outgoing * 2 + 1);
390         write(EntryBlock.Number);
391         for (const auto &E : EntryBlock.OutEdges) {
392           write(E.first->Number);
393           write(E.second);
394         }
395       }
396       for (auto &It : Blocks) {
397         const GCOVBlock &Block = It.second;
398         if (Block.OutEdges.empty()) continue;
399 
400         write(GCOV_TAG_ARCS);
401         write(Block.OutEdges.size() * 2 + 1);
402         write(Block.Number);
403         for (const auto &E : Block.OutEdges) {
404           write(E.first->Number);
405           write(E.second);
406         }
407       }
408 
409       // Emit lines for each block.
410       for (auto &It : Blocks)
411         It.second.writeOut();
412     }
413 
414   public:
415     const DISubprogram *SP;
416     unsigned EndLine;
417     uint32_t Ident;
418     uint32_t FuncChecksum;
419     int Version;
420     MapVector<BasicBlock *, GCOVBlock> Blocks;
421     GCOVBlock EntryBlock;
422     GCOVBlock ReturnBlock;
423   };
424 }
425 
426 // RegexesStr is a string containing differents regex separated by a semi-colon.
427 // For example "foo\..*$;bar\..*$".
createRegexesFromString(StringRef RegexesStr)428 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
429   std::vector<Regex> Regexes;
430   while (!RegexesStr.empty()) {
431     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
432     if (!HeadTail.first.empty()) {
433       Regex Re(HeadTail.first);
434       std::string Err;
435       if (!Re.isValid(Err)) {
436         Ctx->emitError(Twine("Regex ") + HeadTail.first +
437                        " is not valid: " + Err);
438       }
439       Regexes.emplace_back(std::move(Re));
440     }
441     RegexesStr = HeadTail.second;
442   }
443   return Regexes;
444 }
445 
doesFilenameMatchARegex(StringRef Filename,std::vector<Regex> & Regexes)446 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
447                                            std::vector<Regex> &Regexes) {
448   for (Regex &Re : Regexes)
449     if (Re.match(Filename))
450       return true;
451   return false;
452 }
453 
isFunctionInstrumented(const Function & F)454 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
455   if (FilterRe.empty() && ExcludeRe.empty()) {
456     return true;
457   }
458   SmallString<128> Filename = getFilename(F.getSubprogram());
459   auto It = InstrumentedFiles.find(Filename);
460   if (It != InstrumentedFiles.end()) {
461     return It->second;
462   }
463 
464   SmallString<256> RealPath;
465   StringRef RealFilename;
466 
467   // Path can be
468   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
469   // such a case we must get the real_path.
470   if (sys::fs::real_path(Filename, RealPath)) {
471     // real_path can fail with path like "foo.c".
472     RealFilename = Filename;
473   } else {
474     RealFilename = RealPath;
475   }
476 
477   bool ShouldInstrument;
478   if (FilterRe.empty()) {
479     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
480   } else if (ExcludeRe.empty()) {
481     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
482   } else {
483     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
484                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
485   }
486   InstrumentedFiles[Filename] = ShouldInstrument;
487   return ShouldInstrument;
488 }
489 
mangleName(const DICompileUnit * CU,GCovFileType OutputType)490 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
491                                      GCovFileType OutputType) {
492   bool Notes = OutputType == GCovFileType::GCNO;
493 
494   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
495     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
496       MDNode *N = GCov->getOperand(i);
497       bool ThreeElement = N->getNumOperands() == 3;
498       if (!ThreeElement && N->getNumOperands() != 2)
499         continue;
500       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
501         continue;
502 
503       if (ThreeElement) {
504         // These nodes have no mangling to apply, it's stored mangled in the
505         // bitcode.
506         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
507         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
508         if (!NotesFile || !DataFile)
509           continue;
510         return std::string(Notes ? NotesFile->getString()
511                                  : DataFile->getString());
512       }
513 
514       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
515       if (!GCovFile)
516         continue;
517 
518       SmallString<128> Filename = GCovFile->getString();
519       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
520       return std::string(Filename);
521     }
522   }
523 
524   SmallString<128> Filename = CU->getFilename();
525   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
526   StringRef FName = sys::path::filename(Filename);
527   SmallString<128> CurPath;
528   if (sys::fs::current_path(CurPath))
529     return std::string(FName);
530   sys::path::append(CurPath, FName);
531   return std::string(CurPath);
532 }
533 
runOnModule(Module & M,function_ref<BlockFrequencyInfo * (Function & F)> GetBFI,function_ref<BranchProbabilityInfo * (Function & F)> GetBPI,std::function<const TargetLibraryInfo & (Function & F)> GetTLI)534 bool GCOVProfiler::runOnModule(
535     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
536     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
537     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
538   this->M = &M;
539   this->GetTLI = std::move(GetTLI);
540   Ctx = &M.getContext();
541 
542   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
543   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
544     return false;
545 
546   bool HasExecOrFork = AddFlushBeforeForkAndExec();
547 
548   FilterRe = createRegexesFromString(Options.Filter);
549   ExcludeRe = createRegexesFromString(Options.Exclude);
550   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
551   return true;
552 }
553 
run(Module & M,ModuleAnalysisManager & AM)554 PreservedAnalyses GCOVProfilerPass::run(Module &M,
555                                         ModuleAnalysisManager &AM) {
556 
557   GCOVProfiler Profiler(GCOVOpts);
558   FunctionAnalysisManager &FAM =
559       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
560 
561   auto GetBFI = [&FAM](Function &F) {
562     return &FAM.getResult<BlockFrequencyAnalysis>(F);
563   };
564   auto GetBPI = [&FAM](Function &F) {
565     return &FAM.getResult<BranchProbabilityAnalysis>(F);
566   };
567   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
568     return FAM.getResult<TargetLibraryAnalysis>(F);
569   };
570 
571   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
572     return PreservedAnalyses::all();
573 
574   return PreservedAnalyses::none();
575 }
576 
functionHasLines(const Function & F,unsigned & EndLine)577 static bool functionHasLines(const Function &F, unsigned &EndLine) {
578   // Check whether this function actually has any source lines. Not only
579   // do these waste space, they also can crash gcov.
580   EndLine = 0;
581   for (const auto &BB : F) {
582     for (const auto &I : BB) {
583       const DebugLoc &Loc = I.getDebugLoc();
584       if (!Loc)
585         continue;
586 
587       // Artificial lines such as calls to the global constructors.
588       if (Loc.getLine() == 0) continue;
589       EndLine = std::max(EndLine, Loc.getLine());
590 
591       return true;
592     }
593   }
594   return false;
595 }
596 
isUsingScopeBasedEH(Function & F)597 static bool isUsingScopeBasedEH(Function &F) {
598   if (!F.hasPersonalityFn()) return false;
599 
600   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
601   return isScopedEHPersonality(Personality);
602 }
603 
AddFlushBeforeForkAndExec()604 bool GCOVProfiler::AddFlushBeforeForkAndExec() {
605   const TargetLibraryInfo *TLI = nullptr;
606   SmallVector<CallInst *, 2> Forks;
607   SmallVector<CallInst *, 2> Execs;
608   for (auto &F : M->functions()) {
609     TLI = TLI == nullptr ? &GetTLI(F) : TLI;
610     for (auto &I : instructions(F)) {
611       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
612         if (Function *Callee = CI->getCalledFunction()) {
613           LibFunc LF;
614           if (TLI->getLibFunc(*Callee, LF)) {
615             if (LF == LibFunc_fork) {
616 #if !defined(_WIN32)
617               Forks.push_back(CI);
618 #endif
619             } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
620                        LF == LibFunc_execlp || LF == LibFunc_execv ||
621                        LF == LibFunc_execvp || LF == LibFunc_execve ||
622                        LF == LibFunc_execvpe || LF == LibFunc_execvP) {
623               Execs.push_back(CI);
624             }
625           }
626         }
627       }
628     }
629   }
630 
631   for (auto *F : Forks) {
632     IRBuilder<> Builder(F);
633     BasicBlock *Parent = F->getParent();
634     auto NextInst = ++F->getIterator();
635 
636     // We've a fork so just reset the counters in the child process
637     FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
638     FunctionCallee GCOVFork = M->getOrInsertFunction(
639         "__gcov_fork", FTy,
640         TLI->getAttrList(Ctx, {}, /*Signed=*/true, /*Ret=*/true));
641     F->setCalledFunction(GCOVFork);
642 
643     // We split just after the fork to have a counter for the lines after
644     // Anyway there's a bug:
645     // void foo() { fork(); }
646     // void bar() { foo(); blah(); }
647     // then "blah();" will be called 2 times but showed as 1
648     // because "blah()" belongs to the same block as "foo();"
649     Parent->splitBasicBlock(NextInst);
650 
651     // back() is a br instruction with a debug location
652     // equals to the one from NextAfterFork
653     // So to avoid to have two debug locs on two blocks just change it
654     DebugLoc Loc = F->getDebugLoc();
655     Parent->back().setDebugLoc(Loc);
656   }
657 
658   for (auto *E : Execs) {
659     IRBuilder<> Builder(E);
660     BasicBlock *Parent = E->getParent();
661     auto NextInst = ++E->getIterator();
662 
663     // Since the process is replaced by a new one we need to write out gcdas
664     // No need to reset the counters since they'll be lost after the exec**
665     FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
666     FunctionCallee WriteoutF =
667         M->getOrInsertFunction("llvm_writeout_files", FTy);
668     Builder.CreateCall(WriteoutF);
669 
670     DebugLoc Loc = E->getDebugLoc();
671     Builder.SetInsertPoint(&*NextInst);
672     // If the exec** fails we must reset the counters since they've been
673     // dumped
674     FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
675     Builder.CreateCall(ResetF)->setDebugLoc(Loc);
676     ExecBlocks.insert(Parent);
677     Parent->splitBasicBlock(NextInst);
678     Parent->back().setDebugLoc(Loc);
679   }
680 
681   return !Forks.empty() || !Execs.empty();
682 }
683 
getInstrBB(CFGMST<Edge,BBInfo> & MST,Edge & E,const DenseSet<const BasicBlock * > & ExecBlocks)684 static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
685                               const DenseSet<const BasicBlock *> &ExecBlocks) {
686   if (E.InMST || E.Removed)
687     return nullptr;
688 
689   BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
690   BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
691   // For a fake edge, instrument the real BB.
692   if (SrcBB == nullptr)
693     return DestBB;
694   if (DestBB == nullptr)
695     return SrcBB;
696 
697   auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
698     // There are basic blocks (such as catchswitch) cannot be instrumented.
699     // If the returned first insertion point is the end of BB, skip this BB.
700     if (BB->getFirstInsertionPt() == BB->end())
701       return nullptr;
702     return BB;
703   };
704 
705   // Instrument the SrcBB if it has a single successor,
706   // otherwise, the DestBB if this is not a critical edge.
707   Instruction *TI = SrcBB->getTerminator();
708   if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
709     return CanInstrument(SrcBB);
710   if (!E.IsCritical)
711     return CanInstrument(DestBB);
712 
713   // Some IndirectBr critical edges cannot be split by the previous
714   // SplitIndirectBrCriticalEdges call. Bail out.
715   const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
716   BasicBlock *InstrBB =
717       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
718   if (!InstrBB)
719     return nullptr;
720 
721   MST.addEdge(SrcBB, InstrBB, 0);
722   MST.addEdge(InstrBB, DestBB, 0).InMST = true;
723   E.Removed = true;
724 
725   return CanInstrument(InstrBB);
726 }
727 
728 #ifndef NDEBUG
dumpEdges(CFGMST<Edge,BBInfo> & MST,GCOVFunction & GF)729 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
730   size_t ID = 0;
731   for (const auto &E : make_pointee_range(MST.allEdges())) {
732     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
733     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
734     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
735            << E.infoString() << "\n";
736   }
737 }
738 #endif
739 
emitProfileNotes(NamedMDNode * CUNode,bool HasExecOrFork,function_ref<BlockFrequencyInfo * (Function & F)> GetBFI,function_ref<BranchProbabilityInfo * (Function & F)> GetBPI,function_ref<const TargetLibraryInfo & (Function & F)> GetTLI)740 bool GCOVProfiler::emitProfileNotes(
741     NamedMDNode *CUNode, bool HasExecOrFork,
742     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
743     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
744     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
745   {
746     uint8_t c3 = Options.Version[0];
747     uint8_t c2 = Options.Version[1];
748     uint8_t c1 = Options.Version[2];
749     Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
750                         : (c3 - '0') * 10 + c1 - '0';
751   }
752   // Emit .gcno files that are compatible with GCC 11.1.
753   if (Version < 111) {
754     Version = 111;
755     memcpy(Options.Version, "B11*", 4);
756   }
757 
758   bool EmitGCDA = Options.EmitData;
759   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
760     // Each compile unit gets its own .gcno file. This means that whether we run
761     // this pass over the original .o's as they're produced, or run it after
762     // LTO, we'll generate the same .gcno files.
763 
764     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
765 
766     // Skip module skeleton (and module) CUs.
767     if (CU->getDWOId())
768       continue;
769 
770     std::vector<uint8_t> EdgeDestinations;
771     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
772 
773     Endian = M->getDataLayout().isLittleEndian() ? llvm::endianness::little
774                                                  : llvm::endianness::big;
775     unsigned FunctionIdent = 0;
776     for (auto &F : M->functions()) {
777       DISubprogram *SP = F.getSubprogram();
778       unsigned EndLine;
779       if (!SP) continue;
780       if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
781         continue;
782       // TODO: Functions using scope-based EH are currently not supported.
783       if (isUsingScopeBasedEH(F)) continue;
784       if (F.hasFnAttribute(llvm::Attribute::NoProfile))
785         continue;
786       if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
787         continue;
788 
789       // Add the function line number to the lines of the entry block
790       // to have a counter for the function definition.
791       uint32_t Line = SP->getLine();
792       auto Filename = getFilename(SP);
793 
794       BranchProbabilityInfo *BPI = GetBPI(F);
795       BlockFrequencyInfo *BFI = GetBFI(F);
796 
797       // Split indirectbr critical edges here before computing the MST rather
798       // than later in getInstrBB() to avoid invalidating it.
799       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
800                                    BFI);
801 
802       CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false,
803                                /*InstrumentLoopEntries=*/false, BPI, BFI);
804 
805       // getInstrBB can split basic blocks and push elements to AllEdges.
806       for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {
807         auto &E = *MST.allEdges()[I];
808         // For now, disable spanning tree optimization when fork or exec* is
809         // used.
810         if (HasExecOrFork)
811           E.InMST = false;
812         E.Place = getInstrBB(MST, E, ExecBlocks);
813       }
814       // Basic blocks in F are finalized at this point.
815       BasicBlock &EntryBlock = F.getEntryBlock();
816       Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
817                                                      FunctionIdent++, Version));
818       GCOVFunction &Func = *Funcs.back();
819 
820       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
821       // as well.
822       llvm::erase_if(MST.allEdges(), [](std::unique_ptr<Edge> &E) {
823         return E->Removed || (!E->InMST && !E->Place);
824       });
825       const size_t Measured =
826           std::stable_partition(
827               MST.allEdges().begin(), MST.allEdges().end(),
828               [](std::unique_ptr<Edge> &E) { return E->Place; }) -
829           MST.allEdges().begin();
830       for (size_t I : llvm::seq<size_t>(0, Measured)) {
831         Edge &E = *MST.allEdges()[I];
832         GCOVBlock &Src =
833             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
834         GCOVBlock &Dst =
835             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
836         E.SrcNumber = Src.Number;
837         E.DstNumber = Dst.Number;
838       }
839       std::stable_sort(
840           MST.allEdges().begin(), MST.allEdges().begin() + Measured,
841           [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
842             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
843                                                 : L->DstNumber < R->DstNumber;
844           });
845 
846       for (const Edge &E : make_pointee_range(MST.allEdges())) {
847         GCOVBlock &Src =
848             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
849         GCOVBlock &Dst =
850             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
851         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
852       }
853 
854       // Artificial functions such as global initializers
855       if (!SP->isArtificial())
856         Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
857 
858       LLVM_DEBUG(dumpEdges(MST, Func));
859 
860       for (auto &GB : Func.Blocks) {
861         const BasicBlock &BB = *GB.first;
862         auto &Block = GB.second;
863         for (auto Succ : Block.OutEdges) {
864           uint32_t Idx = Succ.first->Number;
865           do EdgeDestinations.push_back(Idx & 255);
866           while ((Idx >>= 8) > 0);
867         }
868 
869         for (const auto &I : BB) {
870           const DebugLoc &Loc = I.getDebugLoc();
871           if (!Loc)
872             continue;
873 
874           // Artificial lines such as calls to the global constructors.
875           if (Loc.getLine() == 0 || Loc.isImplicitCode())
876             continue;
877 
878           if (Line == Loc.getLine()) continue;
879           Line = Loc.getLine();
880           MDNode *Scope = Loc.getScope();
881           if (SP != getDISubprogram(Scope))
882             continue;
883 
884           GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope()));
885           Lines.addLine(Loc.getLine());
886         }
887         Line = 0;
888       }
889       if (EmitGCDA) {
890         DISubprogram *SP = F.getSubprogram();
891         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
892         GlobalVariable *Counters = new GlobalVariable(
893             *M, CounterTy, false, GlobalValue::InternalLinkage,
894             Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
895         const llvm::Triple &Triple = M->getTargetTriple();
896         if (Triple.getObjectFormat() == llvm::Triple::XCOFF)
897           Counters->setSection("__llvm_gcov_ctr_section");
898         CountersBySP.emplace_back(Counters, SP);
899 
900         for (size_t I : llvm::seq<size_t>(0, Measured)) {
901           const Edge &E = *MST.allEdges()[I];
902           IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
903           Value *V = Builder.CreateConstInBoundsGEP2_64(
904               Counters->getValueType(), Counters, 0, I);
905           // Disable sanitizers to decrease size bloat. We don't expect
906           // sanitizers to catch interesting issues.
907           Instruction *Inst;
908           if (Options.Atomic) {
909             Inst = Builder.CreateAtomicRMW(AtomicRMWInst::Add, V,
910                                            Builder.getInt64(1), MaybeAlign(),
911                                            AtomicOrdering::Monotonic);
912           } else {
913             LoadInst *OldCount =
914                 Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
915             OldCount->setNoSanitizeMetadata();
916             Value *NewCount = Builder.CreateAdd(OldCount, Builder.getInt64(1));
917             Inst = Builder.CreateStore(NewCount, V);
918           }
919           Inst->setNoSanitizeMetadata();
920         }
921       }
922     }
923 
924     char Tmp[4];
925     JamCRC JC;
926     JC.update(EdgeDestinations);
927     uint32_t Stamp = JC.getCRC();
928     FileChecksums.push_back(Stamp);
929 
930     if (Options.EmitNotes) {
931       std::error_code EC;
932       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
933                          sys::fs::OF_None);
934       if (EC) {
935         Ctx->emitError(
936             Twine("failed to open coverage notes file for writing: ") +
937             EC.message());
938         continue;
939       }
940       os = &out;
941       if (Endian == llvm::endianness::big) {
942         out.write("gcno", 4);
943         out.write(Options.Version, 4);
944       } else {
945         out.write("oncg", 4);
946         std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
947         out.write(Tmp, 4);
948       }
949       write(Stamp);
950       writeString("."); // unuseful current_working_directory
951       write(0);         // unuseful has_unexecuted_blocks
952 
953       for (auto &Func : Funcs)
954         Func->writeOut(Stamp);
955 
956       write(0);
957       write(0);
958       out.close();
959     }
960 
961     if (EmitGCDA) {
962       const llvm::Triple &Triple = M->getTargetTriple();
963       if (Triple.getObjectFormat() == llvm::Triple::XCOFF)
964         emitModuleInitFunctionPtrs(CountersBySP);
965       else
966         emitGlobalConstructor(CountersBySP);
967       EmitGCDA = false;
968     }
969   }
970   return true;
971 }
972 
createInternalFunction(FunctionType * FTy,StringRef Name,StringRef MangledType)973 Function *GCOVProfiler::createInternalFunction(FunctionType *FTy,
974                                                StringRef Name,
975                                                StringRef MangledType /*=""*/) {
976   Function *F = Function::createWithDefaultAttr(
977       FTy, GlobalValue::InternalLinkage, 0, Name, M);
978   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
979   F->addFnAttr(Attribute::NoUnwind);
980   if (Options.NoRedZone)
981     F->addFnAttr(Attribute::NoRedZone);
982   if (!MangledType.empty())
983     setKCFIType(*M, *F, MangledType);
984   return F;
985 }
986 
emitGlobalConstructor(SmallVectorImpl<std::pair<GlobalVariable *,MDNode * >> & CountersBySP)987 void GCOVProfiler::emitGlobalConstructor(
988     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
989   Function *WriteoutF = insertCounterWriteout(CountersBySP);
990   Function *ResetF = insertReset(CountersBySP);
991 
992   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
993   // be executed at exit and the "__llvm_gcov_reset" function to be executed
994   // when "__gcov_flush" is called.
995   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
996   Function *F = createInternalFunction(FTy, "__llvm_gcov_init", "_ZTSFvvE");
997   F->addFnAttr(Attribute::NoInline);
998 
999   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1000   IRBuilder<> Builder(BB);
1001 
1002   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1003   auto *PFTy = PointerType::get(*Ctx, 0);
1004   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1005 
1006   // Initialize the environment and register the local writeout, flush and
1007   // reset functions.
1008   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1009   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1010   Builder.CreateRetVoid();
1011 
1012   appendToGlobalCtors(*M, F, 0);
1013 }
1014 
emitModuleInitFunctionPtrs(SmallVectorImpl<std::pair<GlobalVariable *,MDNode * >> & CountersBySP)1015 void GCOVProfiler::emitModuleInitFunctionPtrs(
1016     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1017   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1018   Function *ResetF = insertReset(CountersBySP);
1019 
1020   // Instead of creating a function call and add it to the constructors list,
1021   // create a global variable in the __llvm_covinit section so the functions
1022   // can be registered by a constructor in the runtime.
1023 
1024   auto &Ctx = M->getContext();
1025 
1026   Type *InitFuncDataTy[] = {
1027 #define COVINIT_FUNC(Type, LLVMType, Name, Init) LLVMType,
1028 #include "llvm/ProfileData/InstrProfData.inc"
1029   };
1030 
1031   auto STy = StructType::get(Ctx, ArrayRef(InitFuncDataTy));
1032 
1033   Constant *InitFuncPtrs[] = {
1034 #define COVINIT_FUNC(Type, LLVMType, Name, Init) Init,
1035 #include "llvm/ProfileData/InstrProfData.inc"
1036   };
1037 
1038   auto *CovInitGV =
1039       new GlobalVariable(*M, STy, false, GlobalValue::PrivateLinkage, nullptr,
1040                          "__llvm_covinit_functions");
1041   CovInitGV->setInitializer(ConstantStruct::get(STy, InitFuncPtrs));
1042   CovInitGV->setVisibility(GlobalValue::VisibilityTypes::DefaultVisibility);
1043   CovInitGV->setSection(getInstrProfSectionName(
1044       IPSK_covinit, M->getTargetTriple().getObjectFormat()));
1045   CovInitGV->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1046   CovInitGV->setConstant(true);
1047 }
1048 
getStartFileFunc(const TargetLibraryInfo * TLI)1049 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1050   Type *Args[] = {
1051       PointerType::getUnqual(*Ctx), // const char *orig_filename
1052       Type::getInt32Ty(*Ctx),       // uint32_t version
1053       Type::getInt32Ty(*Ctx),       // uint32_t checksum
1054   };
1055   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1056   return M->getOrInsertFunction("llvm_gcda_start_file", FTy,
1057                                 TLI->getAttrList(Ctx, {1, 2}, /*Signed=*/false));
1058 }
1059 
getEmitFunctionFunc(const TargetLibraryInfo * TLI)1060 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1061   Type *Args[] = {
1062     Type::getInt32Ty(*Ctx),    // uint32_t ident
1063     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1064     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1065   };
1066   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1067   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy,
1068                              TLI->getAttrList(Ctx, {0, 1, 2}, /*Signed=*/false));
1069 }
1070 
getEmitArcsFunc(const TargetLibraryInfo * TLI)1071 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1072   Type *Args[] = {
1073       Type::getInt32Ty(*Ctx),       // uint32_t num_counters
1074       PointerType::getUnqual(*Ctx), // uint64_t *counters
1075   };
1076   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1077   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy,
1078                                 TLI->getAttrList(Ctx, {0}, /*Signed=*/false));
1079 }
1080 
getSummaryInfoFunc()1081 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1082   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1083   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1084 }
1085 
getEndFileFunc()1086 FunctionCallee GCOVProfiler::getEndFileFunc() {
1087   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1088   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1089 }
1090 
insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *,MDNode * >> CountersBySP)1091 Function *GCOVProfiler::insertCounterWriteout(
1092     ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
1093   FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1094   Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
1095   if (!WriteoutF)
1096     WriteoutF =
1097         createInternalFunction(WriteoutFTy, "__llvm_gcov_writeout", "_ZTSFvvE");
1098   WriteoutF->addFnAttr(Attribute::NoInline);
1099 
1100   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
1101   IRBuilder<> Builder(BB);
1102 
1103   auto *TLI = &GetTLI(*WriteoutF);
1104 
1105   FunctionCallee StartFile = getStartFileFunc(TLI);
1106   FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
1107   FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
1108   FunctionCallee SummaryInfo = getSummaryInfoFunc();
1109   FunctionCallee EndFile = getEndFileFunc();
1110 
1111   NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
1112   if (!CUNodes) {
1113     Builder.CreateRetVoid();
1114     return WriteoutF;
1115   }
1116 
1117   // Collect the relevant data into a large constant data structure that we can
1118   // walk to write out everything.
1119   StructType *StartFileCallArgsTy = StructType::create(
1120       {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1121       "start_file_args_ty");
1122   StructType *EmitFunctionCallArgsTy = StructType::create(
1123       {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1124       "emit_function_args_ty");
1125   auto *PtrTy = Builder.getPtrTy();
1126   StructType *EmitArcsCallArgsTy =
1127       StructType::create({Builder.getInt32Ty(), PtrTy}, "emit_arcs_args_ty");
1128   StructType *FileInfoTy = StructType::create(
1129       {StartFileCallArgsTy, Builder.getInt32Ty(), PtrTy, PtrTy}, "file_info");
1130 
1131   Constant *Zero32 = Builder.getInt32(0);
1132   // Build an explicit array of two zeros for use in ConstantExpr GEP building.
1133   Constant *TwoZero32s[] = {Zero32, Zero32};
1134 
1135   SmallVector<Constant *, 8> FileInfos;
1136   for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
1137     auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
1138 
1139     // Skip module skeleton (and module) CUs.
1140     if (CU->getDWOId())
1141       continue;
1142 
1143     std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
1144     uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
1145     auto *StartFileCallArgs = ConstantStruct::get(
1146         StartFileCallArgsTy,
1147         {Builder.CreateGlobalString(FilenameGcda),
1148          Builder.getInt32(endian::read32be(Options.Version)),
1149          Builder.getInt32(CfgChecksum)});
1150 
1151     SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
1152     SmallVector<Constant *, 8> EmitArcsCallArgsArray;
1153     for (int j : llvm::seq<int>(0, CountersBySP.size())) {
1154       uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
1155       EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
1156           EmitFunctionCallArgsTy,
1157           {Builder.getInt32(j),
1158            Builder.getInt32(FuncChecksum),
1159            Builder.getInt32(CfgChecksum)}));
1160 
1161       GlobalVariable *GV = CountersBySP[j].first;
1162       unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
1163       EmitArcsCallArgsArray.push_back(ConstantStruct::get(
1164           EmitArcsCallArgsTy,
1165           {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
1166                                        GV->getValueType(), GV, TwoZero32s)}));
1167     }
1168     // Create global arrays for the two emit calls.
1169     int CountersSize = CountersBySP.size();
1170     assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
1171            "Mismatched array size!");
1172     assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
1173            "Mismatched array size!");
1174     auto *EmitFunctionCallArgsArrayTy =
1175         ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
1176     auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
1177         *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
1178         GlobalValue::InternalLinkage,
1179         ConstantArray::get(EmitFunctionCallArgsArrayTy,
1180                            EmitFunctionCallArgsArray),
1181         Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
1182     auto *EmitArcsCallArgsArrayTy =
1183         ArrayType::get(EmitArcsCallArgsTy, CountersSize);
1184     EmitFunctionCallArgsArrayGV->setUnnamedAddr(
1185         GlobalValue::UnnamedAddr::Global);
1186     auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
1187         *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
1188         GlobalValue::InternalLinkage,
1189         ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
1190         Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
1191     EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1192 
1193     FileInfos.push_back(ConstantStruct::get(
1194         FileInfoTy,
1195         {StartFileCallArgs, Builder.getInt32(CountersSize),
1196          ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
1197                                                 EmitFunctionCallArgsArrayGV,
1198                                                 TwoZero32s),
1199          ConstantExpr::getInBoundsGetElementPtr(
1200              EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
1201   }
1202 
1203   // If we didn't find anything to actually emit, bail on out.
1204   if (FileInfos.empty()) {
1205     Builder.CreateRetVoid();
1206     return WriteoutF;
1207   }
1208 
1209   // To simplify code, we cap the number of file infos we write out to fit
1210   // easily in a 32-bit signed integer. This gives consistent behavior between
1211   // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
1212   // operations on 32-bit systems. It also seems unreasonable to try to handle
1213   // more than 2 billion files.
1214   if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
1215     FileInfos.resize(INT_MAX);
1216 
1217   // Create a global for the entire data structure so we can walk it more
1218   // easily.
1219   auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
1220   auto *FileInfoArrayGV = new GlobalVariable(
1221       *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
1222       ConstantArray::get(FileInfoArrayTy, FileInfos),
1223       "__llvm_internal_gcov_emit_file_info");
1224   FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1225 
1226   // Create the CFG for walking this data structure.
1227   auto *FileLoopHeader =
1228       BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
1229   auto *CounterLoopHeader =
1230       BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
1231   auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
1232   auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
1233 
1234   // We always have at least one file, so just branch to the header.
1235   Builder.CreateBr(FileLoopHeader);
1236 
1237   // The index into the files structure is our loop induction variable.
1238   Builder.SetInsertPoint(FileLoopHeader);
1239   PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1240                                   "file_idx");
1241   IV->addIncoming(Builder.getInt32(0), BB);
1242   auto *FileInfoPtr = Builder.CreateInBoundsGEP(
1243       FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
1244   auto *StartFileCallArgsPtr =
1245       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
1246   auto *StartFileCall = Builder.CreateCall(
1247       StartFile,
1248       {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
1249                           Builder.CreateStructGEP(StartFileCallArgsTy,
1250                                                   StartFileCallArgsPtr, 0),
1251                           "filename"),
1252        Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
1253                           Builder.CreateStructGEP(StartFileCallArgsTy,
1254                                                   StartFileCallArgsPtr, 1),
1255                           "version"),
1256        Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
1257                           Builder.CreateStructGEP(StartFileCallArgsTy,
1258                                                   StartFileCallArgsPtr, 2),
1259                           "stamp")});
1260   if (auto AK = TLI->getExtAttrForI32Param(false))
1261     StartFileCall->addParamAttr(2, AK);
1262   auto *NumCounters = Builder.CreateLoad(
1263       FileInfoTy->getElementType(1),
1264       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
1265   auto *EmitFunctionCallArgsArray =
1266       Builder.CreateLoad(FileInfoTy->getElementType(2),
1267                          Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
1268                          "emit_function_args");
1269   auto *EmitArcsCallArgsArray = Builder.CreateLoad(
1270       FileInfoTy->getElementType(3),
1271       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
1272   auto *EnterCounterLoopCond =
1273       Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
1274   Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
1275 
1276   Builder.SetInsertPoint(CounterLoopHeader);
1277   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1278                                "ctr_idx");
1279   JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
1280   auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
1281       EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
1282   auto *EmitFunctionCall = Builder.CreateCall(
1283       EmitFunction,
1284       {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
1285                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1286                                                   EmitFunctionCallArgsPtr, 0),
1287                           "ident"),
1288        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
1289                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1290                                                   EmitFunctionCallArgsPtr, 1),
1291                           "func_checkssum"),
1292        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
1293                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1294                                                   EmitFunctionCallArgsPtr, 2),
1295                           "cfg_checksum")});
1296   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1297     EmitFunctionCall->addParamAttr(0, AK);
1298     EmitFunctionCall->addParamAttr(1, AK);
1299     EmitFunctionCall->addParamAttr(2, AK);
1300   }
1301   auto *EmitArcsCallArgsPtr =
1302       Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
1303   auto *EmitArcsCall = Builder.CreateCall(
1304       EmitArcs,
1305       {Builder.CreateLoad(
1306            EmitArcsCallArgsTy->getElementType(0),
1307            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
1308            "num_counters"),
1309        Builder.CreateLoad(
1310            EmitArcsCallArgsTy->getElementType(1),
1311            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
1312            "counters")});
1313   if (auto AK = TLI->getExtAttrForI32Param(false))
1314     EmitArcsCall->addParamAttr(0, AK);
1315   auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
1316   auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
1317   Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
1318   JV->addIncoming(NextJV, CounterLoopHeader);
1319 
1320   Builder.SetInsertPoint(FileLoopLatch);
1321   Builder.CreateCall(SummaryInfo, {});
1322   Builder.CreateCall(EndFile, {});
1323   auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
1324   auto *FileLoopCond =
1325       Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
1326   Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
1327   IV->addIncoming(NextIV, FileLoopLatch);
1328 
1329   Builder.SetInsertPoint(ExitBB);
1330   Builder.CreateRetVoid();
1331 
1332   return WriteoutF;
1333 }
1334 
insertReset(ArrayRef<std::pair<GlobalVariable *,MDNode * >> CountersBySP)1335 Function *GCOVProfiler::insertReset(
1336     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1337   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1338   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1339   if (!ResetF)
1340     ResetF = createInternalFunction(FTy, "__llvm_gcov_reset", "_ZTSFvvE");
1341   ResetF->addFnAttr(Attribute::NoInline);
1342 
1343   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1344   IRBuilder<> Builder(Entry);
1345   LLVMContext &C = Entry->getContext();
1346 
1347   // Zero out the counters.
1348   for (const auto &I : CountersBySP) {
1349     GlobalVariable *GV = I.first;
1350     auto *GVTy = cast<ArrayType>(GV->getValueType());
1351     Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
1352                          GVTy->getNumElements() *
1353                              GVTy->getElementType()->getScalarSizeInBits() / 8,
1354                          GV->getAlign());
1355   }
1356 
1357   Type *RetTy = ResetF->getReturnType();
1358   if (RetTy->isVoidTy())
1359     Builder.CreateRetVoid();
1360   else if (RetTy->isIntegerTy())
1361     // Used if __llvm_gcov_reset was implicitly declared.
1362     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1363   else
1364     report_fatal_error("invalid return type for __llvm_gcov_reset");
1365 
1366   return ResetF;
1367 }
1368