xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp (revision 0ad011ececb978e22a9bff2acf76633b094f1ff6)
1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements GCOV-style profiling. When this pass is run it emits
10 // "gcno" files next to the existing source, and instruments the code so that
11 // it records the edges between blocks that run and emits a complementary
12 // "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Sequence.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/BranchProbabilityInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/EHPersonalities.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/InstIterator.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/Support/CRC.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/Support/Regex.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Instrumentation.h"
40 #include "llvm/Transforms/Instrumentation/CFGMST.h"
41 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
42 #include "llvm/Transforms/Utils/ModuleUtils.h"
43 #include <algorithm>
44 #include <memory>
45 #include <string>
46 #include <utility>
47 
48 using namespace llvm;
49 namespace endian = llvm::support::endian;
50 
51 #define DEBUG_TYPE "insert-gcov-profiling"
52 
// Numeric constants written into the emitted gcov records.
enum : uint32_t {
  // Arc flag attached to an edge that has no counter of its own (edges
  // without an instrumentation point are written with this flag).
  GCOV_ARC_ON_TREE = 1u << 0,

  // Record tags; each one is written immediately before the record it names.
  GCOV_TAG_FUNCTION = 0x01000000u,
  GCOV_TAG_BLOCKS = 0x01410000u,
  GCOV_TAG_ARCS = 0x01430000u,
  GCOV_TAG_LINES = 0x01450000u,
};
61 
62 static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
63                                                cl::init("408*"), cl::Hidden,
64                                                cl::ValueRequired);
65 
66 static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
67                                    cl::desc("Make counter updates atomic"));
68 
69 // Returns the number of words which will be used to represent this string.
70 static unsigned wordsOfString(StringRef s) {
71   // Length + NUL-terminated string + 0~3 padding NULs.
72   return (s.size() / 4) + 2;
73 }
74 
75 GCOVOptions GCOVOptions::getDefault() {
76   GCOVOptions Options;
77   Options.EmitNotes = true;
78   Options.EmitData = true;
79   Options.NoRedZone = false;
80   Options.Atomic = AtomicCounter;
81 
82   if (DefaultGCOVVersion.size() != 4) {
83     llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
84                              DefaultGCOVVersion, /*GenCrashDiag=*/false);
85   }
86   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
87   return Options;
88 }
89 
90 namespace {
91 class GCOVFunction;
92 
93 class GCOVProfiler {
94 public:
95   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
96   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
97   bool
98   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
99               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
100               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
101 
102   void write(uint32_t i) {
103     char Bytes[4];
104     endian::write32(Bytes, i, Endian);
105     os->write(Bytes, 4);
106   }
107   void writeString(StringRef s) {
108     write(wordsOfString(s) - 1);
109     os->write(s.data(), s.size());
110     os->write_zeros(4 - s.size() % 4);
111   }
112   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
113 
114 private:
115   // Create the .gcno files for the Module based on DebugInfo.
116   bool
117   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
118                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
119                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
120                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
121 
122   Function *createInternalFunction(FunctionType *FTy, StringRef Name,
123                                    StringRef MangledType = "");
124   void emitGlobalConstructor(
125       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
126 
127   bool isFunctionInstrumented(const Function &F);
128   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
129   static bool doesFilenameMatchARegex(StringRef Filename,
130                                       std::vector<Regex> &Regexes);
131 
132   // Get pointers to the functions in the runtime library.
133   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
134   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
135   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
136   FunctionCallee getSummaryInfoFunc();
137   FunctionCallee getEndFileFunc();
138 
139   // Add the function to write out all our counters to the global destructor
140   // list.
141   Function *
142   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
143   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
144 
145   bool AddFlushBeforeForkAndExec();
146 
147   enum class GCovFileType { GCNO, GCDA };
148   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
149 
150   GCOVOptions Options;
151   support::endianness Endian;
152   raw_ostream *os;
153 
154   // Checksum, produced by hash of EdgeDestinations
155   SmallVector<uint32_t, 4> FileChecksums;
156 
157   Module *M = nullptr;
158   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
159   LLVMContext *Ctx = nullptr;
160   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
161   std::vector<Regex> FilterRe;
162   std::vector<Regex> ExcludeRe;
163   DenseSet<const BasicBlock *> ExecBlocks;
164   StringMap<bool> InstrumentedFiles;
165 };
166 
167 struct BBInfo {
168   BBInfo *Group;
169   uint32_t Index;
170   uint32_t Rank = 0;
171 
172   BBInfo(unsigned Index) : Group(this), Index(Index) {}
173   std::string infoString() const {
174     return (Twine("Index=") + Twine(Index)).str();
175   }
176 };
177 
178 struct Edge {
179   // This class implements the CFG edges. Note the CFG can be a multi-graph.
180   // So there might be multiple edges with same SrcBB and DestBB.
181   const BasicBlock *SrcBB;
182   const BasicBlock *DestBB;
183   uint64_t Weight;
184   BasicBlock *Place = nullptr;
185   uint32_t SrcNumber, DstNumber;
186   bool InMST = false;
187   bool Removed = false;
188   bool IsCritical = false;
189 
190   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
191       : SrcBB(Src), DestBB(Dest), Weight(W) {}
192 
193   // Return the information string of an edge.
194   std::string infoString() const {
195     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
196             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
197         .str();
198   }
199 };
200 }
201 
202 static StringRef getFunctionName(const DISubprogram *SP) {
203   if (!SP->getLinkageName().empty())
204     return SP->getLinkageName();
205   return SP->getName();
206 }
207 
208 /// Extract a filename for a DISubprogram.
209 ///
210 /// Prefer relative paths in the coverage notes. Clang also may split
211 /// up absolute paths into a directory and filename component. When
212 /// the relative path doesn't exist, reconstruct the absolute path.
213 static SmallString<128> getFilename(const DISubprogram *SP) {
214   SmallString<128> Path;
215   StringRef RelPath = SP->getFilename();
216   if (sys::fs::exists(RelPath))
217     Path = RelPath;
218   else
219     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
220   return Path;
221 }
222 
223 namespace {
224   class GCOVRecord {
225   protected:
226     GCOVProfiler *P;
227 
228     GCOVRecord(GCOVProfiler *P) : P(P) {}
229 
230     void write(uint32_t i) { P->write(i); }
231     void writeString(StringRef s) { P->writeString(s); }
232     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
233   };
234 
235   class GCOVFunction;
236   class GCOVBlock;
237 
238   // Constructed only by requesting it from a GCOVBlock, this object stores a
239   // list of line numbers and a single filename, representing lines that belong
240   // to the block.
241   class GCOVLines : public GCOVRecord {
242    public:
243     void addLine(uint32_t Line) {
244       assert(Line != 0 && "Line zero is not a valid real line number.");
245       Lines.push_back(Line);
246     }
247 
248     uint32_t length() const {
249       return 1 + wordsOfString(Filename) + Lines.size();
250     }
251 
252     void writeOut() {
253       write(0);
254       writeString(Filename);
255       for (uint32_t L : Lines)
256         write(L);
257     }
258 
259     GCOVLines(GCOVProfiler *P, StringRef F)
260         : GCOVRecord(P), Filename(std::string(F)) {}
261 
262   private:
263     std::string Filename;
264     SmallVector<uint32_t, 32> Lines;
265   };
266 
267 
268   // Represent a basic block in GCOV. Each block has a unique number in the
269   // function, number of lines belonging to each block, and a set of edges to
270   // other blocks.
271   class GCOVBlock : public GCOVRecord {
272    public:
273     GCOVLines &getFile(StringRef Filename) {
274       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
275     }
276 
277     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
278       OutEdges.emplace_back(&Successor, Flags);
279     }
280 
281     void writeOut() {
282       uint32_t Len = 3;
283       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
284       for (auto &I : LinesByFile) {
285         Len += I.second.length();
286         SortedLinesByFile.push_back(&I);
287       }
288 
289       write(GCOV_TAG_LINES);
290       write(Len);
291       write(Number);
292 
293       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
294                                        StringMapEntry<GCOVLines> *RHS) {
295         return LHS->getKey() < RHS->getKey();
296       });
297       for (auto &I : SortedLinesByFile)
298         I->getValue().writeOut();
299       write(0);
300       write(0);
301     }
302 
303     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
304       // Only allow copy before edges and lines have been added. After that,
305       // there are inter-block pointers (eg: edges) that won't take kindly to
306       // blocks being copied or moved around.
307       assert(LinesByFile.empty());
308       assert(OutEdges.empty());
309     }
310 
311     uint32_t Number;
312     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
313 
314   private:
315     friend class GCOVFunction;
316 
317     GCOVBlock(GCOVProfiler *P, uint32_t Number)
318         : GCOVRecord(P), Number(Number) {}
319 
320     StringMap<GCOVLines> LinesByFile;
321   };
322 
323   // A function has a unique identifier, a checksum (we leave as zero) and a
324   // set of blocks and a map of edges between blocks. This is the only GCOV
325   // object users can construct, the blocks and lines will be rooted here.
326   class GCOVFunction : public GCOVRecord {
327   public:
328     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
329                  unsigned EndLine, uint32_t Ident, int Version)
330         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
331           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
332       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
333       bool ExitBlockBeforeBody = Version >= 48;
334       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
335       for (BasicBlock &BB : *F)
336         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
337       if (!ExitBlockBeforeBody)
338         ReturnBlock.Number = i;
339 
340       std::string FunctionNameAndLine;
341       raw_string_ostream FNLOS(FunctionNameAndLine);
342       FNLOS << getFunctionName(SP) << SP->getLine();
343       FNLOS.flush();
344       FuncChecksum = hash_value(FunctionNameAndLine);
345     }
346 
347     GCOVBlock &getBlock(const BasicBlock *BB) {
348       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
349     }
350 
351     GCOVBlock &getEntryBlock() { return EntryBlock; }
352     GCOVBlock &getReturnBlock() {
353       return ReturnBlock;
354     }
355 
356     uint32_t getFuncChecksum() const {
357       return FuncChecksum;
358     }
359 
360     void writeOut(uint32_t CfgChecksum) {
361       write(GCOV_TAG_FUNCTION);
362       SmallString<128> Filename = getFilename(SP);
363       uint32_t BlockLen =
364           2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
365       if (Version < 80)
366         BlockLen += wordsOfString(Filename) + 1;
367       else
368         BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
369 
370       write(BlockLen);
371       write(Ident);
372       write(FuncChecksum);
373       if (Version >= 47)
374         write(CfgChecksum);
375       writeString(getFunctionName(SP));
376       if (Version < 80) {
377         writeString(Filename);
378         write(SP->getLine());
379       } else {
380         write(SP->isArtificial()); // artificial
381         writeString(Filename);
382         write(SP->getLine()); // start_line
383         write(0);             // start_column
384         // EndLine is the last line with !dbg. It is not the } line as in GCC,
385         // but good enough.
386         write(EndLine);
387         if (Version >= 90)
388           write(0); // end_column
389       }
390 
391       // Emit count of blocks.
392       write(GCOV_TAG_BLOCKS);
393       if (Version < 80) {
394         write(Blocks.size() + 2);
395         for (int i = Blocks.size() + 2; i; --i)
396           write(0);
397       } else {
398         write(1);
399         write(Blocks.size() + 2);
400       }
401       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
402 
403       // Emit edges between blocks.
404       const uint32_t Outgoing = EntryBlock.OutEdges.size();
405       if (Outgoing) {
406         write(GCOV_TAG_ARCS);
407         write(Outgoing * 2 + 1);
408         write(EntryBlock.Number);
409         for (const auto &E : EntryBlock.OutEdges) {
410           write(E.first->Number);
411           write(E.second);
412         }
413       }
414       for (auto &It : Blocks) {
415         const GCOVBlock &Block = It.second;
416         if (Block.OutEdges.empty()) continue;
417 
418         write(GCOV_TAG_ARCS);
419         write(Block.OutEdges.size() * 2 + 1);
420         write(Block.Number);
421         for (const auto &E : Block.OutEdges) {
422           write(E.first->Number);
423           write(E.second);
424         }
425       }
426 
427       // Emit lines for each block.
428       for (auto &It : Blocks)
429         It.second.writeOut();
430     }
431 
432   public:
433     const DISubprogram *SP;
434     unsigned EndLine;
435     uint32_t Ident;
436     uint32_t FuncChecksum;
437     int Version;
438     MapVector<BasicBlock *, GCOVBlock> Blocks;
439     GCOVBlock EntryBlock;
440     GCOVBlock ReturnBlock;
441   };
442 }
443 
444 // RegexesStr is a string containing differents regex separated by a semi-colon.
445 // For example "foo\..*$;bar\..*$".
446 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
447   std::vector<Regex> Regexes;
448   while (!RegexesStr.empty()) {
449     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
450     if (!HeadTail.first.empty()) {
451       Regex Re(HeadTail.first);
452       std::string Err;
453       if (!Re.isValid(Err)) {
454         Ctx->emitError(Twine("Regex ") + HeadTail.first +
455                        " is not valid: " + Err);
456       }
457       Regexes.emplace_back(std::move(Re));
458     }
459     RegexesStr = HeadTail.second;
460   }
461   return Regexes;
462 }
463 
464 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
465                                            std::vector<Regex> &Regexes) {
466   for (Regex &Re : Regexes)
467     if (Re.match(Filename))
468       return true;
469   return false;
470 }
471 
472 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
473   if (FilterRe.empty() && ExcludeRe.empty()) {
474     return true;
475   }
476   SmallString<128> Filename = getFilename(F.getSubprogram());
477   auto It = InstrumentedFiles.find(Filename);
478   if (It != InstrumentedFiles.end()) {
479     return It->second;
480   }
481 
482   SmallString<256> RealPath;
483   StringRef RealFilename;
484 
485   // Path can be
486   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
487   // such a case we must get the real_path.
488   if (sys::fs::real_path(Filename, RealPath)) {
489     // real_path can fail with path like "foo.c".
490     RealFilename = Filename;
491   } else {
492     RealFilename = RealPath;
493   }
494 
495   bool ShouldInstrument;
496   if (FilterRe.empty()) {
497     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
498   } else if (ExcludeRe.empty()) {
499     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
500   } else {
501     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
502                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
503   }
504   InstrumentedFiles[Filename] = ShouldInstrument;
505   return ShouldInstrument;
506 }
507 
508 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
509                                      GCovFileType OutputType) {
510   bool Notes = OutputType == GCovFileType::GCNO;
511 
512   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
513     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
514       MDNode *N = GCov->getOperand(i);
515       bool ThreeElement = N->getNumOperands() == 3;
516       if (!ThreeElement && N->getNumOperands() != 2)
517         continue;
518       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
519         continue;
520 
521       if (ThreeElement) {
522         // These nodes have no mangling to apply, it's stored mangled in the
523         // bitcode.
524         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
525         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
526         if (!NotesFile || !DataFile)
527           continue;
528         return std::string(Notes ? NotesFile->getString()
529                                  : DataFile->getString());
530       }
531 
532       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
533       if (!GCovFile)
534         continue;
535 
536       SmallString<128> Filename = GCovFile->getString();
537       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
538       return std::string(Filename.str());
539     }
540   }
541 
542   SmallString<128> Filename = CU->getFilename();
543   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
544   StringRef FName = sys::path::filename(Filename);
545   SmallString<128> CurPath;
546   if (sys::fs::current_path(CurPath))
547     return std::string(FName);
548   sys::path::append(CurPath, FName);
549   return std::string(CurPath.str());
550 }
551 
552 bool GCOVProfiler::runOnModule(
553     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
554     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
555     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
556   this->M = &M;
557   this->GetTLI = std::move(GetTLI);
558   Ctx = &M.getContext();
559 
560   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
561   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
562     return false;
563 
564   bool HasExecOrFork = AddFlushBeforeForkAndExec();
565 
566   FilterRe = createRegexesFromString(Options.Filter);
567   ExcludeRe = createRegexesFromString(Options.Exclude);
568   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
569   return true;
570 }
571 
572 PreservedAnalyses GCOVProfilerPass::run(Module &M,
573                                         ModuleAnalysisManager &AM) {
574 
575   GCOVProfiler Profiler(GCOVOpts);
576   FunctionAnalysisManager &FAM =
577       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
578 
579   auto GetBFI = [&FAM](Function &F) {
580     return &FAM.getResult<BlockFrequencyAnalysis>(F);
581   };
582   auto GetBPI = [&FAM](Function &F) {
583     return &FAM.getResult<BranchProbabilityAnalysis>(F);
584   };
585   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
586     return FAM.getResult<TargetLibraryAnalysis>(F);
587   };
588 
589   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
590     return PreservedAnalyses::all();
591 
592   return PreservedAnalyses::none();
593 }
594 
595 static bool functionHasLines(const Function &F, unsigned &EndLine) {
596   // Check whether this function actually has any source lines. Not only
597   // do these waste space, they also can crash gcov.
598   EndLine = 0;
599   for (const auto &BB : F) {
600     for (const auto &I : BB) {
601       // Debug intrinsic locations correspond to the location of the
602       // declaration, not necessarily any statements or expressions.
603       if (isa<DbgInfoIntrinsic>(&I)) continue;
604 
605       const DebugLoc &Loc = I.getDebugLoc();
606       if (!Loc)
607         continue;
608 
609       // Artificial lines such as calls to the global constructors.
610       if (Loc.getLine() == 0) continue;
611       EndLine = std::max(EndLine, Loc.getLine());
612 
613       return true;
614     }
615   }
616   return false;
617 }
618 
619 static bool isUsingScopeBasedEH(Function &F) {
620   if (!F.hasPersonalityFn()) return false;
621 
622   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
623   return isScopedEHPersonality(Personality);
624 }
625 
626 bool GCOVProfiler::AddFlushBeforeForkAndExec() {
627   const TargetLibraryInfo *TLI = nullptr;
628   SmallVector<CallInst *, 2> Forks;
629   SmallVector<CallInst *, 2> Execs;
630   for (auto &F : M->functions()) {
631     TLI = TLI == nullptr ? &GetTLI(F) : TLI;
632     for (auto &I : instructions(F)) {
633       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
634         if (Function *Callee = CI->getCalledFunction()) {
635           LibFunc LF;
636           if (TLI->getLibFunc(*Callee, LF)) {
637             if (LF == LibFunc_fork) {
638 #if !defined(_WIN32)
639               Forks.push_back(CI);
640 #endif
641             } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
642                        LF == LibFunc_execlp || LF == LibFunc_execv ||
643                        LF == LibFunc_execvp || LF == LibFunc_execve ||
644                        LF == LibFunc_execvpe || LF == LibFunc_execvP) {
645               Execs.push_back(CI);
646             }
647           }
648         }
649       }
650     }
651   }
652 
653   for (auto *F : Forks) {
654     IRBuilder<> Builder(F);
655     BasicBlock *Parent = F->getParent();
656     auto NextInst = ++F->getIterator();
657 
658     // We've a fork so just reset the counters in the child process
659     FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
660     FunctionCallee GCOVFork = M->getOrInsertFunction(
661         "__gcov_fork", FTy,
662         TLI->getAttrList(Ctx, {}, /*Signed=*/true, /*Ret=*/true));
663     F->setCalledFunction(GCOVFork);
664 
665     // We split just after the fork to have a counter for the lines after
666     // Anyway there's a bug:
667     // void foo() { fork(); }
668     // void bar() { foo(); blah(); }
669     // then "blah();" will be called 2 times but showed as 1
670     // because "blah()" belongs to the same block as "foo();"
671     Parent->splitBasicBlock(NextInst);
672 
673     // back() is a br instruction with a debug location
674     // equals to the one from NextAfterFork
675     // So to avoid to have two debug locs on two blocks just change it
676     DebugLoc Loc = F->getDebugLoc();
677     Parent->back().setDebugLoc(Loc);
678   }
679 
680   for (auto *E : Execs) {
681     IRBuilder<> Builder(E);
682     BasicBlock *Parent = E->getParent();
683     auto NextInst = ++E->getIterator();
684 
685     // Since the process is replaced by a new one we need to write out gcdas
686     // No need to reset the counters since they'll be lost after the exec**
687     FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
688     FunctionCallee WriteoutF =
689         M->getOrInsertFunction("llvm_writeout_files", FTy);
690     Builder.CreateCall(WriteoutF);
691 
692     DebugLoc Loc = E->getDebugLoc();
693     Builder.SetInsertPoint(&*NextInst);
694     // If the exec** fails we must reset the counters since they've been
695     // dumped
696     FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
697     Builder.CreateCall(ResetF)->setDebugLoc(Loc);
698     ExecBlocks.insert(Parent);
699     Parent->splitBasicBlock(NextInst);
700     Parent->back().setDebugLoc(Loc);
701   }
702 
703   return !Forks.empty() || !Execs.empty();
704 }
705 
706 static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
707                               const DenseSet<const BasicBlock *> &ExecBlocks) {
708   if (E.InMST || E.Removed)
709     return nullptr;
710 
711   BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
712   BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
713   // For a fake edge, instrument the real BB.
714   if (SrcBB == nullptr)
715     return DestBB;
716   if (DestBB == nullptr)
717     return SrcBB;
718 
719   auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
720     // There are basic blocks (such as catchswitch) cannot be instrumented.
721     // If the returned first insertion point is the end of BB, skip this BB.
722     if (BB->getFirstInsertionPt() == BB->end())
723       return nullptr;
724     return BB;
725   };
726 
727   // Instrument the SrcBB if it has a single successor,
728   // otherwise, the DestBB if this is not a critical edge.
729   Instruction *TI = SrcBB->getTerminator();
730   if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
731     return CanInstrument(SrcBB);
732   if (!E.IsCritical)
733     return CanInstrument(DestBB);
734 
735   // Some IndirectBr critical edges cannot be split by the previous
736   // SplitIndirectBrCriticalEdges call. Bail out.
737   const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
738   BasicBlock *InstrBB =
739       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
740   if (!InstrBB)
741     return nullptr;
742 
743   MST.addEdge(SrcBB, InstrBB, 0);
744   MST.addEdge(InstrBB, DestBB, 0).InMST = true;
745   E.Removed = true;
746 
747   return CanInstrument(InstrBB);
748 }
749 
750 #ifndef NDEBUG
751 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
752   size_t ID = 0;
753   for (auto &E : make_pointee_range(MST.AllEdges)) {
754     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
755     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
756     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
757            << E.infoString() << "\n";
758   }
759 }
760 #endif
761 
762 bool GCOVProfiler::emitProfileNotes(
763     NamedMDNode *CUNode, bool HasExecOrFork,
764     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
765     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
766     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
767   int Version;
768   {
769     uint8_t c3 = Options.Version[0];
770     uint8_t c2 = Options.Version[1];
771     uint8_t c1 = Options.Version[2];
772     Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
773                         : (c3 - '0') * 10 + c1 - '0';
774   }
775 
776   bool EmitGCDA = Options.EmitData;
777   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
778     // Each compile unit gets its own .gcno file. This means that whether we run
779     // this pass over the original .o's as they're produced, or run it after
780     // LTO, we'll generate the same .gcno files.
781 
782     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
783 
784     // Skip module skeleton (and module) CUs.
785     if (CU->getDWOId())
786       continue;
787 
788     std::vector<uint8_t> EdgeDestinations;
789     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
790 
791     Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
792                                                  : support::endianness::big;
793     unsigned FunctionIdent = 0;
794     for (auto &F : M->functions()) {
795       DISubprogram *SP = F.getSubprogram();
796       unsigned EndLine;
797       if (!SP) continue;
798       if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
799         continue;
800       // TODO: Functions using scope-based EH are currently not supported.
801       if (isUsingScopeBasedEH(F)) continue;
802       if (F.hasFnAttribute(llvm::Attribute::NoProfile))
803         continue;
804       if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
805         continue;
806 
807       // Add the function line number to the lines of the entry block
808       // to have a counter for the function definition.
809       uint32_t Line = SP->getLine();
810       auto Filename = getFilename(SP);
811 
812       BranchProbabilityInfo *BPI = GetBPI(F);
813       BlockFrequencyInfo *BFI = GetBFI(F);
814 
815       // Split indirectbr critical edges here before computing the MST rather
816       // than later in getInstrBB() to avoid invalidating it.
817       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
818                                    BFI);
819 
820       CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
821 
822       // getInstrBB can split basic blocks and push elements to AllEdges.
823       for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
824         auto &E = *MST.AllEdges[I];
825         // For now, disable spanning tree optimization when fork or exec* is
826         // used.
827         if (HasExecOrFork)
828           E.InMST = false;
829         E.Place = getInstrBB(MST, E, ExecBlocks);
830       }
831       // Basic blocks in F are finalized at this point.
832       BasicBlock &EntryBlock = F.getEntryBlock();
833       Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
834                                                      FunctionIdent++, Version));
835       GCOVFunction &Func = *Funcs.back();
836 
837       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
838       // as well.
839       llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
840         return E->Removed || (!E->InMST && !E->Place);
841       });
842       const size_t Measured =
843           std::stable_partition(
844               MST.AllEdges.begin(), MST.AllEdges.end(),
845               [](std::unique_ptr<Edge> &E) { return E->Place; }) -
846           MST.AllEdges.begin();
847       for (size_t I : llvm::seq<size_t>(0, Measured)) {
848         Edge &E = *MST.AllEdges[I];
849         GCOVBlock &Src =
850             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
851         GCOVBlock &Dst =
852             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
853         E.SrcNumber = Src.Number;
854         E.DstNumber = Dst.Number;
855       }
856       std::stable_sort(
857           MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
858           [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
859             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
860                                                 : L->DstNumber < R->DstNumber;
861           });
862 
863       for (const Edge &E : make_pointee_range(MST.AllEdges)) {
864         GCOVBlock &Src =
865             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
866         GCOVBlock &Dst =
867             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
868         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
869       }
870 
871       // Artificial functions such as global initializers
872       if (!SP->isArtificial())
873         Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
874 
875       LLVM_DEBUG(dumpEdges(MST, Func));
876 
877       for (auto &GB : Func.Blocks) {
878         const BasicBlock &BB = *GB.first;
879         auto &Block = GB.second;
880         for (auto Succ : Block.OutEdges) {
881           uint32_t Idx = Succ.first->Number;
882           do EdgeDestinations.push_back(Idx & 255);
883           while ((Idx >>= 8) > 0);
884         }
885 
886         for (const auto &I : BB) {
887           // Debug intrinsic locations correspond to the location of the
888           // declaration, not necessarily any statements or expressions.
889           if (isa<DbgInfoIntrinsic>(&I)) continue;
890 
891           const DebugLoc &Loc = I.getDebugLoc();
892           if (!Loc)
893             continue;
894 
895           // Artificial lines such as calls to the global constructors.
896           if (Loc.getLine() == 0 || Loc.isImplicitCode())
897             continue;
898 
899           if (Line == Loc.getLine()) continue;
900           Line = Loc.getLine();
901           MDNode *Scope = Loc.getScope();
902           // TODO: Handle blocks from another file due to #line, #include, etc.
903           if (isa<DILexicalBlockFile>(Scope) || SP != getDISubprogram(Scope))
904             continue;
905 
906           GCOVLines &Lines = Block.getFile(Filename);
907           Lines.addLine(Loc.getLine());
908         }
909         Line = 0;
910       }
911       if (EmitGCDA) {
912         DISubprogram *SP = F.getSubprogram();
913         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
914         GlobalVariable *Counters = new GlobalVariable(
915             *M, CounterTy, false, GlobalValue::InternalLinkage,
916             Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
917         CountersBySP.emplace_back(Counters, SP);
918 
919         for (size_t I : llvm::seq<size_t>(0, Measured)) {
920           const Edge &E = *MST.AllEdges[I];
921           IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
922           Value *V = Builder.CreateConstInBoundsGEP2_64(
923               Counters->getValueType(), Counters, 0, I);
924           // Disable sanitizers to decrease size bloat. We don't expect
925           // sanitizers to catch interesting issues.
926           Instruction *Inst;
927           if (Options.Atomic) {
928             Inst = Builder.CreateAtomicRMW(AtomicRMWInst::Add, V,
929                                            Builder.getInt64(1), MaybeAlign(),
930                                            AtomicOrdering::Monotonic);
931           } else {
932             LoadInst *OldCount =
933                 Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
934             OldCount->setNoSanitizeMetadata();
935             Value *NewCount = Builder.CreateAdd(OldCount, Builder.getInt64(1));
936             Inst = Builder.CreateStore(NewCount, V);
937           }
938           Inst->setNoSanitizeMetadata();
939         }
940       }
941     }
942 
943     char Tmp[4];
944     JamCRC JC;
945     JC.update(EdgeDestinations);
946     uint32_t Stamp = JC.getCRC();
947     FileChecksums.push_back(Stamp);
948 
949     if (Options.EmitNotes) {
950       std::error_code EC;
951       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
952                          sys::fs::OF_None);
953       if (EC) {
954         Ctx->emitError(
955             Twine("failed to open coverage notes file for writing: ") +
956             EC.message());
957         continue;
958       }
959       os = &out;
960       if (Endian == support::endianness::big) {
961         out.write("gcno", 4);
962         out.write(Options.Version, 4);
963       } else {
964         out.write("oncg", 4);
965         std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
966         out.write(Tmp, 4);
967       }
968       write(Stamp);
969       if (Version >= 90)
970         writeString(""); // unuseful current_working_directory
971       if (Version >= 80)
972         write(0); // unuseful has_unexecuted_blocks
973 
974       for (auto &Func : Funcs)
975         Func->writeOut(Stamp);
976 
977       write(0);
978       write(0);
979       out.close();
980     }
981 
982     if (EmitGCDA) {
983       emitGlobalConstructor(CountersBySP);
984       EmitGCDA = false;
985     }
986   }
987   return true;
988 }
989 
990 Function *GCOVProfiler::createInternalFunction(FunctionType *FTy,
991                                                StringRef Name,
992                                                StringRef MangledType /*=""*/) {
993   Function *F = Function::createWithDefaultAttr(
994       FTy, GlobalValue::InternalLinkage, 0, Name, M);
995   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
996   F->addFnAttr(Attribute::NoUnwind);
997   if (Options.NoRedZone)
998     F->addFnAttr(Attribute::NoRedZone);
999   if (!MangledType.empty())
1000     setKCFIType(*M, *F, MangledType);
1001   return F;
1002 }
1003 
1004 void GCOVProfiler::emitGlobalConstructor(
1005     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1006   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1007   Function *ResetF = insertReset(CountersBySP);
1008 
1009   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
1010   // be executed at exit and the "__llvm_gcov_reset" function to be executed
1011   // when "__gcov_flush" is called.
1012   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1013   Function *F = createInternalFunction(FTy, "__llvm_gcov_init", "_ZTSFvvE");
1014   F->addFnAttr(Attribute::NoInline);
1015 
1016   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1017   IRBuilder<> Builder(BB);
1018 
1019   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1020   auto *PFTy = PointerType::get(FTy, 0);
1021   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1022 
1023   // Initialize the environment and register the local writeout, flush and
1024   // reset functions.
1025   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1026   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1027   Builder.CreateRetVoid();
1028 
1029   appendToGlobalCtors(*M, F, 0);
1030 }
1031 
1032 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1033   Type *Args[] = {
1034       Type::getInt8PtrTy(*Ctx), // const char *orig_filename
1035       Type::getInt32Ty(*Ctx),   // uint32_t version
1036       Type::getInt32Ty(*Ctx),   // uint32_t checksum
1037   };
1038   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1039   return M->getOrInsertFunction("llvm_gcda_start_file", FTy,
1040                                 TLI->getAttrList(Ctx, {1, 2}, /*Signed=*/false));
1041 }
1042 
1043 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1044   Type *Args[] = {
1045     Type::getInt32Ty(*Ctx),    // uint32_t ident
1046     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1047     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1048   };
1049   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1050   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy,
1051                              TLI->getAttrList(Ctx, {0, 1, 2}, /*Signed=*/false));
1052 }
1053 
1054 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1055   Type *Args[] = {
1056     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
1057     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
1058   };
1059   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1060   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy,
1061                                 TLI->getAttrList(Ctx, {0}, /*Signed=*/false));
1062 }
1063 
1064 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1065   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1066   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1067 }
1068 
1069 FunctionCallee GCOVProfiler::getEndFileFunc() {
1070   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1071   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1072 }
1073 
/// Build the body of `__llvm_gcov_writeout`, reusing a function of that name
/// if the module already has one and creating an internal one otherwise.
///
/// For every `llvm.dbg.cu` operand without a DWO id, one constant `file_info`
/// record is emitted. It holds the `llvm_gcda_start_file` arguments, the
/// number of counter arrays, and pointers to two parallel constant arrays with
/// the `llvm_gcda_emit_function` and `llvm_gcda_emit_arcs` arguments (one
/// element per entry of \p CountersBySP). The generated code walks these
/// records with a two-level loop: per file it calls start_file, then
/// emit_function and emit_arcs per counter array, then summary_info and
/// end_file.
///
/// \param CountersBySP (counter array global, subprogram) pairs; only the
///        global of each pair is read here.
/// \returns the function that received the body.
1074 Function *GCOVProfiler::insertCounterWriteout(
1075     ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
1076   FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1077   Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
1078   if (!WriteoutF)
1079     WriteoutF =
1080         createInternalFunction(WriteoutFTy, "__llvm_gcov_writeout", "_ZTSFvvE");
       // Applied whether the function was found or freshly created.
1081   WriteoutF->addFnAttr(Attribute::NoInline);
1082 
1083   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
1084   IRBuilder<> Builder(BB);
1085 
1086   auto *TLI = &GetTLI(*WriteoutF);
1087 
       // Runtime entry points called from the loops generated below.
1088   FunctionCallee StartFile = getStartFileFunc(TLI);
1089   FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
1090   FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
1091   FunctionCallee SummaryInfo = getSummaryInfoFunc();
1092   FunctionCallee EndFile = getEndFileFunc();
1093 
       // Without compile-unit metadata there is nothing to write: the function
       // body is a bare `ret void`.
1094   NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
1095   if (!CUNodes) {
1096     Builder.CreateRetVoid();
1097     return WriteoutF;
1098   }
1099 
1100   // Collect the relevant data into a large constant data structure that we can
1101   // walk to write out everything.
       // Each *_args_ty struct mirrors the parameter list of the runtime function
       // it feeds; file_info is {start_file args, counter count, emit_function
       // args array, emit_arcs args array}.
1102   StructType *StartFileCallArgsTy = StructType::create(
1103       {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1104       "start_file_args_ty");
1105   StructType *EmitFunctionCallArgsTy = StructType::create(
1106       {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1107       "emit_function_args_ty");
1108   StructType *EmitArcsCallArgsTy = StructType::create(
1109       {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
1110       "emit_arcs_args_ty");
1111   StructType *FileInfoTy =
1112       StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
1113                           EmitFunctionCallArgsTy->getPointerTo(),
1114                           EmitArcsCallArgsTy->getPointerTo()},
1115                          "file_info");
1116 
1117   Constant *Zero32 = Builder.getInt32(0);
1118   // Build an explicit array of two zeros for use in ConstantExpr GEP building.
1119   Constant *TwoZero32s[] = {Zero32, Zero32};
1120 
1121   SmallVector<Constant *, 8> FileInfos;
1122   for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
1123     auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
1124 
1125     // Skip module skeleton (and module) CUs.
1126     if (CU->getDWOId())
1127       continue;
1128 
1129     std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
         // NOTE(review): `i` counts every llvm.dbg.cu operand, including the
         // ones skipped above, and is used directly as an index into
         // FileChecksums. That is only in range (and only selects the matching
         // checksum) if FileChecksums holds one entry per operand at this
         // point - confirm against the code that fills FileChecksums.
1130     uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
1131     auto *StartFileCallArgs = ConstantStruct::get(
1132         StartFileCallArgsTy,
1133         {Builder.CreateGlobalStringPtr(FilenameGcda),
1134          Builder.getInt32(endian::read32be(Options.Version)),
1135          Builder.getInt32(CfgChecksum)});
1136 
         // The same CountersBySP list is walked for every file, so each file
         // record describes all counter arrays.
1137     SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
1138     SmallVector<Constant *, 8> EmitArcsCallArgsArray;
1139     for (int j : llvm::seq<int>(0, CountersBySP.size())) {
           // The position in CountersBySP doubles as the function ident.
           // Funcs[j] is assumed to describe the same function as
           // CountersBySP[j] - TODO confirm the two lists are filled in step.
1140       uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
1141       EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
1142           EmitFunctionCallArgsTy,
1143           {Builder.getInt32(j),
1144            Builder.getInt32(FuncChecksum),
1145            Builder.getInt32(CfgChecksum)}));
1146 
           // Element count of the counter array, paired with a constant GEP
           // (indices 0, 0) to its first element.
1147       GlobalVariable *GV = CountersBySP[j].first;
1148       unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
1149       EmitArcsCallArgsArray.push_back(ConstantStruct::get(
1150           EmitArcsCallArgsTy,
1151           {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
1152                                        GV->getValueType(), GV, TwoZero32s)}));
1153     }
1154     // Create global arrays for the two emit calls.
1155     int CountersSize = CountersBySP.size();
1156     assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
1157            "Mismatched array size!");
1158     assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
1159            "Mismatched array size!");
1160     auto *EmitFunctionCallArgsArrayTy =
1161         ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
1162     auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
1163         *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
1164         GlobalValue::InternalLinkage,
1165         ConstantArray::get(EmitFunctionCallArgsArrayTy,
1166                            EmitFunctionCallArgsArray),
1167         Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
1168     auto *EmitArcsCallArgsArrayTy =
1169         ArrayType::get(EmitArcsCallArgsTy, CountersSize);
1170     EmitFunctionCallArgsArrayGV->setUnnamedAddr(
1171         GlobalValue::UnnamedAddr::Global);
1172     auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
1173         *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
1174         GlobalValue::InternalLinkage,
1175         ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
1176         Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
1177     EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1178 
         // The file record stores pointers to the first element of each array.
1179     FileInfos.push_back(ConstantStruct::get(
1180         FileInfoTy,
1181         {StartFileCallArgs, Builder.getInt32(CountersSize),
1182          ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
1183                                                 EmitFunctionCallArgsArrayGV,
1184                                                 TwoZero32s),
1185          ConstantExpr::getInBoundsGetElementPtr(
1186              EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
1187   }
1188 
1189   // If we didn't find anything to actually emit, bail on out.
1190   if (FileInfos.empty()) {
1191     Builder.CreateRetVoid();
1192     return WriteoutF;
1193   }
1194 
1195   // To simplify code, we cap the number of file infos we write out to fit
1196   // easily in a 32-bit signed integer. This gives consistent behavior between
1197   // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
1198   // operations on 32-bit systems. It also seems unreasonable to try to handle
1199   // more than 2 billion files.
1200   if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
1201     FileInfos.resize(INT_MAX);
1202 
1203   // Create a global for the entire data structure so we can walk it more
1204   // easily.
1205   auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
1206   auto *FileInfoArrayGV = new GlobalVariable(
1207       *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
1208       ConstantArray::get(FileInfoArrayTy, FileInfos),
1209       "__llvm_internal_gcov_emit_file_info");
1210   FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1211 
1212   // Create the CFG for walking this data structure.
       // entry -> file.loop.header -> counter.loop.header (self-loop)
       //       -> file.loop.latch -> file.loop.header | exit
       // file.loop.header branches straight to the latch when a file has no
       // counters.
1213   auto *FileLoopHeader =
1214       BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
1215   auto *CounterLoopHeader =
1216       BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
1217   auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
1218   auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
1219 
1220   // We always have at least one file, so just branch to the header.
1221   Builder.CreateBr(FileLoopHeader);
1222 
1223   // The index into the files structure is our loop induction variable.
1224   Builder.SetInsertPoint(FileLoopHeader);
1225   PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1226                                   "file_idx");
1227   IV->addIncoming(Builder.getInt32(0), BB);
1228   auto *FileInfoPtr = Builder.CreateInBoundsGEP(
1229       FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
       // Load the three start_file arguments from field 0 of the current file
       // record and pass them to the runtime.
1230   auto *StartFileCallArgsPtr =
1231       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
1232   auto *StartFileCall = Builder.CreateCall(
1233       StartFile,
1234       {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
1235                           Builder.CreateStructGEP(StartFileCallArgsTy,
1236                                                   StartFileCallArgsPtr, 0),
1237                           "filename"),
1238        Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
1239                           Builder.CreateStructGEP(StartFileCallArgsTy,
1240                                                   StartFileCallArgsPtr, 1),
1241                           "version"),
1242        Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
1243                           Builder.CreateStructGEP(StartFileCallArgsTy,
1244                                                   StartFileCallArgsPtr, 2),
1245                           "stamp")});
       // If TLI reports an extension attribute for i32 parameters, attach it to
       // the call-site argument.
       // NOTE(review): the declaration built by getStartFileFunc() requests
       // attributes for parameters 1 and 2, but only 2 is annotated here -
       // confirm whether parameter 1 is left out on purpose.
1246   if (auto AK = TLI->getExtAttrForI32Param(false))
1247     StartFileCall->addParamAttr(2, AK);
       // Remaining fields of the file record: counter count and the two
       // argument arrays.
1248   auto *NumCounters = Builder.CreateLoad(
1249       FileInfoTy->getElementType(1),
1250       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
1251   auto *EmitFunctionCallArgsArray =
1252       Builder.CreateLoad(FileInfoTy->getElementType(2),
1253                          Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
1254                          "emit_function_args");
1255   auto *EmitArcsCallArgsArray = Builder.CreateLoad(
1256       FileInfoTy->getElementType(3),
1257       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
       // Enter the counter loop only if 0 < num_ctrs (signed compare).
1258   auto *EnterCounterLoopCond =
1259       Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
1260   Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
1261 
       // Inner loop: one emit_function + emit_arcs call pair per counter array,
       // indexed by ctr_idx.
1262   Builder.SetInsertPoint(CounterLoopHeader);
1263   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1264                                "ctr_idx");
1265   JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
1266   auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
1267       EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
1268   auto *EmitFunctionCall = Builder.CreateCall(
1269       EmitFunction,
1270       {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
1271                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1272                                                   EmitFunctionCallArgsPtr, 0),
1273                           "ident"),
1274        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
1275                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1276                                                   EmitFunctionCallArgsPtr, 1),
1277                           "func_checkssum"),
1278        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
1279                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1280                                                   EmitFunctionCallArgsPtr, 2),
1281                           "cfg_checksum")});
1282   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1283     EmitFunctionCall->addParamAttr(0, AK);
1284     EmitFunctionCall->addParamAttr(1, AK);
1285     EmitFunctionCall->addParamAttr(2, AK);
1286   }
1287   auto *EmitArcsCallArgsPtr =
1288       Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
1289   auto *EmitArcsCall = Builder.CreateCall(
1290       EmitArcs,
1291       {Builder.CreateLoad(
1292            EmitArcsCallArgsTy->getElementType(0),
1293            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
1294            "num_counters"),
1295        Builder.CreateLoad(
1296            EmitArcsCallArgsTy->getElementType(1),
1297            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
1298            "counters")});
1299   if (auto AK = TLI->getExtAttrForI32Param(false))
1300     EmitArcsCall->addParamAttr(0, AK);
       // Advance ctr_idx; loop back while it is still below num_ctrs.
1301   auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
1302   auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
1303   Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
1304   JV->addIncoming(NextJV, CounterLoopHeader);
1305 
       // Latch: reached after the counter loop finishes or was skipped. Finish
       // the current file, then continue with the next one or exit.
1306   Builder.SetInsertPoint(FileLoopLatch);
1307   Builder.CreateCall(SummaryInfo, {});
1308   Builder.CreateCall(EndFile, {});
1309   auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
1310   auto *FileLoopCond =
1311       Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
1312   Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
1313   IV->addIncoming(NextIV, FileLoopLatch);
1314 
1315   Builder.SetInsertPoint(ExitBB);
1316   Builder.CreateRetVoid();
1317 
1318   return WriteoutF;
1319 }
1320 
1321 Function *GCOVProfiler::insertReset(
1322     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1323   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1324   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1325   if (!ResetF)
1326     ResetF = createInternalFunction(FTy, "__llvm_gcov_reset", "_ZTSFvvE");
1327   ResetF->addFnAttr(Attribute::NoInline);
1328 
1329   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1330   IRBuilder<> Builder(Entry);
1331   LLVMContext &C = Entry->getContext();
1332 
1333   // Zero out the counters.
1334   for (const auto &I : CountersBySP) {
1335     GlobalVariable *GV = I.first;
1336     auto *GVTy = cast<ArrayType>(GV->getValueType());
1337     Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
1338                          GVTy->getNumElements() *
1339                              GVTy->getElementType()->getScalarSizeInBits() / 8,
1340                          GV->getAlign());
1341   }
1342 
1343   Type *RetTy = ResetF->getReturnType();
1344   if (RetTy->isVoidTy())
1345     Builder.CreateRetVoid();
1346   else if (RetTy->isIntegerTy())
1347     // Used if __llvm_gcov_reset was implicitly declared.
1348     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1349   else
1350     report_fatal_error("invalid return type for __llvm_gcov_reset");
1351 
1352   return ResetF;
1353 }
1354