xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp (revision bc5304a006238115291e7568583632889dffbab9)
1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This pass implements GCOV-style profiling. When this pass is run it emits
// "gcno" files next to the existing source, and instruments the code so that
// it records the edges between blocks that run and emits a complementary
// "gcda" file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "CFGMST.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Sequence.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Analysis/BlockFrequencyInfo.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/Analysis/EHPersonalities.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/DebugInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/InstIterator.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/CRC.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/Regex.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/Instrumentation.h"
47 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
48 #include "llvm/Transforms/Utils/ModuleUtils.h"
49 #include <algorithm>
50 #include <memory>
51 #include <string>
52 #include <utility>
53 
54 using namespace llvm;
55 namespace endian = llvm::support::endian;
56 
57 #define DEBUG_TYPE "insert-gcov-profiling"
58 
// Constants used while serializing coverage notes. The GCOV_TAG_* values are
// the record tags emitted by the writeOut() methods of the GCOV record classes
// in this file.
enum : uint32_t {
  // Flag value for an arc record.
  // NOTE(review): presumably marks arcs that are on the spanning tree; the
  // use site is not visible in this part of the file -- confirm.
  GCOV_ARC_ON_TREE = 1 << 0,

  // Written first by GCOVFunction::writeOut().
  GCOV_TAG_FUNCTION = 0x01000000,
  // Precedes the block count in GCOVFunction::writeOut().
  GCOV_TAG_BLOCKS = 0x01410000,
  // Precedes the outgoing edges of one block in GCOVFunction::writeOut().
  GCOV_TAG_ARCS = 0x01430000,
  // Precedes the per-file line lists in GCOVBlock::writeOut().
  GCOV_TAG_LINES = 0x01450000,
};
67 
// Hidden option providing the version string used by
// GCOVOptions::getDefault(). It defaults to "408*"; getDefault() reports a
// fatal error unless the value is exactly four characters long.
static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
                                               cl::init("408*"), cl::Hidden,
                                               cl::ValueRequired);

// Hidden flag whose value GCOVOptions::getDefault() copies into
// GCOVOptions::Atomic.
static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                   cl::desc("Make counter updates atomic"));
74 
75 // Returns the number of words which will be used to represent this string.
76 static unsigned wordsOfString(StringRef s) {
77   // Length + NUL-terminated string + 0~3 padding NULs.
78   return (s.size() / 4) + 2;
79 }
80 
81 GCOVOptions GCOVOptions::getDefault() {
82   GCOVOptions Options;
83   Options.EmitNotes = true;
84   Options.EmitData = true;
85   Options.NoRedZone = false;
86   Options.Atomic = AtomicCounter;
87 
88   if (DefaultGCOVVersion.size() != 4) {
89     llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
90                              DefaultGCOVVersion);
91   }
92   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
93   return Options;
94 }
95 
96 namespace {
97 class GCOVFunction;
98 
99 class GCOVProfiler {
100 public:
101   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
102   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
103   bool
104   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
105               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
106               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
107 
108   void write(uint32_t i) {
109     char Bytes[4];
110     endian::write32(Bytes, i, Endian);
111     os->write(Bytes, 4);
112   }
113   void writeString(StringRef s) {
114     write(wordsOfString(s) - 1);
115     os->write(s.data(), s.size());
116     os->write_zeros(4 - s.size() % 4);
117   }
118   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
119 
120 private:
121   // Create the .gcno files for the Module based on DebugInfo.
122   bool
123   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
124                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
125                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
126                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
127 
128   void emitGlobalConstructor(
129       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
130 
131   bool isFunctionInstrumented(const Function &F);
132   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
133   static bool doesFilenameMatchARegex(StringRef Filename,
134                                       std::vector<Regex> &Regexes);
135 
136   // Get pointers to the functions in the runtime library.
137   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
138   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
139   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
140   FunctionCallee getSummaryInfoFunc();
141   FunctionCallee getEndFileFunc();
142 
143   // Add the function to write out all our counters to the global destructor
144   // list.
145   Function *
146   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
147   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
148 
149   bool AddFlushBeforeForkAndExec();
150 
151   enum class GCovFileType { GCNO, GCDA };
152   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
153 
154   GCOVOptions Options;
155   support::endianness Endian;
156   raw_ostream *os;
157 
158   // Checksum, produced by hash of EdgeDestinations
159   SmallVector<uint32_t, 4> FileChecksums;
160 
161   Module *M = nullptr;
162   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
163   LLVMContext *Ctx = nullptr;
164   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
165   std::vector<Regex> FilterRe;
166   std::vector<Regex> ExcludeRe;
167   DenseSet<const BasicBlock *> ExecBlocks;
168   StringMap<bool> InstrumentedFiles;
169 };
170 
171 class GCOVProfilerLegacyPass : public ModulePass {
172 public:
173   static char ID;
174   GCOVProfilerLegacyPass()
175       : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
176   GCOVProfilerLegacyPass(const GCOVOptions &Opts)
177       : ModulePass(ID), Profiler(Opts) {
178     initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
179   }
180   StringRef getPassName() const override { return "GCOV Profiler"; }
181 
182   bool runOnModule(Module &M) override {
183     auto GetBFI = [this](Function &F) {
184       return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
185     };
186     auto GetBPI = [this](Function &F) {
187       return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
188     };
189     auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
190       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
191     };
192     return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
193   }
194 
195   void getAnalysisUsage(AnalysisUsage &AU) const override {
196     AU.addRequired<BlockFrequencyInfoWrapperPass>();
197     AU.addRequired<TargetLibraryInfoWrapperPass>();
198   }
199 
200 private:
201   GCOVProfiler Profiler;
202 };
203 
// Per-basic-block bookkeeping handed to CFGMST as its block-info type. Every
// block starts out as its own group with rank zero.
struct BBInfo {
  BBInfo *Group;
  uint32_t Index;
  uint32_t Rank = 0;

  BBInfo(unsigned Idx) : Group(this), Index(Idx) {}

  // Human-readable description of this block, of the form "Index=<n>".
  const std::string infoString() const {
    std::string Text = "Index=";
    Text += std::to_string(Index);
    return Text;
  }
};
214 
215 struct Edge {
216   // This class implements the CFG edges. Note the CFG can be a multi-graph.
217   // So there might be multiple edges with same SrcBB and DestBB.
218   const BasicBlock *SrcBB;
219   const BasicBlock *DestBB;
220   uint64_t Weight;
221   BasicBlock *Place = nullptr;
222   uint32_t SrcNumber, DstNumber;
223   bool InMST = false;
224   bool Removed = false;
225   bool IsCritical = false;
226 
227   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
228       : SrcBB(Src), DestBB(Dest), Weight(W) {}
229 
230   // Return the information string of an edge.
231   const std::string infoString() const {
232     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
233             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
234         .str();
235   }
236 };
237 }
238 
// Definition of the static ID member that GCOVProfilerLegacyPass passes to
// the ModulePass constructor.
char GCOVProfilerLegacyPass::ID = 0;
// Registration of the legacy pass under the argument name
// "insert-gcov-profiling", together with the analysis wrapper passes it lists
// as dependencies.
INITIALIZE_PASS_BEGIN(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
249 
250 ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
251   return new GCOVProfilerLegacyPass(Options);
252 }
253 
254 static StringRef getFunctionName(const DISubprogram *SP) {
255   if (!SP->getLinkageName().empty())
256     return SP->getLinkageName();
257   return SP->getName();
258 }
259 
260 /// Extract a filename for a DISubprogram.
261 ///
262 /// Prefer relative paths in the coverage notes. Clang also may split
263 /// up absolute paths into a directory and filename component. When
264 /// the relative path doesn't exist, reconstruct the absolute path.
265 static SmallString<128> getFilename(const DISubprogram *SP) {
266   SmallString<128> Path;
267   StringRef RelPath = SP->getFilename();
268   if (sys::fs::exists(RelPath))
269     Path = RelPath;
270   else
271     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
272   return Path;
273 }
274 
275 namespace {
276   class GCOVRecord {
277   protected:
278     GCOVProfiler *P;
279 
280     GCOVRecord(GCOVProfiler *P) : P(P) {}
281 
282     void write(uint32_t i) { P->write(i); }
283     void writeString(StringRef s) { P->writeString(s); }
284     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
285   };
286 
287   class GCOVFunction;
288   class GCOVBlock;
289 
290   // Constructed only by requesting it from a GCOVBlock, this object stores a
291   // list of line numbers and a single filename, representing lines that belong
292   // to the block.
293   class GCOVLines : public GCOVRecord {
294    public:
295     void addLine(uint32_t Line) {
296       assert(Line != 0 && "Line zero is not a valid real line number.");
297       Lines.push_back(Line);
298     }
299 
300     uint32_t length() const {
301       return 1 + wordsOfString(Filename) + Lines.size();
302     }
303 
304     void writeOut() {
305       write(0);
306       writeString(Filename);
307       for (int i = 0, e = Lines.size(); i != e; ++i)
308         write(Lines[i]);
309     }
310 
311     GCOVLines(GCOVProfiler *P, StringRef F)
312         : GCOVRecord(P), Filename(std::string(F)) {}
313 
314   private:
315     std::string Filename;
316     SmallVector<uint32_t, 32> Lines;
317   };
318 
319 
320   // Represent a basic block in GCOV. Each block has a unique number in the
321   // function, number of lines belonging to each block, and a set of edges to
322   // other blocks.
323   class GCOVBlock : public GCOVRecord {
324    public:
325     GCOVLines &getFile(StringRef Filename) {
326       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
327     }
328 
329     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
330       OutEdges.emplace_back(&Successor, Flags);
331     }
332 
333     void writeOut() {
334       uint32_t Len = 3;
335       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
336       for (auto &I : LinesByFile) {
337         Len += I.second.length();
338         SortedLinesByFile.push_back(&I);
339       }
340 
341       write(GCOV_TAG_LINES);
342       write(Len);
343       write(Number);
344 
345       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
346                                        StringMapEntry<GCOVLines> *RHS) {
347         return LHS->getKey() < RHS->getKey();
348       });
349       for (auto &I : SortedLinesByFile)
350         I->getValue().writeOut();
351       write(0);
352       write(0);
353     }
354 
355     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
356       // Only allow copy before edges and lines have been added. After that,
357       // there are inter-block pointers (eg: edges) that won't take kindly to
358       // blocks being copied or moved around.
359       assert(LinesByFile.empty());
360       assert(OutEdges.empty());
361     }
362 
363     uint32_t Number;
364     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
365 
366   private:
367     friend class GCOVFunction;
368 
369     GCOVBlock(GCOVProfiler *P, uint32_t Number)
370         : GCOVRecord(P), Number(Number) {}
371 
372     StringMap<GCOVLines> LinesByFile;
373   };
374 
375   // A function has a unique identifier, a checksum (we leave as zero) and a
376   // set of blocks and a map of edges between blocks. This is the only GCOV
377   // object users can construct, the blocks and lines will be rooted here.
378   class GCOVFunction : public GCOVRecord {
379   public:
380     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
381                  unsigned EndLine, uint32_t Ident, int Version)
382         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
383           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
384       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
385       bool ExitBlockBeforeBody = Version >= 48;
386       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
387       for (BasicBlock &BB : *F)
388         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
389       if (!ExitBlockBeforeBody)
390         ReturnBlock.Number = i;
391 
392       std::string FunctionNameAndLine;
393       raw_string_ostream FNLOS(FunctionNameAndLine);
394       FNLOS << getFunctionName(SP) << SP->getLine();
395       FNLOS.flush();
396       FuncChecksum = hash_value(FunctionNameAndLine);
397     }
398 
399     GCOVBlock &getBlock(const BasicBlock *BB) {
400       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
401     }
402 
403     GCOVBlock &getEntryBlock() { return EntryBlock; }
404     GCOVBlock &getReturnBlock() {
405       return ReturnBlock;
406     }
407 
408     uint32_t getFuncChecksum() const {
409       return FuncChecksum;
410     }
411 
412     void writeOut(uint32_t CfgChecksum) {
413       write(GCOV_TAG_FUNCTION);
414       SmallString<128> Filename = getFilename(SP);
415       uint32_t BlockLen =
416           2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
417       if (Version < 80)
418         BlockLen += wordsOfString(Filename) + 1;
419       else
420         BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
421 
422       write(BlockLen);
423       write(Ident);
424       write(FuncChecksum);
425       if (Version >= 47)
426         write(CfgChecksum);
427       writeString(getFunctionName(SP));
428       if (Version < 80) {
429         writeString(Filename);
430         write(SP->getLine());
431       } else {
432         write(SP->isArtificial()); // artificial
433         writeString(Filename);
434         write(SP->getLine()); // start_line
435         write(0);             // start_column
436         // EndLine is the last line with !dbg. It is not the } line as in GCC,
437         // but good enough.
438         write(EndLine);
439         if (Version >= 90)
440           write(0); // end_column
441       }
442 
443       // Emit count of blocks.
444       write(GCOV_TAG_BLOCKS);
445       if (Version < 80) {
446         write(Blocks.size() + 2);
447         for (int i = Blocks.size() + 2; i; --i)
448           write(0);
449       } else {
450         write(1);
451         write(Blocks.size() + 2);
452       }
453       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
454 
455       // Emit edges between blocks.
456       const uint32_t Outgoing = EntryBlock.OutEdges.size();
457       if (Outgoing) {
458         write(GCOV_TAG_ARCS);
459         write(Outgoing * 2 + 1);
460         write(EntryBlock.Number);
461         for (const auto &E : EntryBlock.OutEdges) {
462           write(E.first->Number);
463           write(E.second);
464         }
465       }
466       for (auto &It : Blocks) {
467         const GCOVBlock &Block = It.second;
468         if (Block.OutEdges.empty()) continue;
469 
470         write(GCOV_TAG_ARCS);
471         write(Block.OutEdges.size() * 2 + 1);
472         write(Block.Number);
473         for (const auto &E : Block.OutEdges) {
474           write(E.first->Number);
475           write(E.second);
476         }
477       }
478 
479       // Emit lines for each block.
480       for (auto &It : Blocks)
481         It.second.writeOut();
482     }
483 
484   public:
485     const DISubprogram *SP;
486     unsigned EndLine;
487     uint32_t Ident;
488     uint32_t FuncChecksum;
489     int Version;
490     MapVector<BasicBlock *, GCOVBlock> Blocks;
491     GCOVBlock EntryBlock;
492     GCOVBlock ReturnBlock;
493   };
494 }
495 
496 // RegexesStr is a string containing differents regex separated by a semi-colon.
497 // For example "foo\..*$;bar\..*$".
498 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
499   std::vector<Regex> Regexes;
500   while (!RegexesStr.empty()) {
501     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
502     if (!HeadTail.first.empty()) {
503       Regex Re(HeadTail.first);
504       std::string Err;
505       if (!Re.isValid(Err)) {
506         Ctx->emitError(Twine("Regex ") + HeadTail.first +
507                        " is not valid: " + Err);
508       }
509       Regexes.emplace_back(std::move(Re));
510     }
511     RegexesStr = HeadTail.second;
512   }
513   return Regexes;
514 }
515 
516 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
517                                            std::vector<Regex> &Regexes) {
518   for (Regex &Re : Regexes)
519     if (Re.match(Filename))
520       return true;
521   return false;
522 }
523 
524 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
525   if (FilterRe.empty() && ExcludeRe.empty()) {
526     return true;
527   }
528   SmallString<128> Filename = getFilename(F.getSubprogram());
529   auto It = InstrumentedFiles.find(Filename);
530   if (It != InstrumentedFiles.end()) {
531     return It->second;
532   }
533 
534   SmallString<256> RealPath;
535   StringRef RealFilename;
536 
537   // Path can be
538   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
539   // such a case we must get the real_path.
540   if (sys::fs::real_path(Filename, RealPath)) {
541     // real_path can fail with path like "foo.c".
542     RealFilename = Filename;
543   } else {
544     RealFilename = RealPath;
545   }
546 
547   bool ShouldInstrument;
548   if (FilterRe.empty()) {
549     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
550   } else if (ExcludeRe.empty()) {
551     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
552   } else {
553     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
554                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
555   }
556   InstrumentedFiles[Filename] = ShouldInstrument;
557   return ShouldInstrument;
558 }
559 
560 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
561                                      GCovFileType OutputType) {
562   bool Notes = OutputType == GCovFileType::GCNO;
563 
564   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
565     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
566       MDNode *N = GCov->getOperand(i);
567       bool ThreeElement = N->getNumOperands() == 3;
568       if (!ThreeElement && N->getNumOperands() != 2)
569         continue;
570       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
571         continue;
572 
573       if (ThreeElement) {
574         // These nodes have no mangling to apply, it's stored mangled in the
575         // bitcode.
576         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
577         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
578         if (!NotesFile || !DataFile)
579           continue;
580         return std::string(Notes ? NotesFile->getString()
581                                  : DataFile->getString());
582       }
583 
584       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
585       if (!GCovFile)
586         continue;
587 
588       SmallString<128> Filename = GCovFile->getString();
589       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
590       return std::string(Filename.str());
591     }
592   }
593 
594   SmallString<128> Filename = CU->getFilename();
595   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
596   StringRef FName = sys::path::filename(Filename);
597   SmallString<128> CurPath;
598   if (sys::fs::current_path(CurPath))
599     return std::string(FName);
600   sys::path::append(CurPath, FName);
601   return std::string(CurPath.str());
602 }
603 
604 bool GCOVProfiler::runOnModule(
605     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
606     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
607     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
608   this->M = &M;
609   this->GetTLI = std::move(GetTLI);
610   Ctx = &M.getContext();
611 
612   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
613   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
614     return false;
615 
616   bool HasExecOrFork = AddFlushBeforeForkAndExec();
617 
618   FilterRe = createRegexesFromString(Options.Filter);
619   ExcludeRe = createRegexesFromString(Options.Exclude);
620   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
621   return true;
622 }
623 
624 PreservedAnalyses GCOVProfilerPass::run(Module &M,
625                                         ModuleAnalysisManager &AM) {
626 
627   GCOVProfiler Profiler(GCOVOpts);
628   FunctionAnalysisManager &FAM =
629       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
630 
631   auto GetBFI = [&FAM](Function &F) {
632     return &FAM.getResult<BlockFrequencyAnalysis>(F);
633   };
634   auto GetBPI = [&FAM](Function &F) {
635     return &FAM.getResult<BranchProbabilityAnalysis>(F);
636   };
637   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
638     return FAM.getResult<TargetLibraryAnalysis>(F);
639   };
640 
641   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
642     return PreservedAnalyses::all();
643 
644   return PreservedAnalyses::none();
645 }
646 
647 static bool functionHasLines(const Function &F, unsigned &EndLine) {
648   // Check whether this function actually has any source lines. Not only
649   // do these waste space, they also can crash gcov.
650   EndLine = 0;
651   for (auto &BB : F) {
652     for (auto &I : BB) {
653       // Debug intrinsic locations correspond to the location of the
654       // declaration, not necessarily any statements or expressions.
655       if (isa<DbgInfoIntrinsic>(&I)) continue;
656 
657       const DebugLoc &Loc = I.getDebugLoc();
658       if (!Loc)
659         continue;
660 
661       // Artificial lines such as calls to the global constructors.
662       if (Loc.getLine() == 0) continue;
663       EndLine = std::max(EndLine, Loc.getLine());
664 
665       return true;
666     }
667   }
668   return false;
669 }
670 
671 static bool isUsingScopeBasedEH(Function &F) {
672   if (!F.hasPersonalityFn()) return false;
673 
674   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
675   return isScopedEHPersonality(Personality);
676 }
677 
// Rewrites the fork and exec* calls of the module so that coverage counters
// stay meaningful across them:
//  - a call to fork is redirected to "__gcov_fork" and its block is split
//    right after the call;
//  - a call to an exec* function gets a call to "llvm_writeout_files" before
//    it and a call to "llvm_reset_counters" after it; its block is recorded
//    in ExecBlocks and split right after the exec* call.
// Calls are collected first and rewritten afterwards, so the instruction
// lists are not modified while they are being iterated.
// Returns true if at least one fork or exec* call was rewritten.
bool GCOVProfiler::AddFlushBeforeForkAndExec() {
  SmallVector<CallInst *, 2> Forks;
  SmallVector<CallInst *, 2> Execs;
  for (auto &F : M->functions()) {
    auto *TLI = &GetTLI(F);
    for (auto &I : instructions(F)) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        // Only direct calls to functions recognized as library functions are
        // considered.
        if (Function *Callee = CI->getCalledFunction()) {
          LibFunc LF;
          if (TLI->getLibFunc(*Callee, LF)) {
            if (LF == LibFunc_fork) {
              // fork calls are left untouched when this file is built for
              // Windows.
#if !defined(_WIN32)
              Forks.push_back(CI);
#endif
            } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
                       LF == LibFunc_execlp || LF == LibFunc_execv ||
                       LF == LibFunc_execvp || LF == LibFunc_execve ||
                       LF == LibFunc_execvpe || LF == LibFunc_execvP) {
              Execs.push_back(CI);
            }
          }
        }
      }
    }
  }

  for (auto F : Forks) {
    IRBuilder<> Builder(F);
    BasicBlock *Parent = F->getParent();
    // Instruction following the fork call; the block is split here.
    auto NextInst = ++F->getIterator();

    // We've a fork so just reset the counters in the child process
    FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
    FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy);
    F->setCalledFunction(GCOVFork);

    // We split just after the fork to have a counter for the lines after
    // Anyway there's a bug:
    // void foo() { fork(); }
    // void bar() { foo(); blah(); }
    // then "blah();" will be called 2 times but showed as 1
    // because "blah()" belongs to the same block as "foo();"
    Parent->splitBasicBlock(NextInst);

    // back() is the br instruction created by the split; it carries the debug
    // location of the instruction after the fork. Give it the fork's location
    // instead so that location does not show up in two blocks.
    DebugLoc Loc = F->getDebugLoc();
    Parent->back().setDebugLoc(Loc);
  }

  for (auto E : Execs) {
    // The builder initially inserts in front of the exec* call.
    IRBuilder<> Builder(E);
    BasicBlock *Parent = E->getParent();
    auto NextInst = ++E->getIterator();

    // Since the process is replaced by a new one we need to write out gcdas
    // No need to reset the counters since they'll be lost after the exec**
    FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
    FunctionCallee WriteoutF =
        M->getOrInsertFunction("llvm_writeout_files", FTy);
    Builder.CreateCall(WriteoutF);

    DebugLoc Loc = E->getDebugLoc();
    // From here on insert after the exec* call.
    Builder.SetInsertPoint(&*NextInst);
    // If the exec** fails we must reset the counters since they've been
    // dumped
    FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
    Builder.CreateCall(ResetF)->setDebugLoc(Loc);
    // Remember the block holding the exec* call; getInstrBB() consults this
    // set when choosing where to instrument an edge.
    ExecBlocks.insert(Parent);
    Parent->splitBasicBlock(NextInst);
    Parent->back().setDebugLoc(Loc);
  }

  return !Forks.empty() || !Execs.empty();
}
754 
// Chooses the basic block in which the counter for edge E is placed.
//
// Returns nullptr when the edge needs no counter (it is in the MST or was
// removed) or when no suitable block exists. For a critical edge that can be
// split, a new block is created on the edge, two replacement edges are added
// to MST, E is marked as removed, and the new block is returned.
// ExecBlocks holds blocks that must not be chosen as the source-side
// instrumentation point even though they have a single successor.
static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
                              const DenseSet<const BasicBlock *> &ExecBlocks) {
  if (E.InMST || E.Removed)
    return nullptr;

  BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
  BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
  // For a fake edge, instrument the real BB.
  if (SrcBB == nullptr)
    return DestBB;
  if (DestBB == nullptr)
    return SrcBB;

  // Returns BB if it has an insertion point, nullptr otherwise.
  auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
    // There are basic blocks (such as catchswitch) cannot be instrumented.
    // If the returned first insertion point is the end of BB, skip this BB.
    if (BB->getFirstInsertionPt() == BB->end())
      return nullptr;
    return BB;
  };

  // Instrument the SrcBB if it has a single successor,
  // otherwise, the DestBB if this is not a critical edge.
  Instruction *TI = SrcBB->getTerminator();
  if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
    return CanInstrument(SrcBB);
  if (!E.IsCritical)
    return CanInstrument(DestBB);

  // Some IndirectBr critical edges cannot be split by the previous
  // SplitIndirectBrCriticalEdges call. Bail out.
  const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  BasicBlock *InstrBB =
      isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
  if (!InstrBB)
    return nullptr;

  // Replace E by the two edges through the new block. The second one is
  // marked as part of the MST, so only the first one is left to instrument.
  MST.addEdge(SrcBB, InstrBB, 0);
  MST.addEdge(InstrBB, DestBB, 0).InMST = true;
  E.Removed = true;

  return CanInstrument(InstrBB);
}
798 
799 #ifndef NDEBUG
800 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
801   size_t ID = 0;
802   for (auto &E : make_pointee_range(MST.AllEdges)) {
803     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
804     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
805     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
806            << E.infoString() << "\n";
807   }
808 }
809 #endif
810 
/// Builds the gcov notes data for every compile unit referenced by \p CUNode
/// and, while Options.EmitData is in effect, inserts the edge counters that
/// back the .gcda output.
///
/// For each compile unit that is not skipped (units with a non-zero DWO id
/// are), every function in the module that has a DISubprogram, has lines,
/// passes isFunctionInstrumented() and does not use scope-based EH gets a
/// GCOVFunction. Its edges come from a CFGMST; edges that have an
/// instrumentation block (E.Place) are moved to the front and sorted by
/// (source, destination) block number, and line information is attached to the
/// blocks. A JamCRC over the collected edge destinations becomes the file
/// stamp, which is appended to FileChecksums and, if Options.EmitNotes is set,
/// written together with all functions to the .gcno file.
///
/// Counters and the global constructor are only created while EmitGCDA is
/// true, i.e. until the first compile unit reaches the end of the loop body.
///
/// \param CUNode         named metadata whose operands are DICompileUnits.
/// \param HasExecOrFork  when true, every edge is marked as not being in the
///                       MST before its instrumentation block is chosen.
/// \param GetBFI, GetBPI per-function analysis getters passed on to
///                       SplitIndirectBrCriticalEdges and CFGMST.
/// \param GetTLI         not referenced in this function body.
/// \returns always true.
811 bool GCOVProfiler::emitProfileNotes(
812     NamedMDNode *CUNode, bool HasExecOrFork,
813     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
814     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
815     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
816   int Version;
817   {
        // Decode the first three characters of Options.Version into a number:
        // a leading letter ('A' or above) contributes (letter - 'A') * 100 and
        // the next two characters are decimal digits; otherwise the first and
        // third characters are taken as the two decimal digits.
818     uint8_t c3 = Options.Version[0];
819     uint8_t c2 = Options.Version[1];
820     uint8_t c1 = Options.Version[2];
821     Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
822                         : (c3 - '0') * 10 + c1 - '0';
823   }
824 
      // Local copy: cleared after the first emitGlobalConstructor() call below.
825   bool EmitGCDA = Options.EmitData;
826   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
827     // Each compile unit gets its own .gcno file. This means that whether we run
828     // this pass over the original .o's as they're produced, or run it after
829     // LTO, we'll generate the same .gcno files.
830 
831     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
832 
833     // Skip module skeleton (and module) CUs.
834     if (CU->getDWOId())
835       continue;
836 
        // Bytes fed into the CRC that becomes this file's stamp.
837     std::vector<uint8_t> EdgeDestinations;
        // (counter array, subprogram) per instrumented function; only filled
        // while EmitGCDA is true.
838     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
839 
840     Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
841                                                  : support::endianness::big;
842     unsigned FunctionIdent = 0;
        // Note: this walks every function of the module, not only the ones
        // belonging to CU.
843     for (auto &F : M->functions()) {
844       DISubprogram *SP = F.getSubprogram();
845       unsigned EndLine;
846       if (!SP) continue;
847       if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
848         continue;
849       // TODO: Functions using scope-based EH are currently not supported.
850       if (isUsingScopeBasedEH(F)) continue;
851 
852       // Add the function line number to the lines of the entry block
853       // to have a counter for the function definition.
854       uint32_t Line = SP->getLine();
855       auto Filename = getFilename(SP);
856 
857       BranchProbabilityInfo *BPI = GetBPI(F);
858       BlockFrequencyInfo *BFI = GetBFI(F);
859 
860       // Split indirectbr critical edges here before computing the MST rather
861       // than later in getInstrBB() to avoid invalidating it.
862       SplitIndirectBrCriticalEdges(F, BPI, BFI);
863 
864       CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
865 
866       // getInstrBB can split basic blocks and push elements to AllEdges.
          // The bound is therefore captured once up front and edges are
          // accessed by index, so edges appended meanwhile are not visited.
867       for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
868         auto &E = *MST.AllEdges[I];
869         // For now, disable spanning tree optimization when fork or exec* is
870         // used.
871         if (HasExecOrFork)
872           E.InMST = false;
873         E.Place = getInstrBB(MST, E, ExecBlocks);
874       }
875       // Basic blocks in F are finalized at this point.
876       BasicBlock &EntryBlock = F.getEntryBlock();
877       Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
878                                                      FunctionIdent++, Version));
879       GCOVFunction &Func = *Funcs.back();
880 
881       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
882       // as well.
883       llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
884         return E->Removed || (!E->InMST && !E->Place);
885       });
          // Move the edges that have an instrumentation block to the front.
          // Measured is their count and the length of the counter array below.
886       const size_t Measured =
887           std::stable_partition(
888               MST.AllEdges.begin(), MST.AllEdges.end(),
889               [](std::unique_ptr<Edge> &E) { return E->Place; }) -
890           MST.AllEdges.begin();
          // Record gcov block numbers on the measured edges; a null SrcBB or
          // DestBB stands for the function's entry or return block.
891       for (size_t I : llvm::seq<size_t>(0, Measured)) {
892         Edge &E = *MST.AllEdges[I];
893         GCOVBlock &Src =
894             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
895         GCOVBlock &Dst =
896             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
897         E.SrcNumber = Src.Number;
898         E.DstNumber = Dst.Number;
899       }
          // Order the measured edges by (SrcNumber, DstNumber). The counter
          // index used when instrumenting below follows this order.
900       std::stable_sort(
901           MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
902           [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
903             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
904                                                 : L->DstNumber < R->DstNumber;
905           });
906 
          // Register every remaining edge with its source block. Edges without
          // an instrumentation block are flagged GCOV_ARC_ON_TREE.
907       for (const Edge &E : make_pointee_range(MST.AllEdges)) {
908         GCOVBlock &Src =
909             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
910         GCOVBlock &Dst =
911             E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
912         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
913       }
914 
915       // Artificial functions such as global initializers
          // do not get the definition line added to their entry block.
916       if (!SP->isArtificial())
917         Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
918 
919       LLVM_DEBUG(dumpEdges(MST, Func));
920 
921       for (auto &GB : Func.Blocks) {
922         const BasicBlock &BB = *GB.first;
923         auto &Block = GB.second;
            // Append each successor's block number to the checksum input, low
            // byte first, always at least one byte per edge.
924         for (auto Succ : Block.OutEdges) {
925           uint32_t Idx = Succ.first->Number;
926           do EdgeDestinations.push_back(Idx & 255);
927           while ((Idx >>= 8) > 0);
928         }
929 
930         for (auto &I : BB) {
931           // Debug intrinsic locations correspond to the location of the
932           // declaration, not necessarily any statements or expressions.
933           if (isa<DbgInfoIntrinsic>(&I)) continue;
934 
935           const DebugLoc &Loc = I.getDebugLoc();
936           if (!Loc)
937             continue;
938 
939           // Artificial lines such as calls to the global constructors.
940           if (Loc.getLine() == 0 || Loc.isImplicitCode())
941             continue;
942 
              // Skip a line equal to the previously seen one. Line is updated
              // before the subprogram check, so an instruction scoped to a
              // different subprogram still updates it but adds no line.
943           if (Line == Loc.getLine()) continue;
944           Line = Loc.getLine();
945           if (SP != getDISubprogram(Loc.getScope()))
946             continue;
947 
948           GCOVLines &Lines = Block.getFile(Filename);
949           Lines.addLine(Loc.getLine());
950         }
            // Line 0 is never recorded (see above), so this makes the first
            // located instruction of the next block pass the duplicate check.
951         Line = 0;
952       }
953       if (EmitGCDA) {
            // NOTE(review): this redeclaration shadows the outer SP and is
            // initialized from the same call.
954         DISubprogram *SP = F.getSubprogram();
            // One zero-initialized i64 counter per measured edge.
955         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
956         GlobalVariable *Counters = new GlobalVariable(
957             *M, CounterTy, false, GlobalValue::InternalLinkage,
958             Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
959         CountersBySP.emplace_back(Counters, SP);
960 
            // Increment counter I at the first insertion point of the edge's
            // instrumentation block, either with a monotonic atomic add or with
            // a plain load/add/store sequence.
961         for (size_t I : llvm::seq<size_t>(0, Measured)) {
962           const Edge &E = *MST.AllEdges[I];
963           IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
964           Value *V = Builder.CreateConstInBoundsGEP2_64(
965               Counters->getValueType(), Counters, 0, I);
966           if (Options.Atomic) {
967             Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
968                                     AtomicOrdering::Monotonic);
969           } else {
970             Value *Count =
971                 Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
972             Count = Builder.CreateAdd(Count, Builder.getInt64(1));
973             Builder.CreateStore(Count, V);
974           }
975         }
976       }
977     }
978 
        // The CRC of the collected edge destinations is the stamp of this file.
        // It is recorded even when no .gcno file is written.
979     char Tmp[4];
980     JamCRC JC;
981     JC.update(EdgeDestinations);
982     uint32_t Stamp = JC.getCRC();
983     FileChecksums.push_back(Stamp);
984 
985     if (Options.EmitNotes) {
986       std::error_code EC;
987       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
988                          sys::fs::OF_None);
989       if (EC) {
990         Ctx->emitError(
991             Twine("failed to open coverage notes file for writing: ") +
992             EC.message());
            // NOTE(review): this also skips the emitGlobalConstructor() call
            // below for this CU although counters may already have been
            // inserted above, and EmitGCDA stays set for the next CU.
993         continue;
994       }
          // presumably the stream used by write()/writeString() - confirm.
995       os = &out;
          // Magic and version are written as-is for big-endian targets and
          // byte-reversed for little-endian ones.
996       if (Endian == support::endianness::big) {
997         out.write("gcno", 4);
998         out.write(Options.Version, 4);
999       } else {
1000         out.write("oncg", 4);
1001         std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
1002         out.write(Tmp, 4);
1003       }
1004       write(Stamp);
1005       if (Version >= 90)
1006         writeString(""); // unuseful current_working_directory
1007       if (Version >= 80)
1008         write(0); // unuseful has_unexecuted_blocks
1009 
1010       for (auto &Func : Funcs)
1011         Func->writeOut(Stamp);
1012 
           // presumably the terminating record of the notes file - TODO confirm
           // against the gcov file format.
1013       write(0);
1014       write(0);
1015       out.close();
1016     }
1017 
         // The constructor (with writeout and reset functions) is emitted once;
         // later compile units do not insert counters again.
1018     if (EmitGCDA) {
1019       emitGlobalConstructor(CountersBySP);
1020       EmitGCDA = false;
1021     }
1022   }
1023   return true;
1024 }
1025 
1026 void GCOVProfiler::emitGlobalConstructor(
1027     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1028   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1029   Function *ResetF = insertReset(CountersBySP);
1030 
1031   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
1032   // be executed at exit and the "__llvm_gcov_flush" function to be executed
1033   // when "__gcov_flush" is called.
1034   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1035   Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
1036                                  "__llvm_gcov_init", M);
1037   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1038   F->setLinkage(GlobalValue::InternalLinkage);
1039   F->addFnAttr(Attribute::NoInline);
1040   if (Options.NoRedZone)
1041     F->addFnAttr(Attribute::NoRedZone);
1042 
1043   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1044   IRBuilder<> Builder(BB);
1045 
1046   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1047   auto *PFTy = PointerType::get(FTy, 0);
1048   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1049 
1050   // Initialize the environment and register the local writeout, flush and
1051   // reset functions.
1052   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1053   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1054   Builder.CreateRetVoid();
1055 
1056   appendToGlobalCtors(*M, F, 0);
1057 }
1058 
1059 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1060   Type *Args[] = {
1061       Type::getInt8PtrTy(*Ctx), // const char *orig_filename
1062       Type::getInt32Ty(*Ctx),   // uint32_t version
1063       Type::getInt32Ty(*Ctx),   // uint32_t checksum
1064   };
1065   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1066   AttributeList AL;
1067   if (auto AK = TLI->getExtAttrForI32Param(false))
1068     AL = AL.addParamAttribute(*Ctx, 2, AK);
1069   FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
1070   return Res;
1071 }
1072 
1073 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1074   Type *Args[] = {
1075     Type::getInt32Ty(*Ctx),    // uint32_t ident
1076     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1077     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1078   };
1079   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1080   AttributeList AL;
1081   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1082     AL = AL.addParamAttribute(*Ctx, 0, AK);
1083     AL = AL.addParamAttribute(*Ctx, 1, AK);
1084     AL = AL.addParamAttribute(*Ctx, 2, AK);
1085   }
1086   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
1087 }
1088 
1089 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1090   Type *Args[] = {
1091     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
1092     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
1093   };
1094   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1095   AttributeList AL;
1096   if (auto AK = TLI->getExtAttrForI32Param(false))
1097     AL = AL.addParamAttribute(*Ctx, 0, AK);
1098   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
1099 }
1100 
1101 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1102   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1103   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1104 }
1105 
1106 FunctionCallee GCOVProfiler::getEndFileFunc() {
1107   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1108   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1109 }
1110 
1111 Function *GCOVProfiler::insertCounterWriteout(
1112     ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
1113   FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1114   Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
1115   if (!WriteoutF)
1116     WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
1117                                  "__llvm_gcov_writeout", M);
1118   WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1119   WriteoutF->addFnAttr(Attribute::NoInline);
1120   if (Options.NoRedZone)
1121     WriteoutF->addFnAttr(Attribute::NoRedZone);
1122 
1123   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
1124   IRBuilder<> Builder(BB);
1125 
1126   auto *TLI = &GetTLI(*WriteoutF);
1127 
1128   FunctionCallee StartFile = getStartFileFunc(TLI);
1129   FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
1130   FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
1131   FunctionCallee SummaryInfo = getSummaryInfoFunc();
1132   FunctionCallee EndFile = getEndFileFunc();
1133 
1134   NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
1135   if (!CUNodes) {
1136     Builder.CreateRetVoid();
1137     return WriteoutF;
1138   }
1139 
1140   // Collect the relevant data into a large constant data structure that we can
1141   // walk to write out everything.
1142   StructType *StartFileCallArgsTy = StructType::create(
1143       {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1144       "start_file_args_ty");
1145   StructType *EmitFunctionCallArgsTy = StructType::create(
1146       {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
1147       "emit_function_args_ty");
1148   StructType *EmitArcsCallArgsTy = StructType::create(
1149       {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
1150       "emit_arcs_args_ty");
1151   StructType *FileInfoTy =
1152       StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
1153                           EmitFunctionCallArgsTy->getPointerTo(),
1154                           EmitArcsCallArgsTy->getPointerTo()},
1155                          "file_info");
1156 
1157   Constant *Zero32 = Builder.getInt32(0);
1158   // Build an explicit array of two zeros for use in ConstantExpr GEP building.
1159   Constant *TwoZero32s[] = {Zero32, Zero32};
1160 
1161   SmallVector<Constant *, 8> FileInfos;
1162   for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
1163     auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
1164 
1165     // Skip module skeleton (and module) CUs.
1166     if (CU->getDWOId())
1167       continue;
1168 
1169     std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
1170     uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
1171     auto *StartFileCallArgs = ConstantStruct::get(
1172         StartFileCallArgsTy,
1173         {Builder.CreateGlobalStringPtr(FilenameGcda),
1174          Builder.getInt32(endian::read32be(Options.Version)),
1175          Builder.getInt32(CfgChecksum)});
1176 
1177     SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
1178     SmallVector<Constant *, 8> EmitArcsCallArgsArray;
1179     for (int j : llvm::seq<int>(0, CountersBySP.size())) {
1180       uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
1181       EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
1182           EmitFunctionCallArgsTy,
1183           {Builder.getInt32(j),
1184            Builder.getInt32(FuncChecksum),
1185            Builder.getInt32(CfgChecksum)}));
1186 
1187       GlobalVariable *GV = CountersBySP[j].first;
1188       unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
1189       EmitArcsCallArgsArray.push_back(ConstantStruct::get(
1190           EmitArcsCallArgsTy,
1191           {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
1192                                        GV->getValueType(), GV, TwoZero32s)}));
1193     }
1194     // Create global arrays for the two emit calls.
1195     int CountersSize = CountersBySP.size();
1196     assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
1197            "Mismatched array size!");
1198     assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
1199            "Mismatched array size!");
1200     auto *EmitFunctionCallArgsArrayTy =
1201         ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
1202     auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
1203         *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
1204         GlobalValue::InternalLinkage,
1205         ConstantArray::get(EmitFunctionCallArgsArrayTy,
1206                            EmitFunctionCallArgsArray),
1207         Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
1208     auto *EmitArcsCallArgsArrayTy =
1209         ArrayType::get(EmitArcsCallArgsTy, CountersSize);
1210     EmitFunctionCallArgsArrayGV->setUnnamedAddr(
1211         GlobalValue::UnnamedAddr::Global);
1212     auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
1213         *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
1214         GlobalValue::InternalLinkage,
1215         ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
1216         Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
1217     EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1218 
1219     FileInfos.push_back(ConstantStruct::get(
1220         FileInfoTy,
1221         {StartFileCallArgs, Builder.getInt32(CountersSize),
1222          ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
1223                                                 EmitFunctionCallArgsArrayGV,
1224                                                 TwoZero32s),
1225          ConstantExpr::getInBoundsGetElementPtr(
1226              EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
1227   }
1228 
1229   // If we didn't find anything to actually emit, bail on out.
1230   if (FileInfos.empty()) {
1231     Builder.CreateRetVoid();
1232     return WriteoutF;
1233   }
1234 
1235   // To simplify code, we cap the number of file infos we write out to fit
1236   // easily in a 32-bit signed integer. This gives consistent behavior between
1237   // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
1238   // operations on 32-bit systems. It also seems unreasonable to try to handle
1239   // more than 2 billion files.
1240   if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
1241     FileInfos.resize(INT_MAX);
1242 
1243   // Create a global for the entire data structure so we can walk it more
1244   // easily.
1245   auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
1246   auto *FileInfoArrayGV = new GlobalVariable(
1247       *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
1248       ConstantArray::get(FileInfoArrayTy, FileInfos),
1249       "__llvm_internal_gcov_emit_file_info");
1250   FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1251 
1252   // Create the CFG for walking this data structure.
1253   auto *FileLoopHeader =
1254       BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
1255   auto *CounterLoopHeader =
1256       BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
1257   auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
1258   auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
1259 
1260   // We always have at least one file, so just branch to the header.
1261   Builder.CreateBr(FileLoopHeader);
1262 
1263   // The index into the files structure is our loop induction variable.
1264   Builder.SetInsertPoint(FileLoopHeader);
1265   PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1266                                   "file_idx");
1267   IV->addIncoming(Builder.getInt32(0), BB);
1268   auto *FileInfoPtr = Builder.CreateInBoundsGEP(
1269       FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
1270   auto *StartFileCallArgsPtr =
1271       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
1272   auto *StartFileCall = Builder.CreateCall(
1273       StartFile,
1274       {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
1275                           Builder.CreateStructGEP(StartFileCallArgsTy,
1276                                                   StartFileCallArgsPtr, 0),
1277                           "filename"),
1278        Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
1279                           Builder.CreateStructGEP(StartFileCallArgsTy,
1280                                                   StartFileCallArgsPtr, 1),
1281                           "version"),
1282        Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
1283                           Builder.CreateStructGEP(StartFileCallArgsTy,
1284                                                   StartFileCallArgsPtr, 2),
1285                           "stamp")});
1286   if (auto AK = TLI->getExtAttrForI32Param(false))
1287     StartFileCall->addParamAttr(2, AK);
1288   auto *NumCounters = Builder.CreateLoad(
1289       FileInfoTy->getElementType(1),
1290       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
1291   auto *EmitFunctionCallArgsArray =
1292       Builder.CreateLoad(FileInfoTy->getElementType(2),
1293                          Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
1294                          "emit_function_args");
1295   auto *EmitArcsCallArgsArray = Builder.CreateLoad(
1296       FileInfoTy->getElementType(3),
1297       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
1298   auto *EnterCounterLoopCond =
1299       Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
1300   Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
1301 
1302   Builder.SetInsertPoint(CounterLoopHeader);
1303   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
1304                                "ctr_idx");
1305   JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
1306   auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
1307       EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
1308   auto *EmitFunctionCall = Builder.CreateCall(
1309       EmitFunction,
1310       {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
1311                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1312                                                   EmitFunctionCallArgsPtr, 0),
1313                           "ident"),
1314        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
1315                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1316                                                   EmitFunctionCallArgsPtr, 1),
1317                           "func_checkssum"),
1318        Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
1319                           Builder.CreateStructGEP(EmitFunctionCallArgsTy,
1320                                                   EmitFunctionCallArgsPtr, 2),
1321                           "cfg_checksum")});
1322   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1323     EmitFunctionCall->addParamAttr(0, AK);
1324     EmitFunctionCall->addParamAttr(1, AK);
1325     EmitFunctionCall->addParamAttr(2, AK);
1326   }
1327   auto *EmitArcsCallArgsPtr =
1328       Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
1329   auto *EmitArcsCall = Builder.CreateCall(
1330       EmitArcs,
1331       {Builder.CreateLoad(
1332            EmitArcsCallArgsTy->getElementType(0),
1333            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
1334            "num_counters"),
1335        Builder.CreateLoad(
1336            EmitArcsCallArgsTy->getElementType(1),
1337            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
1338            "counters")});
1339   if (auto AK = TLI->getExtAttrForI32Param(false))
1340     EmitArcsCall->addParamAttr(0, AK);
1341   auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
1342   auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
1343   Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
1344   JV->addIncoming(NextJV, CounterLoopHeader);
1345 
1346   Builder.SetInsertPoint(FileLoopLatch);
1347   Builder.CreateCall(SummaryInfo, {});
1348   Builder.CreateCall(EndFile, {});
1349   auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
1350   auto *FileLoopCond =
1351       Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
1352   Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
1353   IV->addIncoming(NextIV, FileLoopLatch);
1354 
1355   Builder.SetInsertPoint(ExitBB);
1356   Builder.CreateRetVoid();
1357 
1358   return WriteoutF;
1359 }
1360 
1361 Function *GCOVProfiler::insertReset(
1362     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1363   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1364   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1365   if (!ResetF)
1366     ResetF = Function::Create(FTy, GlobalValue::InternalLinkage,
1367                               "__llvm_gcov_reset", M);
1368   ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1369   ResetF->addFnAttr(Attribute::NoInline);
1370   if (Options.NoRedZone)
1371     ResetF->addFnAttr(Attribute::NoRedZone);
1372 
1373   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1374   IRBuilder<> Builder(Entry);
1375 
1376   // Zero out the counters.
1377   for (const auto &I : CountersBySP) {
1378     GlobalVariable *GV = I.first;
1379     Constant *Null = Constant::getNullValue(GV->getValueType());
1380     Builder.CreateStore(Null, GV);
1381   }
1382 
1383   Type *RetTy = ResetF->getReturnType();
1384   if (RetTy->isVoidTy())
1385     Builder.CreateRetVoid();
1386   else if (RetTy->isIntegerTy())
1387     // Used if __llvm_gcov_reset was implicitly declared.
1388     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1389   else
1390     report_fatal_error("invalid return type for __llvm_gcov_reset");
1391 
1392   return ResetF;
1393 }
1394