xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/UniqueBBID.h"
30 #include <llvm/ADT/STLExtras.h>
31 
32 using namespace llvm;
33 
34 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0;
35 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass,
36                 "bbsections-profile-reader",
37                 "Reads and parses a basic block sections profile.", false,
38                 false)
39 
40 Expected<UniqueBBID>
41 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
42   SmallVector<StringRef, 2> Parts;
43   S.split(Parts, '.');
44   if (Parts.size() > 2)
45     return createProfileParseError(Twine("unable to parse basic block id: '") +
46                                    S + "'");
47   unsigned long long BaseBBID;
48   if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
49     return createProfileParseError(
50         Twine("unable to parse BB id: '" + Parts[0]) +
51         "': unsigned integer expected");
52   unsigned long long CloneID = 0;
53   if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
54     return createProfileParseError(Twine("unable to parse clone id: '") +
55                                    Parts[1] + "': unsigned integer expected");
56   return UniqueBBID{static_cast<unsigned>(BaseBBID),
57                     static_cast<unsigned>(CloneID)};
58 }
59 
60 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
61   return getClusterInfoForFunction(FuncName).first;
62 }
63 
64 std::pair<bool, SmallVector<BBClusterInfo>>
65 BasicBlockSectionsProfileReader::getClusterInfoForFunction(
66     StringRef FuncName) const {
67   auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
68   return R != ProgramPathAndClusterInfo.end()
69              ? std::pair(true, R->second.ClusterInfo)
70              : std::pair(false, SmallVector<BBClusterInfo>());
71 }
72 
73 SmallVector<SmallVector<unsigned>>
74 BasicBlockSectionsProfileReader::getClonePathsForFunction(
75     StringRef FuncName) const {
76   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
77 }
78 
79 // Reads the version 1 basic block sections profile. Profile for each function
80 // is encoded as follows:
81 //   m <module_name>
82 //   f <function_name_1> <function_name_2> ...
83 //   c <bb_id_1> <bb_id_2> <bb_id_3>
84 //   c <bb_id_4> <bb_id_5>
85 //   ...
86 // Module name specifier (starting with 'm') is optional and allows
87 // distinguishing profile for internal-linkage functions with the same name. If
88 // not specified, it will apply to any function with the same name. Function
89 // name specifier (starting with 'f') can specify multiple function name
90 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
91 // basic blocks, and the internal order in which they must be placed in the same
92 // section.
93 // This profile can also specify cloning paths which instruct the compiler to
94 // clone basic blocks along a path. The cloned blocks are then specified in the
95 // cluster information.
96 // The following profile lists two cloning paths (starting with 'p') for
97 // function bar and places the total 9 blocks within two clusters. The first two
98 // blocks of a cloning path specify the edge along which the path is cloned. For
99 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
100 // the edge 1->3. Within the given clusters, each cloned block is identified by
101 // "<original block id>.<clone id>". For instance, 3.1 represents the first
102 // clone of block 3. Original blocks are specified just with their block ids. A
103 // block cloned multiple times appears with distinct clone ids. The CFG for bar
104 // is shown below before and after cloning with its final clusters labeled.
105 //
106 // f main
107 // f bar
108 // p 1 3 4           # cloning path 1
109 // p 4 2             # cloning path 2
110 // c 1 3.1 4.1 6     # basic block cluster 1
111 // c 0 2 3 4 2.1 5   # basic block cluster 2
112 // ****************************************************************************
113 // function bar before and after cloning with basic block clusters shown.
114 // ****************************************************************************
115 //                                ....      ..............
116 //      0 -------+                : 0 :---->: 1 ---> 3.1 :
117 //      |        |                : | :     :........ |  :
118 //      v        v                : v :             : v  :
119 // +--> 2 --> 5  1   ~~~~~~>  +---: 2 :             : 4.1: clsuter 1
120 // |    |        |            |   : | :             : |  :
121 // |    v        |            |   : v .......       : v  :
122 // |    3 <------+            |   : 3 <--+  :       : 6  :
123 // |    |                     |   : |    |  :       :....:
124 // |    v                     |   : v    |  :
125 // +--- 4 ---> 6              |   : 4    |  :
126 //                            |   : |    |  :
127 //                            |   : v    |  :
128 //                            |   :2.1---+  : cluster 2
129 //                            |   : | ......:
130 //                            |   : v :
131 //                            +-->: 5 :
132 //                                ....
133 // ****************************************************************************
134 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
135   auto FI = ProgramPathAndClusterInfo.end();
136 
137   // Current cluster ID corresponding to this function.
138   unsigned CurrentCluster = 0;
139   // Current position in the current cluster.
140   unsigned CurrentPosition = 0;
141 
142   // Temporary set to ensure every basic block ID appears once in the clusters
143   // of a function.
144   DenseSet<UniqueBBID> FuncBBIDs;
145 
146   // Debug-info-based module filename for the current function. Empty string
147   // means no filename.
148   StringRef DIFilename;
149 
150   for (; !LineIt.is_at_eof(); ++LineIt) {
151     StringRef S(*LineIt);
152     char Specifier = S[0];
153     S = S.drop_front().trim();
154     SmallVector<StringRef, 4> Values;
155     S.split(Values, ' ');
156     switch (Specifier) {
157     case '@':
158       continue;
159     case 'm': // Module name speicifer.
160       if (Values.size() != 1) {
161         return createProfileParseError(Twine("invalid module name value: '") +
162                                        S + "'");
163       }
164       DIFilename = sys::path::remove_leading_dotslash(Values[0]);
165       continue;
166     case 'f': { // Function names specifier.
167       bool FunctionFound = any_of(Values, [&](StringRef Alias) {
168         auto It = FunctionNameToDIFilename.find(Alias);
169         // No match if this function name is not found in this module.
170         if (It == FunctionNameToDIFilename.end())
171           return false;
172         // Return a match if debug-info-filename is not specified. Otherwise,
173         // check for equality.
174         return DIFilename.empty() || It->second == DIFilename;
175       });
176       if (!FunctionFound) {
177         // Skip the following profile by setting the profile iterator (FI) to
178         // the past-the-end element.
179         FI = ProgramPathAndClusterInfo.end();
180         DIFilename = "";
181         continue;
182       }
183       for (size_t i = 1; i < Values.size(); ++i)
184         FuncAliasMap.try_emplace(Values[i], Values.front());
185 
186       // Prepare for parsing clusters of this function name.
187       // Start a new cluster map for this function name.
188       auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
189       // Report error when multiple profiles have been specified for the same
190       // function.
191       if (!R.second)
192         return createProfileParseError("duplicate profile for function '" +
193                                        Values.front() + "'");
194       FI = R.first;
195       CurrentCluster = 0;
196       FuncBBIDs.clear();
197       // We won't need DIFilename anymore. Clean it up to avoid its application
198       // on the next function.
199       DIFilename = "";
200       continue;
201     }
202     case 'c': // Basic block cluster specifier.
203       // Skip the profile when we the profile iterator (FI) refers to the
204       // past-the-end element.
205       if (FI == ProgramPathAndClusterInfo.end())
206         continue;
207       // Reset current cluster position.
208       CurrentPosition = 0;
209       for (auto BasicBlockIDStr : Values) {
210         auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
211         if (!BasicBlockID)
212           return BasicBlockID.takeError();
213         if (!FuncBBIDs.insert(*BasicBlockID).second)
214           return createProfileParseError(
215               Twine("duplicate basic block id found '") + BasicBlockIDStr +
216               "'");
217 
218         FI->second.ClusterInfo.emplace_back(BBClusterInfo{
219             *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
220       }
221       CurrentCluster++;
222       continue;
223     case 'p': { // Basic block cloning path specifier.
224       // Skip the profile when we the profile iterator (FI) refers to the
225       // past-the-end element.
226       if (FI == ProgramPathAndClusterInfo.end())
227         continue;
228       SmallSet<unsigned, 5> BBsInPath;
229       FI->second.ClonePaths.push_back({});
230       for (size_t I = 0; I < Values.size(); ++I) {
231         auto BaseBBIDStr = Values[I];
232         unsigned long long BaseBBID = 0;
233         if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
234           return createProfileParseError(Twine("unsigned integer expected: '") +
235                                          BaseBBIDStr + "'");
236         if (I != 0 && !BBsInPath.insert(BaseBBID).second)
237           return createProfileParseError(
238               Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
239         FI->second.ClonePaths.back().push_back(BaseBBID);
240       }
241       continue;
242     }
243     default:
244       return createProfileParseError(Twine("invalid specifier: '") +
245                                      Twine(Specifier) + "'");
246     }
247     llvm_unreachable("should not break from this switch statement");
248   }
249   return Error::success();
250 }
251 
252 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
253   auto FI = ProgramPathAndClusterInfo.end();
254   // Current cluster ID corresponding to this function.
255   unsigned CurrentCluster = 0;
256   // Current position in the current cluster.
257   unsigned CurrentPosition = 0;
258 
259   // Temporary set to ensure every basic block ID appears once in the clusters
260   // of a function.
261   SmallSet<unsigned, 4> FuncBBIDs;
262 
263   for (; !LineIt.is_at_eof(); ++LineIt) {
264     StringRef S(*LineIt);
265     if (S[0] == '@')
266       continue;
267     // Check for the leading "!"
268     if (!S.consume_front("!") || S.empty())
269       break;
270     // Check for second "!" which indicates a cluster of basic blocks.
271     if (S.consume_front("!")) {
272       // Skip the profile when we the profile iterator (FI) refers to the
273       // past-the-end element.
274       if (FI == ProgramPathAndClusterInfo.end())
275         continue;
276       SmallVector<StringRef, 4> BBIDs;
277       S.split(BBIDs, ' ');
278       // Reset current cluster position.
279       CurrentPosition = 0;
280       for (auto BBIDStr : BBIDs) {
281         unsigned long long BBID;
282         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
283           return createProfileParseError(Twine("unsigned integer expected: '") +
284                                          BBIDStr + "'");
285         if (!FuncBBIDs.insert(BBID).second)
286           return createProfileParseError(
287               Twine("duplicate basic block id found '") + BBIDStr + "'");
288 
289         FI->second.ClusterInfo.emplace_back(
290             BBClusterInfo({{static_cast<unsigned>(BBID), 0},
291                            CurrentCluster,
292                            CurrentPosition++}));
293       }
294       CurrentCluster++;
295     } else {
296       // This is a function name specifier. It may include a debug info filename
297       // specifier starting with `M=`.
298       auto [AliasesStr, DIFilenameStr] = S.split(' ');
299       SmallString<128> DIFilename;
300       if (DIFilenameStr.starts_with("M=")) {
301         DIFilename =
302             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
303         if (DIFilename.empty())
304           return createProfileParseError("empty module name specifier");
305       } else if (!DIFilenameStr.empty()) {
306         return createProfileParseError("unknown string found: '" +
307                                        DIFilenameStr + "'");
308       }
309       // Function aliases are separated using '/'. We use the first function
310       // name for the cluster info mapping and delegate all other aliases to
311       // this one.
312       SmallVector<StringRef, 4> Aliases;
313       AliasesStr.split(Aliases, '/');
314       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
315         auto It = FunctionNameToDIFilename.find(Alias);
316         // No match if this function name is not found in this module.
317         if (It == FunctionNameToDIFilename.end())
318           return false;
319         // Return a match if debug-info-filename is not specified. Otherwise,
320         // check for equality.
321         return DIFilename.empty() || It->second == DIFilename;
322       });
323       if (!FunctionFound) {
324         // Skip the following profile by setting the profile iterator (FI) to
325         // the past-the-end element.
326         FI = ProgramPathAndClusterInfo.end();
327         continue;
328       }
329       for (size_t i = 1; i < Aliases.size(); ++i)
330         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
331 
332       // Prepare for parsing clusters of this function name.
333       // Start a new cluster map for this function name.
334       auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
335       // Report error when multiple profiles have been specified for the same
336       // function.
337       if (!R.second)
338         return createProfileParseError("duplicate profile for function '" +
339                                        Aliases.front() + "'");
340       FI = R.first;
341       CurrentCluster = 0;
342       FuncBBIDs.clear();
343     }
344   }
345   return Error::success();
346 }
347 
348 // Basic Block Sections can be enabled for a subset of machine basic blocks.
349 // This is done by passing a file containing names of functions for which basic
350 // block sections are desired. Additionally, machine basic block ids of the
351 // functions can also be specified for a finer granularity. Moreover, a cluster
352 // of basic blocks could be assigned to the same section.
353 // Optionally, a debug-info filename can be specified for each function to allow
354 // distinguishing internal-linkage functions of the same name.
355 // A file with basic block sections for all of function main and three blocks
356 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
357 // (Profile for function foo is only loaded when its debug-info filename
358 // matches 'path/to/foo_file.cc').
359 // ----------------------------
360 // list.txt:
361 // !main
362 // !foo M=path/to/foo_file.cc
363 // !!1 2
364 // !!4
365 Error BasicBlockSectionsProfileReader::ReadProfile() {
366   assert(MBuf);
367 
368   unsigned long long Version = 0;
369   StringRef FirstLine(*LineIt);
370   if (FirstLine.consume_front("v")) {
371     if (getAsUnsignedInteger(FirstLine, 10, Version)) {
372       return createProfileParseError(Twine("version number expected: '") +
373                                      FirstLine + "'");
374     }
375     if (Version > 1) {
376       return createProfileParseError(Twine("invalid profile version: ") +
377                                      Twine(Version));
378     }
379     ++LineIt;
380   }
381 
382   switch (Version) {
383   case 0:
384     // TODO: Deprecate V0 once V1 is fully integrated downstream.
385     return ReadV0Profile();
386   case 1:
387     return ReadV1Profile();
388   default:
389     llvm_unreachable("Invalid profile version.");
390   }
391 }
392 
393 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) {
394   if (!BBSPR.MBuf)
395     return false;
396   // Get the function name to debug info filename mapping.
397   BBSPR.FunctionNameToDIFilename.clear();
398   for (const Function &F : M) {
399     SmallString<128> DIFilename;
400     if (F.isDeclaration())
401       continue;
402     DISubprogram *Subprogram = F.getSubprogram();
403     if (Subprogram) {
404       llvm::DICompileUnit *CU = Subprogram->getUnit();
405       if (CU)
406         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
407     }
408     [[maybe_unused]] bool inserted =
409         BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename)
410             .second;
411     assert(inserted);
412   }
413   if (auto Err = BBSPR.ReadProfile())
414     report_fatal_error(std::move(Err));
415   return false;
416 }
417 
418 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key;
419 
420 BasicBlockSectionsProfileReader
421 BasicBlockSectionsProfileReaderAnalysis::run(Function &F,
422                                              FunctionAnalysisManager &AM) {
423   return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf());
424 }
425 
426 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot(
427     StringRef FuncName) const {
428   return BBSPR.isFunctionHot(FuncName);
429 }
430 
431 std::pair<bool, SmallVector<BBClusterInfo>>
432 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction(
433     StringRef FuncName) const {
434   return BBSPR.getClusterInfoForFunction(FuncName);
435 }
436 
437 SmallVector<SmallVector<unsigned>>
438 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
439     StringRef FuncName) const {
440   return BBSPR.getClonePathsForFunction(FuncName);
441 }
442 
443 BasicBlockSectionsProfileReader &
444 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
445   return BBSPR;
446 }
447 
448 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass(
449     const MemoryBuffer *Buf) {
450   return new BasicBlockSectionsProfileReaderWrapperPass(Buf);
451 }
452