xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include <llvm/ADT/STLExtras.h>
30 
31 using namespace llvm;
32 
33 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0;
34 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass,
35                 "bbsections-profile-reader",
36                 "Reads and parses a basic block sections profile.", false,
37                 false)
38 
39 Expected<UniqueBBID>
40 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
41   SmallVector<StringRef, 2> Parts;
42   S.split(Parts, '.');
43   if (Parts.size() > 2)
44     return createProfileParseError(Twine("unable to parse basic block id: '") +
45                                    S + "'");
46   unsigned long long BaseBBID;
47   if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
48     return createProfileParseError(
49         Twine("unable to parse BB id: '" + Parts[0]) +
50         "': unsigned integer expected");
51   unsigned long long CloneID = 0;
52   if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
53     return createProfileParseError(Twine("unable to parse clone id: '") +
54                                    Parts[1] + "': unsigned integer expected");
55   return UniqueBBID{static_cast<unsigned>(BaseBBID),
56                     static_cast<unsigned>(CloneID)};
57 }
58 
59 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
60   return getClusterInfoForFunction(FuncName).first;
61 }
62 
63 std::pair<bool, SmallVector<BBClusterInfo>>
64 BasicBlockSectionsProfileReader::getClusterInfoForFunction(
65     StringRef FuncName) const {
66   auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
67   return R != ProgramPathAndClusterInfo.end()
68              ? std::pair(true, R->second.ClusterInfo)
69              : std::pair(false, SmallVector<BBClusterInfo>());
70 }
71 
72 SmallVector<SmallVector<unsigned>>
73 BasicBlockSectionsProfileReader::getClonePathsForFunction(
74     StringRef FuncName) const {
75   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
76 }
77 
78 // Reads the version 1 basic block sections profile. Profile for each function
79 // is encoded as follows:
80 //   m <module_name>
81 //   f <function_name_1> <function_name_2> ...
82 //   c <bb_id_1> <bb_id_2> <bb_id_3>
83 //   c <bb_id_4> <bb_id_5>
84 //   ...
85 // Module name specifier (starting with 'm') is optional and allows
86 // distinguishing profile for internal-linkage functions with the same name. If
87 // not specified, it will apply to any function with the same name. Function
88 // name specifier (starting with 'f') can specify multiple function name
89 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
90 // basic blocks, and the internal order in which they must be placed in the same
91 // section.
92 // This profile can also specify cloning paths which instruct the compiler to
93 // clone basic blocks along a path. The cloned blocks are then specified in the
94 // cluster information.
95 // The following profile lists two cloning paths (starting with 'p') for
96 // function bar and places the total 9 blocks within two clusters. The first two
97 // blocks of a cloning path specify the edge along which the path is cloned. For
98 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
99 // the edge 1->3. Within the given clusters, each cloned block is identified by
100 // "<original block id>.<clone id>". For instance, 3.1 represents the first
101 // clone of block 3. Original blocks are specified just with their block ids. A
102 // block cloned multiple times appears with distinct clone ids. The CFG for bar
103 // is shown below before and after cloning with its final clusters labeled.
104 //
105 // f main
106 // f bar
107 // p 1 3 4           # cloning path 1
108 // p 4 2             # cloning path 2
109 // c 1 3.1 4.1 6     # basic block cluster 1
110 // c 0 2 3 4 2.1 5   # basic block cluster 2
111 // ****************************************************************************
112 // function bar before and after cloning with basic block clusters shown.
113 // ****************************************************************************
114 //                                ....      ..............
115 //      0 -------+                : 0 :---->: 1 ---> 3.1 :
116 //      |        |                : | :     :........ |  :
117 //      v        v                : v :             : v  :
118 // +--> 2 --> 5  1   ~~~~~~>  +---: 2 :             : 4.1: clsuter 1
119 // |    |        |            |   : | :             : |  :
120 // |    v        |            |   : v .......       : v  :
121 // |    3 <------+            |   : 3 <--+  :       : 6  :
122 // |    |                     |   : |    |  :       :....:
123 // |    v                     |   : v    |  :
124 // +--- 4 ---> 6              |   : 4    |  :
125 //                            |   : |    |  :
126 //                            |   : v    |  :
127 //                            |   :2.1---+  : cluster 2
128 //                            |   : | ......:
129 //                            |   : v :
130 //                            +-->: 5 :
131 //                                ....
132 // ****************************************************************************
133 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
134   auto FI = ProgramPathAndClusterInfo.end();
135 
136   // Current cluster ID corresponding to this function.
137   unsigned CurrentCluster = 0;
138   // Current position in the current cluster.
139   unsigned CurrentPosition = 0;
140 
141   // Temporary set to ensure every basic block ID appears once in the clusters
142   // of a function.
143   DenseSet<UniqueBBID> FuncBBIDs;
144 
145   // Debug-info-based module filename for the current function. Empty string
146   // means no filename.
147   StringRef DIFilename;
148 
149   for (; !LineIt.is_at_eof(); ++LineIt) {
150     StringRef S(*LineIt);
151     char Specifier = S[0];
152     S = S.drop_front().trim();
153     SmallVector<StringRef, 4> Values;
154     S.split(Values, ' ');
155     switch (Specifier) {
156     case '@':
157       continue;
158     case 'm': // Module name speicifer.
159       if (Values.size() != 1) {
160         return createProfileParseError(Twine("invalid module name value: '") +
161                                        S + "'");
162       }
163       DIFilename = sys::path::remove_leading_dotslash(Values[0]);
164       continue;
165     case 'f': { // Function names specifier.
166       bool FunctionFound = any_of(Values, [&](StringRef Alias) {
167         auto It = FunctionNameToDIFilename.find(Alias);
168         // No match if this function name is not found in this module.
169         if (It == FunctionNameToDIFilename.end())
170           return false;
171         // Return a match if debug-info-filename is not specified. Otherwise,
172         // check for equality.
173         return DIFilename.empty() || It->second.equals(DIFilename);
174       });
175       if (!FunctionFound) {
176         // Skip the following profile by setting the profile iterator (FI) to
177         // the past-the-end element.
178         FI = ProgramPathAndClusterInfo.end();
179         DIFilename = "";
180         continue;
181       }
182       for (size_t i = 1; i < Values.size(); ++i)
183         FuncAliasMap.try_emplace(Values[i], Values.front());
184 
185       // Prepare for parsing clusters of this function name.
186       // Start a new cluster map for this function name.
187       auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
188       // Report error when multiple profiles have been specified for the same
189       // function.
190       if (!R.second)
191         return createProfileParseError("duplicate profile for function '" +
192                                        Values.front() + "'");
193       FI = R.first;
194       CurrentCluster = 0;
195       FuncBBIDs.clear();
196       // We won't need DIFilename anymore. Clean it up to avoid its application
197       // on the next function.
198       DIFilename = "";
199       continue;
200     }
201     case 'c': // Basic block cluster specifier.
202       // Skip the profile when we the profile iterator (FI) refers to the
203       // past-the-end element.
204       if (FI == ProgramPathAndClusterInfo.end())
205         continue;
206       // Reset current cluster position.
207       CurrentPosition = 0;
208       for (auto BasicBlockIDStr : Values) {
209         auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
210         if (!BasicBlockID)
211           return BasicBlockID.takeError();
212         if (!FuncBBIDs.insert(*BasicBlockID).second)
213           return createProfileParseError(
214               Twine("duplicate basic block id found '") + BasicBlockIDStr +
215               "'");
216 
217         if (!BasicBlockID->BaseID && CurrentPosition)
218           return createProfileParseError(
219               "entry BB (0) does not begin a cluster.");
220 
221         FI->second.ClusterInfo.emplace_back(BBClusterInfo{
222             *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
223       }
224       CurrentCluster++;
225       continue;
226     case 'p': { // Basic block cloning path specifier.
227       // Skip the profile when we the profile iterator (FI) refers to the
228       // past-the-end element.
229       if (FI == ProgramPathAndClusterInfo.end())
230         continue;
231       SmallSet<unsigned, 5> BBsInPath;
232       FI->second.ClonePaths.push_back({});
233       for (size_t I = 0; I < Values.size(); ++I) {
234         auto BaseBBIDStr = Values[I];
235         unsigned long long BaseBBID = 0;
236         if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
237           return createProfileParseError(Twine("unsigned integer expected: '") +
238                                          BaseBBIDStr + "'");
239         if (I != 0 && !BBsInPath.insert(BaseBBID).second)
240           return createProfileParseError(
241               Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
242         FI->second.ClonePaths.back().push_back(BaseBBID);
243       }
244       continue;
245     }
246     default:
247       return createProfileParseError(Twine("invalid specifier: '") +
248                                      Twine(Specifier) + "'");
249     }
250     llvm_unreachable("should not break from this switch statement");
251   }
252   return Error::success();
253 }
254 
255 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
256   auto FI = ProgramPathAndClusterInfo.end();
257   // Current cluster ID corresponding to this function.
258   unsigned CurrentCluster = 0;
259   // Current position in the current cluster.
260   unsigned CurrentPosition = 0;
261 
262   // Temporary set to ensure every basic block ID appears once in the clusters
263   // of a function.
264   SmallSet<unsigned, 4> FuncBBIDs;
265 
266   for (; !LineIt.is_at_eof(); ++LineIt) {
267     StringRef S(*LineIt);
268     if (S[0] == '@')
269       continue;
270     // Check for the leading "!"
271     if (!S.consume_front("!") || S.empty())
272       break;
273     // Check for second "!" which indicates a cluster of basic blocks.
274     if (S.consume_front("!")) {
275       // Skip the profile when we the profile iterator (FI) refers to the
276       // past-the-end element.
277       if (FI == ProgramPathAndClusterInfo.end())
278         continue;
279       SmallVector<StringRef, 4> BBIDs;
280       S.split(BBIDs, ' ');
281       // Reset current cluster position.
282       CurrentPosition = 0;
283       for (auto BBIDStr : BBIDs) {
284         unsigned long long BBID;
285         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
286           return createProfileParseError(Twine("unsigned integer expected: '") +
287                                          BBIDStr + "'");
288         if (!FuncBBIDs.insert(BBID).second)
289           return createProfileParseError(
290               Twine("duplicate basic block id found '") + BBIDStr + "'");
291         if (BBID == 0 && CurrentPosition)
292           return createProfileParseError(
293               "entry BB (0) does not begin a cluster");
294 
295         FI->second.ClusterInfo.emplace_back(
296             BBClusterInfo({{static_cast<unsigned>(BBID), 0},
297                            CurrentCluster,
298                            CurrentPosition++}));
299       }
300       CurrentCluster++;
301     } else {
302       // This is a function name specifier. It may include a debug info filename
303       // specifier starting with `M=`.
304       auto [AliasesStr, DIFilenameStr] = S.split(' ');
305       SmallString<128> DIFilename;
306       if (DIFilenameStr.starts_with("M=")) {
307         DIFilename =
308             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
309         if (DIFilename.empty())
310           return createProfileParseError("empty module name specifier");
311       } else if (!DIFilenameStr.empty()) {
312         return createProfileParseError("unknown string found: '" +
313                                        DIFilenameStr + "'");
314       }
315       // Function aliases are separated using '/'. We use the first function
316       // name for the cluster info mapping and delegate all other aliases to
317       // this one.
318       SmallVector<StringRef, 4> Aliases;
319       AliasesStr.split(Aliases, '/');
320       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
321         auto It = FunctionNameToDIFilename.find(Alias);
322         // No match if this function name is not found in this module.
323         if (It == FunctionNameToDIFilename.end())
324           return false;
325         // Return a match if debug-info-filename is not specified. Otherwise,
326         // check for equality.
327         return DIFilename.empty() || It->second.equals(DIFilename);
328       });
329       if (!FunctionFound) {
330         // Skip the following profile by setting the profile iterator (FI) to
331         // the past-the-end element.
332         FI = ProgramPathAndClusterInfo.end();
333         continue;
334       }
335       for (size_t i = 1; i < Aliases.size(); ++i)
336         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
337 
338       // Prepare for parsing clusters of this function name.
339       // Start a new cluster map for this function name.
340       auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
341       // Report error when multiple profiles have been specified for the same
342       // function.
343       if (!R.second)
344         return createProfileParseError("duplicate profile for function '" +
345                                        Aliases.front() + "'");
346       FI = R.first;
347       CurrentCluster = 0;
348       FuncBBIDs.clear();
349     }
350   }
351   return Error::success();
352 }
353 
354 // Basic Block Sections can be enabled for a subset of machine basic blocks.
355 // This is done by passing a file containing names of functions for which basic
356 // block sections are desired. Additionally, machine basic block ids of the
357 // functions can also be specified for a finer granularity. Moreover, a cluster
358 // of basic blocks could be assigned to the same section.
359 // Optionally, a debug-info filename can be specified for each function to allow
360 // distinguishing internal-linkage functions of the same name.
361 // A file with basic block sections for all of function main and three blocks
362 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
363 // (Profile for function foo is only loaded when its debug-info filename
364 // matches 'path/to/foo_file.cc').
365 // ----------------------------
366 // list.txt:
367 // !main
368 // !foo M=path/to/foo_file.cc
369 // !!1 2
370 // !!4
371 Error BasicBlockSectionsProfileReader::ReadProfile() {
372   assert(MBuf);
373 
374   unsigned long long Version = 0;
375   StringRef FirstLine(*LineIt);
376   if (FirstLine.consume_front("v")) {
377     if (getAsUnsignedInteger(FirstLine, 10, Version)) {
378       return createProfileParseError(Twine("version number expected: '") +
379                                      FirstLine + "'");
380     }
381     if (Version > 1) {
382       return createProfileParseError(Twine("invalid profile version: ") +
383                                      Twine(Version));
384     }
385     ++LineIt;
386   }
387 
388   switch (Version) {
389   case 0:
390     // TODO: Deprecate V0 once V1 is fully integrated downstream.
391     return ReadV0Profile();
392   case 1:
393     return ReadV1Profile();
394   default:
395     llvm_unreachable("Invalid profile version.");
396   }
397 }
398 
399 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) {
400   if (!BBSPR.MBuf)
401     return false;
402   // Get the function name to debug info filename mapping.
403   BBSPR.FunctionNameToDIFilename.clear();
404   for (const Function &F : M) {
405     SmallString<128> DIFilename;
406     if (F.isDeclaration())
407       continue;
408     DISubprogram *Subprogram = F.getSubprogram();
409     if (Subprogram) {
410       llvm::DICompileUnit *CU = Subprogram->getUnit();
411       if (CU)
412         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
413     }
414     [[maybe_unused]] bool inserted =
415         BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename)
416             .second;
417     assert(inserted);
418   }
419   if (auto Err = BBSPR.ReadProfile())
420     report_fatal_error(std::move(Err));
421   return false;
422 }
423 
424 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key;
425 
426 BasicBlockSectionsProfileReader
427 BasicBlockSectionsProfileReaderAnalysis::run(Function &F,
428                                              FunctionAnalysisManager &AM) {
429   return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf());
430 }
431 
432 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot(
433     StringRef FuncName) const {
434   return BBSPR.isFunctionHot(FuncName);
435 }
436 
437 std::pair<bool, SmallVector<BBClusterInfo>>
438 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction(
439     StringRef FuncName) const {
440   return BBSPR.getClusterInfoForFunction(FuncName);
441 }
442 
443 SmallVector<SmallVector<unsigned>>
444 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
445     StringRef FuncName) const {
446   return BBSPR.getClonePathsForFunction(FuncName);
447 }
448 
449 BasicBlockSectionsProfileReader &
450 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
451   return BBSPR;
452 }
453 
454 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass(
455     const MemoryBuffer *Buf) {
456   return new BasicBlockSectionsProfileReaderWrapperPass(Buf);
457 }
458