1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringMap.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/UniqueBBID.h"
30 #include <llvm/ADT/STLExtras.h>
31
32 using namespace llvm;
33
34 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0;
35 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass,
36 "bbsections-profile-reader",
37 "Reads and parses a basic block sections profile.", false,
38 false)
39
40 Expected<UniqueBBID>
parseUniqueBBID(StringRef S) const41 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
42 SmallVector<StringRef, 2> Parts;
43 S.split(Parts, '.');
44 if (Parts.size() > 2)
45 return createProfileParseError(Twine("unable to parse basic block id: '") +
46 S + "'");
47 unsigned long long BaseBBID;
48 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
49 return createProfileParseError(
50 Twine("unable to parse BB id: '" + Parts[0]) +
51 "': unsigned integer expected");
52 unsigned long long CloneID = 0;
53 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
54 return createProfileParseError(Twine("unable to parse clone id: '") +
55 Parts[1] + "': unsigned integer expected");
56 return UniqueBBID{static_cast<unsigned>(BaseBBID),
57 static_cast<unsigned>(CloneID)};
58 }
59
isFunctionHot(StringRef FuncName) const60 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
61 return getClusterInfoForFunction(FuncName).first;
62 }
63
64 std::pair<bool, SmallVector<BBClusterInfo>>
getClusterInfoForFunction(StringRef FuncName) const65 BasicBlockSectionsProfileReader::getClusterInfoForFunction(
66 StringRef FuncName) const {
67 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
68 return R != ProgramPathAndClusterInfo.end()
69 ? std::pair(true, R->second.ClusterInfo)
70 : std::pair(false, SmallVector<BBClusterInfo>());
71 }
72
73 SmallVector<SmallVector<unsigned>>
getClonePathsForFunction(StringRef FuncName) const74 BasicBlockSectionsProfileReader::getClonePathsForFunction(
75 StringRef FuncName) const {
76 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
77 }
78
79 // Reads the version 1 basic block sections profile. Profile for each function
80 // is encoded as follows:
81 // m <module_name>
82 // f <function_name_1> <function_name_2> ...
83 // c <bb_id_1> <bb_id_2> <bb_id_3>
84 // c <bb_id_4> <bb_id_5>
85 // ...
86 // Module name specifier (starting with 'm') is optional and allows
87 // distinguishing profile for internal-linkage functions with the same name. If
88 // not specified, it will apply to any function with the same name. Function
89 // name specifier (starting with 'f') can specify multiple function name
90 // aliases. Basic block clusters are specified by 'c' and specify the cluster of
91 // basic blocks, and the internal order in which they must be placed in the same
92 // section.
93 // This profile can also specify cloning paths which instruct the compiler to
94 // clone basic blocks along a path. The cloned blocks are then specified in the
95 // cluster information.
96 // The following profile lists two cloning paths (starting with 'p') for
97 // function bar and places the total 9 blocks within two clusters. The first two
98 // blocks of a cloning path specify the edge along which the path is cloned. For
99 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
100 // the edge 1->3. Within the given clusters, each cloned block is identified by
101 // "<original block id>.<clone id>". For instance, 3.1 represents the first
102 // clone of block 3. Original blocks are specified just with their block ids. A
103 // block cloned multiple times appears with distinct clone ids. The CFG for bar
104 // is shown below before and after cloning with its final clusters labeled.
105 //
106 // f main
107 // f bar
108 // p 1 3 4 # cloning path 1
109 // p 4 2 # cloning path 2
110 // c 1 3.1 4.1 6 # basic block cluster 1
111 // c 0 2 3 4 2.1 5 # basic block cluster 2
112 // ****************************************************************************
113 // function bar before and after cloning with basic block clusters shown.
114 // ****************************************************************************
115 // .... ..............
116 // 0 -------+ : 0 :---->: 1 ---> 3.1 :
117 // | | : | : :........ | :
118 // v v : v : : v :
119 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1
120 // | | | | : | : : | :
121 // | v | | : v ....... : v :
122 // | 3 <------+ | : 3 <--+ : : 6 :
123 // | | | : | | : :....:
124 // | v | : v | :
125 // +--- 4 ---> 6 | : 4 | :
126 // | : | | :
127 // | : v | :
128 // | :2.1---+ : cluster 2
129 // | : | ......:
130 // | : v :
131 // +-->: 5 :
132 // ....
133 // ****************************************************************************
ReadV1Profile()134 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
135 auto FI = ProgramPathAndClusterInfo.end();
136
137 // Current cluster ID corresponding to this function.
138 unsigned CurrentCluster = 0;
139 // Current position in the current cluster.
140 unsigned CurrentPosition = 0;
141
142 // Temporary set to ensure every basic block ID appears once in the clusters
143 // of a function.
144 DenseSet<UniqueBBID> FuncBBIDs;
145
146 // Debug-info-based module filename for the current function. Empty string
147 // means no filename.
148 StringRef DIFilename;
149
150 for (; !LineIt.is_at_eof(); ++LineIt) {
151 StringRef S(*LineIt);
152 char Specifier = S[0];
153 S = S.drop_front().trim();
154 SmallVector<StringRef, 4> Values;
155 S.split(Values, ' ');
156 switch (Specifier) {
157 case '@':
158 continue;
159 case 'm': // Module name speicifer.
160 if (Values.size() != 1) {
161 return createProfileParseError(Twine("invalid module name value: '") +
162 S + "'");
163 }
164 DIFilename = sys::path::remove_leading_dotslash(Values[0]);
165 continue;
166 case 'f': { // Function names specifier.
167 bool FunctionFound = any_of(Values, [&](StringRef Alias) {
168 auto It = FunctionNameToDIFilename.find(Alias);
169 // No match if this function name is not found in this module.
170 if (It == FunctionNameToDIFilename.end())
171 return false;
172 // Return a match if debug-info-filename is not specified. Otherwise,
173 // check for equality.
174 return DIFilename.empty() || It->second == DIFilename;
175 });
176 if (!FunctionFound) {
177 // Skip the following profile by setting the profile iterator (FI) to
178 // the past-the-end element.
179 FI = ProgramPathAndClusterInfo.end();
180 DIFilename = "";
181 continue;
182 }
183 for (size_t i = 1; i < Values.size(); ++i)
184 FuncAliasMap.try_emplace(Values[i], Values.front());
185
186 // Prepare for parsing clusters of this function name.
187 // Start a new cluster map for this function name.
188 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
189 // Report error when multiple profiles have been specified for the same
190 // function.
191 if (!R.second)
192 return createProfileParseError("duplicate profile for function '" +
193 Values.front() + "'");
194 FI = R.first;
195 CurrentCluster = 0;
196 FuncBBIDs.clear();
197 // We won't need DIFilename anymore. Clean it up to avoid its application
198 // on the next function.
199 DIFilename = "";
200 continue;
201 }
202 case 'c': // Basic block cluster specifier.
203 // Skip the profile when we the profile iterator (FI) refers to the
204 // past-the-end element.
205 if (FI == ProgramPathAndClusterInfo.end())
206 continue;
207 // Reset current cluster position.
208 CurrentPosition = 0;
209 for (auto BasicBlockIDStr : Values) {
210 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
211 if (!BasicBlockID)
212 return BasicBlockID.takeError();
213 if (!FuncBBIDs.insert(*BasicBlockID).second)
214 return createProfileParseError(
215 Twine("duplicate basic block id found '") + BasicBlockIDStr +
216 "'");
217
218 FI->second.ClusterInfo.emplace_back(BBClusterInfo{
219 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
220 }
221 CurrentCluster++;
222 continue;
223 case 'p': { // Basic block cloning path specifier.
224 // Skip the profile when we the profile iterator (FI) refers to the
225 // past-the-end element.
226 if (FI == ProgramPathAndClusterInfo.end())
227 continue;
228 SmallSet<unsigned, 5> BBsInPath;
229 FI->second.ClonePaths.push_back({});
230 for (size_t I = 0; I < Values.size(); ++I) {
231 auto BaseBBIDStr = Values[I];
232 unsigned long long BaseBBID = 0;
233 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
234 return createProfileParseError(Twine("unsigned integer expected: '") +
235 BaseBBIDStr + "'");
236 if (I != 0 && !BBsInPath.insert(BaseBBID).second)
237 return createProfileParseError(
238 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
239 FI->second.ClonePaths.back().push_back(BaseBBID);
240 }
241 continue;
242 }
243 default:
244 return createProfileParseError(Twine("invalid specifier: '") +
245 Twine(Specifier) + "'");
246 }
247 llvm_unreachable("should not break from this switch statement");
248 }
249 return Error::success();
250 }
251
ReadV0Profile()252 Error BasicBlockSectionsProfileReader::ReadV0Profile() {
253 auto FI = ProgramPathAndClusterInfo.end();
254 // Current cluster ID corresponding to this function.
255 unsigned CurrentCluster = 0;
256 // Current position in the current cluster.
257 unsigned CurrentPosition = 0;
258
259 // Temporary set to ensure every basic block ID appears once in the clusters
260 // of a function.
261 SmallSet<unsigned, 4> FuncBBIDs;
262
263 for (; !LineIt.is_at_eof(); ++LineIt) {
264 StringRef S(*LineIt);
265 if (S[0] == '@')
266 continue;
267 // Check for the leading "!"
268 if (!S.consume_front("!") || S.empty())
269 break;
270 // Check for second "!" which indicates a cluster of basic blocks.
271 if (S.consume_front("!")) {
272 // Skip the profile when we the profile iterator (FI) refers to the
273 // past-the-end element.
274 if (FI == ProgramPathAndClusterInfo.end())
275 continue;
276 SmallVector<StringRef, 4> BBIDs;
277 S.split(BBIDs, ' ');
278 // Reset current cluster position.
279 CurrentPosition = 0;
280 for (auto BBIDStr : BBIDs) {
281 unsigned long long BBID;
282 if (getAsUnsignedInteger(BBIDStr, 10, BBID))
283 return createProfileParseError(Twine("unsigned integer expected: '") +
284 BBIDStr + "'");
285 if (!FuncBBIDs.insert(BBID).second)
286 return createProfileParseError(
287 Twine("duplicate basic block id found '") + BBIDStr + "'");
288
289 FI->second.ClusterInfo.emplace_back(
290 BBClusterInfo({{static_cast<unsigned>(BBID), 0},
291 CurrentCluster,
292 CurrentPosition++}));
293 }
294 CurrentCluster++;
295 } else {
296 // This is a function name specifier. It may include a debug info filename
297 // specifier starting with `M=`.
298 auto [AliasesStr, DIFilenameStr] = S.split(' ');
299 SmallString<128> DIFilename;
300 if (DIFilenameStr.starts_with("M=")) {
301 DIFilename =
302 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
303 if (DIFilename.empty())
304 return createProfileParseError("empty module name specifier");
305 } else if (!DIFilenameStr.empty()) {
306 return createProfileParseError("unknown string found: '" +
307 DIFilenameStr + "'");
308 }
309 // Function aliases are separated using '/'. We use the first function
310 // name for the cluster info mapping and delegate all other aliases to
311 // this one.
312 SmallVector<StringRef, 4> Aliases;
313 AliasesStr.split(Aliases, '/');
314 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
315 auto It = FunctionNameToDIFilename.find(Alias);
316 // No match if this function name is not found in this module.
317 if (It == FunctionNameToDIFilename.end())
318 return false;
319 // Return a match if debug-info-filename is not specified. Otherwise,
320 // check for equality.
321 return DIFilename.empty() || It->second == DIFilename;
322 });
323 if (!FunctionFound) {
324 // Skip the following profile by setting the profile iterator (FI) to
325 // the past-the-end element.
326 FI = ProgramPathAndClusterInfo.end();
327 continue;
328 }
329 for (size_t i = 1; i < Aliases.size(); ++i)
330 FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
331
332 // Prepare for parsing clusters of this function name.
333 // Start a new cluster map for this function name.
334 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
335 // Report error when multiple profiles have been specified for the same
336 // function.
337 if (!R.second)
338 return createProfileParseError("duplicate profile for function '" +
339 Aliases.front() + "'");
340 FI = R.first;
341 CurrentCluster = 0;
342 FuncBBIDs.clear();
343 }
344 }
345 return Error::success();
346 }
347
348 // Basic Block Sections can be enabled for a subset of machine basic blocks.
349 // This is done by passing a file containing names of functions for which basic
350 // block sections are desired. Additionally, machine basic block ids of the
351 // functions can also be specified for a finer granularity. Moreover, a cluster
352 // of basic blocks could be assigned to the same section.
353 // Optionally, a debug-info filename can be specified for each function to allow
354 // distinguishing internal-linkage functions of the same name.
355 // A file with basic block sections for all of function main and three blocks
356 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
357 // (Profile for function foo is only loaded when its debug-info filename
358 // matches 'path/to/foo_file.cc').
359 // ----------------------------
360 // list.txt:
361 // !main
362 // !foo M=path/to/foo_file.cc
363 // !!1 2
364 // !!4
ReadProfile()365 Error BasicBlockSectionsProfileReader::ReadProfile() {
366 assert(MBuf);
367
368 unsigned long long Version = 0;
369 StringRef FirstLine(*LineIt);
370 if (FirstLine.consume_front("v")) {
371 if (getAsUnsignedInteger(FirstLine, 10, Version)) {
372 return createProfileParseError(Twine("version number expected: '") +
373 FirstLine + "'");
374 }
375 if (Version > 1) {
376 return createProfileParseError(Twine("invalid profile version: ") +
377 Twine(Version));
378 }
379 ++LineIt;
380 }
381
382 switch (Version) {
383 case 0:
384 // TODO: Deprecate V0 once V1 is fully integrated downstream.
385 return ReadV0Profile();
386 case 1:
387 return ReadV1Profile();
388 default:
389 llvm_unreachable("Invalid profile version.");
390 }
391 }
392
doInitialization(Module & M)393 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) {
394 if (!BBSPR.MBuf)
395 return false;
396 // Get the function name to debug info filename mapping.
397 BBSPR.FunctionNameToDIFilename.clear();
398 for (const Function &F : M) {
399 SmallString<128> DIFilename;
400 if (F.isDeclaration())
401 continue;
402 DISubprogram *Subprogram = F.getSubprogram();
403 if (Subprogram) {
404 llvm::DICompileUnit *CU = Subprogram->getUnit();
405 if (CU)
406 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
407 }
408 [[maybe_unused]] bool inserted =
409 BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename)
410 .second;
411 assert(inserted);
412 }
413 if (auto Err = BBSPR.ReadProfile())
414 report_fatal_error(std::move(Err));
415 return false;
416 }
417
418 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key;
419
420 BasicBlockSectionsProfileReader
run(Function & F,FunctionAnalysisManager & AM)421 BasicBlockSectionsProfileReaderAnalysis::run(Function &F,
422 FunctionAnalysisManager &AM) {
423 return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf());
424 }
425
isFunctionHot(StringRef FuncName) const426 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot(
427 StringRef FuncName) const {
428 return BBSPR.isFunctionHot(FuncName);
429 }
430
431 std::pair<bool, SmallVector<BBClusterInfo>>
getClusterInfoForFunction(StringRef FuncName) const432 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction(
433 StringRef FuncName) const {
434 return BBSPR.getClusterInfoForFunction(FuncName);
435 }
436
437 SmallVector<SmallVector<unsigned>>
getClonePathsForFunction(StringRef FuncName) const438 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
439 StringRef FuncName) const {
440 return BBSPR.getClonePathsForFunction(FuncName);
441 }
442
443 BasicBlockSectionsProfileReader &
getBBSPR()444 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
445 return BBSPR;
446 }
447
createBasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer * Buf)448 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass(
449 const MemoryBuffer *Buf) {
450 return new BasicBlockSectionsProfileReaderWrapperPass(Buf);
451 }
452