1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the basic block sections profile reader pass. It parses 10 // and stores the basic block sections profile file (which is specified via the 11 // `-basic-block-sections` flag). 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/LineIterator.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/UniqueBBID.h" 30 #include <llvm/ADT/STLExtras.h> 31 32 using namespace llvm; 33 34 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; 35 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, 36 "bbsections-profile-reader", 37 "Reads and parses a basic block sections profile.", false, 38 false) 39 40 Expected<UniqueBBID> 41 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { 42 SmallVector<StringRef, 2> Parts; 43 S.split(Parts, '.'); 44 if (Parts.size() > 2) 45 return createProfileParseError(Twine("unable to parse basic block id: '") + 46 S + "'"); 47 unsigned long long BaseBBID; 48 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) 49 return createProfileParseError( 50 Twine("unable to parse BB id: '" + Parts[0]) + 51 "': unsigned integer expected"); 52 unsigned long long CloneID = 0; 53 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) 54 return createProfileParseError(Twine("unable to parse clone id: '") + 55 Parts[1] + "': unsigned integer expected"); 56 return UniqueBBID{static_cast<unsigned>(BaseBBID), 57 static_cast<unsigned>(CloneID)}; 58 } 59 60 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { 61 return getClusterInfoForFunction(FuncName).first; 62 } 63 64 std::pair<bool, SmallVector<BBClusterInfo>> 65 BasicBlockSectionsProfileReader::getClusterInfoForFunction( 66 StringRef FuncName) const { 67 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); 68 return R != ProgramPathAndClusterInfo.end() 69 ? std::pair(true, R->second.ClusterInfo) 70 : std::pair(false, SmallVector<BBClusterInfo>()); 71 } 72 73 SmallVector<SmallVector<unsigned>> 74 BasicBlockSectionsProfileReader::getClonePathsForFunction( 75 StringRef FuncName) const { 76 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; 77 } 78 79 // Reads the version 1 basic block sections profile. Profile for each function 80 // is encoded as follows: 81 // m <module_name> 82 // f <function_name_1> <function_name_2> ... 83 // c <bb_id_1> <bb_id_2> <bb_id_3> 84 // c <bb_id_4> <bb_id_5> 85 // ... 86 // Module name specifier (starting with 'm') is optional and allows 87 // distinguishing profile for internal-linkage functions with the same name. If 88 // not specified, it will apply to any function with the same name. Function 89 // name specifier (starting with 'f') can specify multiple function name 90 // aliases. Basic block clusters are specified by 'c' and specify the cluster of 91 // basic blocks, and the internal order in which they must be placed in the same 92 // section. 93 // This profile can also specify cloning paths which instruct the compiler to 94 // clone basic blocks along a path. The cloned blocks are then specified in the 95 // cluster information. 96 // The following profile lists two cloning paths (starting with 'p') for 97 // function bar and places the total 9 blocks within two clusters. The first two 98 // blocks of a cloning path specify the edge along which the path is cloned. For 99 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along 100 // the edge 1->3. Within the given clusters, each cloned block is identified by 101 // "<original block id>.<clone id>". For instance, 3.1 represents the first 102 // clone of block 3. Original blocks are specified just with their block ids. A 103 // block cloned multiple times appears with distinct clone ids. The CFG for bar 104 // is shown below before and after cloning with its final clusters labeled. 105 // 106 // f main 107 // f bar 108 // p 1 3 4 # cloning path 1 109 // p 4 2 # cloning path 2 110 // c 1 3.1 4.1 6 # basic block cluster 1 111 // c 0 2 3 4 2.1 5 # basic block cluster 2 112 // **************************************************************************** 113 // function bar before and after cloning with basic block clusters shown. 114 // **************************************************************************** 115 // .... .............. 116 // 0 -------+ : 0 :---->: 1 ---> 3.1 : 117 // | | : | : :........ | : 118 // v v : v : : v : 119 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 120 // | | | | : | : : | : 121 // | v | | : v ....... : v : 122 // | 3 <------+ | : 3 <--+ : : 6 : 123 // | | | : | | : :....: 124 // | v | : v | : 125 // +--- 4 ---> 6 | : 4 | : 126 // | : | | : 127 // | : v | : 128 // | :2.1---+ : cluster 2 129 // | : | ......: 130 // | : v : 131 // +-->: 5 : 132 // .... 133 // **************************************************************************** 134 Error BasicBlockSectionsProfileReader::ReadV1Profile() { 135 auto FI = ProgramPathAndClusterInfo.end(); 136 137 // Current cluster ID corresponding to this function. 138 unsigned CurrentCluster = 0; 139 // Current position in the current cluster. 140 unsigned CurrentPosition = 0; 141 142 // Temporary set to ensure every basic block ID appears once in the clusters 143 // of a function. 144 DenseSet<UniqueBBID> FuncBBIDs; 145 146 // Debug-info-based module filename for the current function. Empty string 147 // means no filename. 148 StringRef DIFilename; 149 150 for (; !LineIt.is_at_eof(); ++LineIt) { 151 StringRef S(*LineIt); 152 char Specifier = S[0]; 153 S = S.drop_front().trim(); 154 SmallVector<StringRef, 4> Values; 155 S.split(Values, ' '); 156 switch (Specifier) { 157 case '@': 158 continue; 159 case 'm': // Module name speicifer. 160 if (Values.size() != 1) { 161 return createProfileParseError(Twine("invalid module name value: '") + 162 S + "'"); 163 } 164 DIFilename = sys::path::remove_leading_dotslash(Values[0]); 165 continue; 166 case 'f': { // Function names specifier. 167 bool FunctionFound = any_of(Values, [&](StringRef Alias) { 168 auto It = FunctionNameToDIFilename.find(Alias); 169 // No match if this function name is not found in this module. 170 if (It == FunctionNameToDIFilename.end()) 171 return false; 172 // Return a match if debug-info-filename is not specified. Otherwise, 173 // check for equality. 174 return DIFilename.empty() || It->second == DIFilename; 175 }); 176 if (!FunctionFound) { 177 // Skip the following profile by setting the profile iterator (FI) to 178 // the past-the-end element. 179 FI = ProgramPathAndClusterInfo.end(); 180 DIFilename = ""; 181 continue; 182 } 183 for (size_t i = 1; i < Values.size(); ++i) 184 FuncAliasMap.try_emplace(Values[i], Values.front()); 185 186 // Prepare for parsing clusters of this function name. 187 // Start a new cluster map for this function name. 188 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); 189 // Report error when multiple profiles have been specified for the same 190 // function. 191 if (!R.second) 192 return createProfileParseError("duplicate profile for function '" + 193 Values.front() + "'"); 194 FI = R.first; 195 CurrentCluster = 0; 196 FuncBBIDs.clear(); 197 // We won't need DIFilename anymore. Clean it up to avoid its application 198 // on the next function. 199 DIFilename = ""; 200 continue; 201 } 202 case 'c': // Basic block cluster specifier. 203 // Skip the profile when we the profile iterator (FI) refers to the 204 // past-the-end element. 205 if (FI == ProgramPathAndClusterInfo.end()) 206 continue; 207 // Reset current cluster position. 208 CurrentPosition = 0; 209 for (auto BasicBlockIDStr : Values) { 210 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); 211 if (!BasicBlockID) 212 return BasicBlockID.takeError(); 213 if (!FuncBBIDs.insert(*BasicBlockID).second) 214 return createProfileParseError( 215 Twine("duplicate basic block id found '") + BasicBlockIDStr + 216 "'"); 217 218 FI->second.ClusterInfo.emplace_back(BBClusterInfo{ 219 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); 220 } 221 CurrentCluster++; 222 continue; 223 case 'p': { // Basic block cloning path specifier. 224 // Skip the profile when we the profile iterator (FI) refers to the 225 // past-the-end element. 226 if (FI == ProgramPathAndClusterInfo.end()) 227 continue; 228 SmallSet<unsigned, 5> BBsInPath; 229 FI->second.ClonePaths.push_back({}); 230 for (size_t I = 0; I < Values.size(); ++I) { 231 auto BaseBBIDStr = Values[I]; 232 unsigned long long BaseBBID = 0; 233 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) 234 return createProfileParseError(Twine("unsigned integer expected: '") + 235 BaseBBIDStr + "'"); 236 if (I != 0 && !BBsInPath.insert(BaseBBID).second) 237 return createProfileParseError( 238 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); 239 FI->second.ClonePaths.back().push_back(BaseBBID); 240 } 241 continue; 242 } 243 default: 244 return createProfileParseError(Twine("invalid specifier: '") + 245 Twine(Specifier) + "'"); 246 } 247 llvm_unreachable("should not break from this switch statement"); 248 } 249 return Error::success(); 250 } 251 252 Error BasicBlockSectionsProfileReader::ReadV0Profile() { 253 auto FI = ProgramPathAndClusterInfo.end(); 254 // Current cluster ID corresponding to this function. 255 unsigned CurrentCluster = 0; 256 // Current position in the current cluster. 257 unsigned CurrentPosition = 0; 258 259 // Temporary set to ensure every basic block ID appears once in the clusters 260 // of a function. 261 SmallSet<unsigned, 4> FuncBBIDs; 262 263 for (; !LineIt.is_at_eof(); ++LineIt) { 264 StringRef S(*LineIt); 265 if (S[0] == '@') 266 continue; 267 // Check for the leading "!" 268 if (!S.consume_front("!") || S.empty()) 269 break; 270 // Check for second "!" which indicates a cluster of basic blocks. 271 if (S.consume_front("!")) { 272 // Skip the profile when we the profile iterator (FI) refers to the 273 // past-the-end element. 274 if (FI == ProgramPathAndClusterInfo.end()) 275 continue; 276 SmallVector<StringRef, 4> BBIDs; 277 S.split(BBIDs, ' '); 278 // Reset current cluster position. 279 CurrentPosition = 0; 280 for (auto BBIDStr : BBIDs) { 281 unsigned long long BBID; 282 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 283 return createProfileParseError(Twine("unsigned integer expected: '") + 284 BBIDStr + "'"); 285 if (!FuncBBIDs.insert(BBID).second) 286 return createProfileParseError( 287 Twine("duplicate basic block id found '") + BBIDStr + "'"); 288 289 FI->second.ClusterInfo.emplace_back( 290 BBClusterInfo({{static_cast<unsigned>(BBID), 0}, 291 CurrentCluster, 292 CurrentPosition++})); 293 } 294 CurrentCluster++; 295 } else { 296 // This is a function name specifier. It may include a debug info filename 297 // specifier starting with `M=`. 298 auto [AliasesStr, DIFilenameStr] = S.split(' '); 299 SmallString<128> DIFilename; 300 if (DIFilenameStr.starts_with("M=")) { 301 DIFilename = 302 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); 303 if (DIFilename.empty()) 304 return createProfileParseError("empty module name specifier"); 305 } else if (!DIFilenameStr.empty()) { 306 return createProfileParseError("unknown string found: '" + 307 DIFilenameStr + "'"); 308 } 309 // Function aliases are separated using '/'. We use the first function 310 // name for the cluster info mapping and delegate all other aliases to 311 // this one. 312 SmallVector<StringRef, 4> Aliases; 313 AliasesStr.split(Aliases, '/'); 314 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { 315 auto It = FunctionNameToDIFilename.find(Alias); 316 // No match if this function name is not found in this module. 317 if (It == FunctionNameToDIFilename.end()) 318 return false; 319 // Return a match if debug-info-filename is not specified. Otherwise, 320 // check for equality. 321 return DIFilename.empty() || It->second == DIFilename; 322 }); 323 if (!FunctionFound) { 324 // Skip the following profile by setting the profile iterator (FI) to 325 // the past-the-end element. 326 FI = ProgramPathAndClusterInfo.end(); 327 continue; 328 } 329 for (size_t i = 1; i < Aliases.size(); ++i) 330 FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); 331 332 // Prepare for parsing clusters of this function name. 333 // Start a new cluster map for this function name. 334 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); 335 // Report error when multiple profiles have been specified for the same 336 // function. 337 if (!R.second) 338 return createProfileParseError("duplicate profile for function '" + 339 Aliases.front() + "'"); 340 FI = R.first; 341 CurrentCluster = 0; 342 FuncBBIDs.clear(); 343 } 344 } 345 return Error::success(); 346 } 347 348 // Basic Block Sections can be enabled for a subset of machine basic blocks. 349 // This is done by passing a file containing names of functions for which basic 350 // block sections are desired. Additionally, machine basic block ids of the 351 // functions can also be specified for a finer granularity. Moreover, a cluster 352 // of basic blocks could be assigned to the same section. 353 // Optionally, a debug-info filename can be specified for each function to allow 354 // distinguishing internal-linkage functions of the same name. 355 // A file with basic block sections for all of function main and three blocks 356 // for function foo (of which 1 and 2 are placed in a cluster) looks like this: 357 // (Profile for function foo is only loaded when its debug-info filename 358 // matches 'path/to/foo_file.cc'). 359 // ---------------------------- 360 // list.txt: 361 // !main 362 // !foo M=path/to/foo_file.cc 363 // !!1 2 364 // !!4 365 Error BasicBlockSectionsProfileReader::ReadProfile() { 366 assert(MBuf); 367 368 unsigned long long Version = 0; 369 StringRef FirstLine(*LineIt); 370 if (FirstLine.consume_front("v")) { 371 if (getAsUnsignedInteger(FirstLine, 10, Version)) { 372 return createProfileParseError(Twine("version number expected: '") + 373 FirstLine + "'"); 374 } 375 if (Version > 1) { 376 return createProfileParseError(Twine("invalid profile version: ") + 377 Twine(Version)); 378 } 379 ++LineIt; 380 } 381 382 switch (Version) { 383 case 0: 384 // TODO: Deprecate V0 once V1 is fully integrated downstream. 385 return ReadV0Profile(); 386 case 1: 387 return ReadV1Profile(); 388 default: 389 llvm_unreachable("Invalid profile version."); 390 } 391 } 392 393 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { 394 if (!BBSPR.MBuf) 395 return false; 396 // Get the function name to debug info filename mapping. 397 BBSPR.FunctionNameToDIFilename.clear(); 398 for (const Function &F : M) { 399 SmallString<128> DIFilename; 400 if (F.isDeclaration()) 401 continue; 402 DISubprogram *Subprogram = F.getSubprogram(); 403 if (Subprogram) { 404 llvm::DICompileUnit *CU = Subprogram->getUnit(); 405 if (CU) 406 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); 407 } 408 [[maybe_unused]] bool inserted = 409 BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) 410 .second; 411 assert(inserted); 412 } 413 if (auto Err = BBSPR.ReadProfile()) 414 report_fatal_error(std::move(Err)); 415 return false; 416 } 417 418 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; 419 420 BasicBlockSectionsProfileReader 421 BasicBlockSectionsProfileReaderAnalysis::run(Function &F, 422 FunctionAnalysisManager &AM) { 423 return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); 424 } 425 426 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( 427 StringRef FuncName) const { 428 return BBSPR.isFunctionHot(FuncName); 429 } 430 431 std::pair<bool, SmallVector<BBClusterInfo>> 432 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( 433 StringRef FuncName) const { 434 return BBSPR.getClusterInfoForFunction(FuncName); 435 } 436 437 SmallVector<SmallVector<unsigned>> 438 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( 439 StringRef FuncName) const { 440 return BBSPR.getClonePathsForFunction(FuncName); 441 } 442 443 BasicBlockSectionsProfileReader & 444 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { 445 return BBSPR; 446 } 447 448 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( 449 const MemoryBuffer *Buf) { 450 return new BasicBlockSectionsProfileReaderWrapperPass(Buf); 451 } 452