1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the basic block sections profile reader pass. It parses 10 // and stores the basic block sections profile file (which is specified via the 11 // `-basic-block-sections` flag). 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/LineIterator.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include <llvm/ADT/STLExtras.h> 30 31 using namespace llvm; 32 33 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; 34 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, 35 "bbsections-profile-reader", 36 "Reads and parses a basic block sections profile.", false, 37 false) 38 39 Expected<UniqueBBID> 40 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { 41 SmallVector<StringRef, 2> Parts; 42 S.split(Parts, '.'); 43 if (Parts.size() > 2) 44 return createProfileParseError(Twine("unable to parse basic block id: '") + 45 S + "'"); 46 unsigned long long BaseBBID; 47 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) 48 return createProfileParseError( 49 Twine("unable to parse BB id: '" + Parts[0]) + 50 "': unsigned integer expected"); 51 unsigned long long CloneID = 0; 52 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) 53 return createProfileParseError(Twine("unable to parse clone id: '") + 54 Parts[1] + "': unsigned integer expected"); 55 return UniqueBBID{static_cast<unsigned>(BaseBBID), 56 static_cast<unsigned>(CloneID)}; 57 } 58 59 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { 60 return getClusterInfoForFunction(FuncName).first; 61 } 62 63 std::pair<bool, SmallVector<BBClusterInfo>> 64 BasicBlockSectionsProfileReader::getClusterInfoForFunction( 65 StringRef FuncName) const { 66 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); 67 return R != ProgramPathAndClusterInfo.end() 68 ? std::pair(true, R->second.ClusterInfo) 69 : std::pair(false, SmallVector<BBClusterInfo>()); 70 } 71 72 SmallVector<SmallVector<unsigned>> 73 BasicBlockSectionsProfileReader::getClonePathsForFunction( 74 StringRef FuncName) const { 75 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; 76 } 77 78 // Reads the version 1 basic block sections profile. Profile for each function 79 // is encoded as follows: 80 // m <module_name> 81 // f <function_name_1> <function_name_2> ... 82 // c <bb_id_1> <bb_id_2> <bb_id_3> 83 // c <bb_id_4> <bb_id_5> 84 // ... 85 // Module name specifier (starting with 'm') is optional and allows 86 // distinguishing profile for internal-linkage functions with the same name. If 87 // not specified, it will apply to any function with the same name. Function 88 // name specifier (starting with 'f') can specify multiple function name 89 // aliases. Basic block clusters are specified by 'c' and specify the cluster of 90 // basic blocks, and the internal order in which they must be placed in the same 91 // section. 92 // This profile can also specify cloning paths which instruct the compiler to 93 // clone basic blocks along a path. The cloned blocks are then specified in the 94 // cluster information. 95 // The following profile lists two cloning paths (starting with 'p') for 96 // function bar and places the total 9 blocks within two clusters. The first two 97 // blocks of a cloning path specify the edge along which the path is cloned. For 98 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along 99 // the edge 1->3. Within the given clusters, each cloned block is identified by 100 // "<original block id>.<clone id>". For instance, 3.1 represents the first 101 // clone of block 3. Original blocks are specified just with their block ids. A 102 // block cloned multiple times appears with distinct clone ids. The CFG for bar 103 // is shown below before and after cloning with its final clusters labeled. 104 // 105 // f main 106 // f bar 107 // p 1 3 4 # cloning path 1 108 // p 4 2 # cloning path 2 109 // c 1 3.1 4.1 6 # basic block cluster 1 110 // c 0 2 3 4 2.1 5 # basic block cluster 2 111 // **************************************************************************** 112 // function bar before and after cloning with basic block clusters shown. 113 // **************************************************************************** 114 // .... .............. 115 // 0 -------+ : 0 :---->: 1 ---> 3.1 : 116 // | | : | : :........ | : 117 // v v : v : : v : 118 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 119 // | | | | : | : : | : 120 // | v | | : v ....... : v : 121 // | 3 <------+ | : 3 <--+ : : 6 : 122 // | | | : | | : :....: 123 // | v | : v | : 124 // +--- 4 ---> 6 | : 4 | : 125 // | : | | : 126 // | : v | : 127 // | :2.1---+ : cluster 2 128 // | : | ......: 129 // | : v : 130 // +-->: 5 : 131 // .... 132 // **************************************************************************** 133 Error BasicBlockSectionsProfileReader::ReadV1Profile() { 134 auto FI = ProgramPathAndClusterInfo.end(); 135 136 // Current cluster ID corresponding to this function. 137 unsigned CurrentCluster = 0; 138 // Current position in the current cluster. 139 unsigned CurrentPosition = 0; 140 141 // Temporary set to ensure every basic block ID appears once in the clusters 142 // of a function. 143 DenseSet<UniqueBBID> FuncBBIDs; 144 145 // Debug-info-based module filename for the current function. Empty string 146 // means no filename. 147 StringRef DIFilename; 148 149 for (; !LineIt.is_at_eof(); ++LineIt) { 150 StringRef S(*LineIt); 151 char Specifier = S[0]; 152 S = S.drop_front().trim(); 153 SmallVector<StringRef, 4> Values; 154 S.split(Values, ' '); 155 switch (Specifier) { 156 case '@': 157 continue; 158 case 'm': // Module name speicifer. 159 if (Values.size() != 1) { 160 return createProfileParseError(Twine("invalid module name value: '") + 161 S + "'"); 162 } 163 DIFilename = sys::path::remove_leading_dotslash(Values[0]); 164 continue; 165 case 'f': { // Function names specifier. 166 bool FunctionFound = any_of(Values, [&](StringRef Alias) { 167 auto It = FunctionNameToDIFilename.find(Alias); 168 // No match if this function name is not found in this module. 169 if (It == FunctionNameToDIFilename.end()) 170 return false; 171 // Return a match if debug-info-filename is not specified. Otherwise, 172 // check for equality. 173 return DIFilename.empty() || It->second == DIFilename; 174 }); 175 if (!FunctionFound) { 176 // Skip the following profile by setting the profile iterator (FI) to 177 // the past-the-end element. 178 FI = ProgramPathAndClusterInfo.end(); 179 DIFilename = ""; 180 continue; 181 } 182 for (size_t i = 1; i < Values.size(); ++i) 183 FuncAliasMap.try_emplace(Values[i], Values.front()); 184 185 // Prepare for parsing clusters of this function name. 186 // Start a new cluster map for this function name. 187 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); 188 // Report error when multiple profiles have been specified for the same 189 // function. 190 if (!R.second) 191 return createProfileParseError("duplicate profile for function '" + 192 Values.front() + "'"); 193 FI = R.first; 194 CurrentCluster = 0; 195 FuncBBIDs.clear(); 196 // We won't need DIFilename anymore. Clean it up to avoid its application 197 // on the next function. 198 DIFilename = ""; 199 continue; 200 } 201 case 'c': // Basic block cluster specifier. 202 // Skip the profile when we the profile iterator (FI) refers to the 203 // past-the-end element. 204 if (FI == ProgramPathAndClusterInfo.end()) 205 continue; 206 // Reset current cluster position. 207 CurrentPosition = 0; 208 for (auto BasicBlockIDStr : Values) { 209 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); 210 if (!BasicBlockID) 211 return BasicBlockID.takeError(); 212 if (!FuncBBIDs.insert(*BasicBlockID).second) 213 return createProfileParseError( 214 Twine("duplicate basic block id found '") + BasicBlockIDStr + 215 "'"); 216 217 FI->second.ClusterInfo.emplace_back(BBClusterInfo{ 218 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); 219 } 220 CurrentCluster++; 221 continue; 222 case 'p': { // Basic block cloning path specifier. 223 // Skip the profile when we the profile iterator (FI) refers to the 224 // past-the-end element. 225 if (FI == ProgramPathAndClusterInfo.end()) 226 continue; 227 SmallSet<unsigned, 5> BBsInPath; 228 FI->second.ClonePaths.push_back({}); 229 for (size_t I = 0; I < Values.size(); ++I) { 230 auto BaseBBIDStr = Values[I]; 231 unsigned long long BaseBBID = 0; 232 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) 233 return createProfileParseError(Twine("unsigned integer expected: '") + 234 BaseBBIDStr + "'"); 235 if (I != 0 && !BBsInPath.insert(BaseBBID).second) 236 return createProfileParseError( 237 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); 238 FI->second.ClonePaths.back().push_back(BaseBBID); 239 } 240 continue; 241 } 242 default: 243 return createProfileParseError(Twine("invalid specifier: '") + 244 Twine(Specifier) + "'"); 245 } 246 llvm_unreachable("should not break from this switch statement"); 247 } 248 return Error::success(); 249 } 250 251 Error BasicBlockSectionsProfileReader::ReadV0Profile() { 252 auto FI = ProgramPathAndClusterInfo.end(); 253 // Current cluster ID corresponding to this function. 254 unsigned CurrentCluster = 0; 255 // Current position in the current cluster. 256 unsigned CurrentPosition = 0; 257 258 // Temporary set to ensure every basic block ID appears once in the clusters 259 // of a function. 260 SmallSet<unsigned, 4> FuncBBIDs; 261 262 for (; !LineIt.is_at_eof(); ++LineIt) { 263 StringRef S(*LineIt); 264 if (S[0] == '@') 265 continue; 266 // Check for the leading "!" 267 if (!S.consume_front("!") || S.empty()) 268 break; 269 // Check for second "!" which indicates a cluster of basic blocks. 270 if (S.consume_front("!")) { 271 // Skip the profile when we the profile iterator (FI) refers to the 272 // past-the-end element. 273 if (FI == ProgramPathAndClusterInfo.end()) 274 continue; 275 SmallVector<StringRef, 4> BBIDs; 276 S.split(BBIDs, ' '); 277 // Reset current cluster position. 278 CurrentPosition = 0; 279 for (auto BBIDStr : BBIDs) { 280 unsigned long long BBID; 281 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 282 return createProfileParseError(Twine("unsigned integer expected: '") + 283 BBIDStr + "'"); 284 if (!FuncBBIDs.insert(BBID).second) 285 return createProfileParseError( 286 Twine("duplicate basic block id found '") + BBIDStr + "'"); 287 288 FI->second.ClusterInfo.emplace_back( 289 BBClusterInfo({{static_cast<unsigned>(BBID), 0}, 290 CurrentCluster, 291 CurrentPosition++})); 292 } 293 CurrentCluster++; 294 } else { 295 // This is a function name specifier. It may include a debug info filename 296 // specifier starting with `M=`. 297 auto [AliasesStr, DIFilenameStr] = S.split(' '); 298 SmallString<128> DIFilename; 299 if (DIFilenameStr.starts_with("M=")) { 300 DIFilename = 301 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); 302 if (DIFilename.empty()) 303 return createProfileParseError("empty module name specifier"); 304 } else if (!DIFilenameStr.empty()) { 305 return createProfileParseError("unknown string found: '" + 306 DIFilenameStr + "'"); 307 } 308 // Function aliases are separated using '/'. We use the first function 309 // name for the cluster info mapping and delegate all other aliases to 310 // this one. 311 SmallVector<StringRef, 4> Aliases; 312 AliasesStr.split(Aliases, '/'); 313 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { 314 auto It = FunctionNameToDIFilename.find(Alias); 315 // No match if this function name is not found in this module. 316 if (It == FunctionNameToDIFilename.end()) 317 return false; 318 // Return a match if debug-info-filename is not specified. Otherwise, 319 // check for equality. 320 return DIFilename.empty() || It->second == DIFilename; 321 }); 322 if (!FunctionFound) { 323 // Skip the following profile by setting the profile iterator (FI) to 324 // the past-the-end element. 325 FI = ProgramPathAndClusterInfo.end(); 326 continue; 327 } 328 for (size_t i = 1; i < Aliases.size(); ++i) 329 FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); 330 331 // Prepare for parsing clusters of this function name. 332 // Start a new cluster map for this function name. 333 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); 334 // Report error when multiple profiles have been specified for the same 335 // function. 336 if (!R.second) 337 return createProfileParseError("duplicate profile for function '" + 338 Aliases.front() + "'"); 339 FI = R.first; 340 CurrentCluster = 0; 341 FuncBBIDs.clear(); 342 } 343 } 344 return Error::success(); 345 } 346 347 // Basic Block Sections can be enabled for a subset of machine basic blocks. 348 // This is done by passing a file containing names of functions for which basic 349 // block sections are desired. Additionally, machine basic block ids of the 350 // functions can also be specified for a finer granularity. Moreover, a cluster 351 // of basic blocks could be assigned to the same section. 352 // Optionally, a debug-info filename can be specified for each function to allow 353 // distinguishing internal-linkage functions of the same name. 354 // A file with basic block sections for all of function main and three blocks 355 // for function foo (of which 1 and 2 are placed in a cluster) looks like this: 356 // (Profile for function foo is only loaded when its debug-info filename 357 // matches 'path/to/foo_file.cc'). 358 // ---------------------------- 359 // list.txt: 360 // !main 361 // !foo M=path/to/foo_file.cc 362 // !!1 2 363 // !!4 364 Error BasicBlockSectionsProfileReader::ReadProfile() { 365 assert(MBuf); 366 367 unsigned long long Version = 0; 368 StringRef FirstLine(*LineIt); 369 if (FirstLine.consume_front("v")) { 370 if (getAsUnsignedInteger(FirstLine, 10, Version)) { 371 return createProfileParseError(Twine("version number expected: '") + 372 FirstLine + "'"); 373 } 374 if (Version > 1) { 375 return createProfileParseError(Twine("invalid profile version: ") + 376 Twine(Version)); 377 } 378 ++LineIt; 379 } 380 381 switch (Version) { 382 case 0: 383 // TODO: Deprecate V0 once V1 is fully integrated downstream. 384 return ReadV0Profile(); 385 case 1: 386 return ReadV1Profile(); 387 default: 388 llvm_unreachable("Invalid profile version."); 389 } 390 } 391 392 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { 393 if (!BBSPR.MBuf) 394 return false; 395 // Get the function name to debug info filename mapping. 396 BBSPR.FunctionNameToDIFilename.clear(); 397 for (const Function &F : M) { 398 SmallString<128> DIFilename; 399 if (F.isDeclaration()) 400 continue; 401 DISubprogram *Subprogram = F.getSubprogram(); 402 if (Subprogram) { 403 llvm::DICompileUnit *CU = Subprogram->getUnit(); 404 if (CU) 405 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); 406 } 407 [[maybe_unused]] bool inserted = 408 BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) 409 .second; 410 assert(inserted); 411 } 412 if (auto Err = BBSPR.ReadProfile()) 413 report_fatal_error(std::move(Err)); 414 return false; 415 } 416 417 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; 418 419 BasicBlockSectionsProfileReader 420 BasicBlockSectionsProfileReaderAnalysis::run(Function &F, 421 FunctionAnalysisManager &AM) { 422 return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); 423 } 424 425 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( 426 StringRef FuncName) const { 427 return BBSPR.isFunctionHot(FuncName); 428 } 429 430 std::pair<bool, SmallVector<BBClusterInfo>> 431 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( 432 StringRef FuncName) const { 433 return BBSPR.getClusterInfoForFunction(FuncName); 434 } 435 436 SmallVector<SmallVector<unsigned>> 437 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( 438 StringRef FuncName) const { 439 return BBSPR.getClonePathsForFunction(FuncName); 440 } 441 442 BasicBlockSectionsProfileReader & 443 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { 444 return BBSPR; 445 } 446 447 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( 448 const MemoryBuffer *Buf) { 449 return new BasicBlockSectionsProfileReaderWrapperPass(Buf); 450 } 451