1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation of the basic block sections profile reader pass. It parses 10 // and stores the basic block sections profile file (which is specified via the 11 // `-basic-block-sections` flag). 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringMap.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/IR/DebugInfoMetadata.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/LineIterator.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include <llvm/ADT/STLExtras.h> 30 31 using namespace llvm; 32 33 char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; 34 INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, 35 "bbsections-profile-reader", 36 "Reads and parses a basic block sections profile.", false, 37 false) 38 39 Expected<UniqueBBID> 40 BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { 41 SmallVector<StringRef, 2> Parts; 42 S.split(Parts, '.'); 43 if (Parts.size() > 2) 44 return createProfileParseError(Twine("unable to parse basic block id: '") + 45 S + "'"); 46 unsigned long long BaseBBID; 47 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) 48 return createProfileParseError( 49 Twine("unable to parse BB id: '" + Parts[0]) + 50 "': unsigned integer expected"); 51 unsigned long long CloneID = 0; 52 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) 53 return createProfileParseError(Twine("unable to parse clone id: '") + 54 Parts[1] + "': unsigned integer expected"); 55 return UniqueBBID{static_cast<unsigned>(BaseBBID), 56 static_cast<unsigned>(CloneID)}; 57 } 58 59 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { 60 return getClusterInfoForFunction(FuncName).first; 61 } 62 63 std::pair<bool, SmallVector<BBClusterInfo>> 64 BasicBlockSectionsProfileReader::getClusterInfoForFunction( 65 StringRef FuncName) const { 66 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); 67 return R != ProgramPathAndClusterInfo.end() 68 ? std::pair(true, R->second.ClusterInfo) 69 : std::pair(false, SmallVector<BBClusterInfo>()); 70 } 71 72 SmallVector<SmallVector<unsigned>> 73 BasicBlockSectionsProfileReader::getClonePathsForFunction( 74 StringRef FuncName) const { 75 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; 76 } 77 78 // Reads the version 1 basic block sections profile. Profile for each function 79 // is encoded as follows: 80 // m <module_name> 81 // f <function_name_1> <function_name_2> ... 82 // c <bb_id_1> <bb_id_2> <bb_id_3> 83 // c <bb_id_4> <bb_id_5> 84 // ... 85 // Module name specifier (starting with 'm') is optional and allows 86 // distinguishing profile for internal-linkage functions with the same name. If 87 // not specified, it will apply to any function with the same name. Function 88 // name specifier (starting with 'f') can specify multiple function name 89 // aliases. Basic block clusters are specified by 'c' and specify the cluster of 90 // basic blocks, and the internal order in which they must be placed in the same 91 // section. 92 // This profile can also specify cloning paths which instruct the compiler to 93 // clone basic blocks along a path. The cloned blocks are then specified in the 94 // cluster information. 95 // The following profile lists two cloning paths (starting with 'p') for 96 // function bar and places the total 9 blocks within two clusters. The first two 97 // blocks of a cloning path specify the edge along which the path is cloned. For 98 // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along 99 // the edge 1->3. Within the given clusters, each cloned block is identified by 100 // "<original block id>.<clone id>". For instance, 3.1 represents the first 101 // clone of block 3. Original blocks are specified just with their block ids. A 102 // block cloned multiple times appears with distinct clone ids. The CFG for bar 103 // is shown below before and after cloning with its final clusters labeled. 104 // 105 // f main 106 // f bar 107 // p 1 3 4 # cloning path 1 108 // p 4 2 # cloning path 2 109 // c 1 3.1 4.1 6 # basic block cluster 1 110 // c 0 2 3 4 2.1 5 # basic block cluster 2 111 // **************************************************************************** 112 // function bar before and after cloning with basic block clusters shown. 113 // **************************************************************************** 114 // .... .............. 115 // 0 -------+ : 0 :---->: 1 ---> 3.1 : 116 // | | : | : :........ | : 117 // v v : v : : v : 118 // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 119 // | | | | : | : : | : 120 // | v | | : v ....... : v : 121 // | 3 <------+ | : 3 <--+ : : 6 : 122 // | | | : | | : :....: 123 // | v | : v | : 124 // +--- 4 ---> 6 | : 4 | : 125 // | : | | : 126 // | : v | : 127 // | :2.1---+ : cluster 2 128 // | : | ......: 129 // | : v : 130 // +-->: 5 : 131 // .... 132 // **************************************************************************** 133 Error BasicBlockSectionsProfileReader::ReadV1Profile() { 134 auto FI = ProgramPathAndClusterInfo.end(); 135 136 // Current cluster ID corresponding to this function. 137 unsigned CurrentCluster = 0; 138 // Current position in the current cluster. 139 unsigned CurrentPosition = 0; 140 141 // Temporary set to ensure every basic block ID appears once in the clusters 142 // of a function. 143 DenseSet<UniqueBBID> FuncBBIDs; 144 145 // Debug-info-based module filename for the current function. Empty string 146 // means no filename. 147 StringRef DIFilename; 148 149 for (; !LineIt.is_at_eof(); ++LineIt) { 150 StringRef S(*LineIt); 151 char Specifier = S[0]; 152 S = S.drop_front().trim(); 153 SmallVector<StringRef, 4> Values; 154 S.split(Values, ' '); 155 switch (Specifier) { 156 case '@': 157 continue; 158 case 'm': // Module name speicifer. 159 if (Values.size() != 1) { 160 return createProfileParseError(Twine("invalid module name value: '") + 161 S + "'"); 162 } 163 DIFilename = sys::path::remove_leading_dotslash(Values[0]); 164 continue; 165 case 'f': { // Function names specifier. 166 bool FunctionFound = any_of(Values, [&](StringRef Alias) { 167 auto It = FunctionNameToDIFilename.find(Alias); 168 // No match if this function name is not found in this module. 169 if (It == FunctionNameToDIFilename.end()) 170 return false; 171 // Return a match if debug-info-filename is not specified. Otherwise, 172 // check for equality. 173 return DIFilename.empty() || It->second.equals(DIFilename); 174 }); 175 if (!FunctionFound) { 176 // Skip the following profile by setting the profile iterator (FI) to 177 // the past-the-end element. 178 FI = ProgramPathAndClusterInfo.end(); 179 DIFilename = ""; 180 continue; 181 } 182 for (size_t i = 1; i < Values.size(); ++i) 183 FuncAliasMap.try_emplace(Values[i], Values.front()); 184 185 // Prepare for parsing clusters of this function name. 186 // Start a new cluster map for this function name. 187 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); 188 // Report error when multiple profiles have been specified for the same 189 // function. 190 if (!R.second) 191 return createProfileParseError("duplicate profile for function '" + 192 Values.front() + "'"); 193 FI = R.first; 194 CurrentCluster = 0; 195 FuncBBIDs.clear(); 196 // We won't need DIFilename anymore. Clean it up to avoid its application 197 // on the next function. 198 DIFilename = ""; 199 continue; 200 } 201 case 'c': // Basic block cluster specifier. 202 // Skip the profile when we the profile iterator (FI) refers to the 203 // past-the-end element. 204 if (FI == ProgramPathAndClusterInfo.end()) 205 continue; 206 // Reset current cluster position. 207 CurrentPosition = 0; 208 for (auto BasicBlockIDStr : Values) { 209 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); 210 if (!BasicBlockID) 211 return BasicBlockID.takeError(); 212 if (!FuncBBIDs.insert(*BasicBlockID).second) 213 return createProfileParseError( 214 Twine("duplicate basic block id found '") + BasicBlockIDStr + 215 "'"); 216 217 if (!BasicBlockID->BaseID && CurrentPosition) 218 return createProfileParseError( 219 "entry BB (0) does not begin a cluster."); 220 221 FI->second.ClusterInfo.emplace_back(BBClusterInfo{ 222 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); 223 } 224 CurrentCluster++; 225 continue; 226 case 'p': { // Basic block cloning path specifier. 227 // Skip the profile when we the profile iterator (FI) refers to the 228 // past-the-end element. 229 if (FI == ProgramPathAndClusterInfo.end()) 230 continue; 231 SmallSet<unsigned, 5> BBsInPath; 232 FI->second.ClonePaths.push_back({}); 233 for (size_t I = 0; I < Values.size(); ++I) { 234 auto BaseBBIDStr = Values[I]; 235 unsigned long long BaseBBID = 0; 236 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) 237 return createProfileParseError(Twine("unsigned integer expected: '") + 238 BaseBBIDStr + "'"); 239 if (I != 0 && !BBsInPath.insert(BaseBBID).second) 240 return createProfileParseError( 241 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); 242 FI->second.ClonePaths.back().push_back(BaseBBID); 243 } 244 continue; 245 } 246 default: 247 return createProfileParseError(Twine("invalid specifier: '") + 248 Twine(Specifier) + "'"); 249 } 250 llvm_unreachable("should not break from this switch statement"); 251 } 252 return Error::success(); 253 } 254 255 Error BasicBlockSectionsProfileReader::ReadV0Profile() { 256 auto FI = ProgramPathAndClusterInfo.end(); 257 // Current cluster ID corresponding to this function. 258 unsigned CurrentCluster = 0; 259 // Current position in the current cluster. 260 unsigned CurrentPosition = 0; 261 262 // Temporary set to ensure every basic block ID appears once in the clusters 263 // of a function. 264 SmallSet<unsigned, 4> FuncBBIDs; 265 266 for (; !LineIt.is_at_eof(); ++LineIt) { 267 StringRef S(*LineIt); 268 if (S[0] == '@') 269 continue; 270 // Check for the leading "!" 271 if (!S.consume_front("!") || S.empty()) 272 break; 273 // Check for second "!" which indicates a cluster of basic blocks. 274 if (S.consume_front("!")) { 275 // Skip the profile when we the profile iterator (FI) refers to the 276 // past-the-end element. 277 if (FI == ProgramPathAndClusterInfo.end()) 278 continue; 279 SmallVector<StringRef, 4> BBIDs; 280 S.split(BBIDs, ' '); 281 // Reset current cluster position. 282 CurrentPosition = 0; 283 for (auto BBIDStr : BBIDs) { 284 unsigned long long BBID; 285 if (getAsUnsignedInteger(BBIDStr, 10, BBID)) 286 return createProfileParseError(Twine("unsigned integer expected: '") + 287 BBIDStr + "'"); 288 if (!FuncBBIDs.insert(BBID).second) 289 return createProfileParseError( 290 Twine("duplicate basic block id found '") + BBIDStr + "'"); 291 if (BBID == 0 && CurrentPosition) 292 return createProfileParseError( 293 "entry BB (0) does not begin a cluster"); 294 295 FI->second.ClusterInfo.emplace_back( 296 BBClusterInfo({{static_cast<unsigned>(BBID), 0}, 297 CurrentCluster, 298 CurrentPosition++})); 299 } 300 CurrentCluster++; 301 } else { 302 // This is a function name specifier. It may include a debug info filename 303 // specifier starting with `M=`. 304 auto [AliasesStr, DIFilenameStr] = S.split(' '); 305 SmallString<128> DIFilename; 306 if (DIFilenameStr.starts_with("M=")) { 307 DIFilename = 308 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); 309 if (DIFilename.empty()) 310 return createProfileParseError("empty module name specifier"); 311 } else if (!DIFilenameStr.empty()) { 312 return createProfileParseError("unknown string found: '" + 313 DIFilenameStr + "'"); 314 } 315 // Function aliases are separated using '/'. We use the first function 316 // name for the cluster info mapping and delegate all other aliases to 317 // this one. 318 SmallVector<StringRef, 4> Aliases; 319 AliasesStr.split(Aliases, '/'); 320 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { 321 auto It = FunctionNameToDIFilename.find(Alias); 322 // No match if this function name is not found in this module. 323 if (It == FunctionNameToDIFilename.end()) 324 return false; 325 // Return a match if debug-info-filename is not specified. Otherwise, 326 // check for equality. 327 return DIFilename.empty() || It->second.equals(DIFilename); 328 }); 329 if (!FunctionFound) { 330 // Skip the following profile by setting the profile iterator (FI) to 331 // the past-the-end element. 332 FI = ProgramPathAndClusterInfo.end(); 333 continue; 334 } 335 for (size_t i = 1; i < Aliases.size(); ++i) 336 FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); 337 338 // Prepare for parsing clusters of this function name. 339 // Start a new cluster map for this function name. 340 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); 341 // Report error when multiple profiles have been specified for the same 342 // function. 343 if (!R.second) 344 return createProfileParseError("duplicate profile for function '" + 345 Aliases.front() + "'"); 346 FI = R.first; 347 CurrentCluster = 0; 348 FuncBBIDs.clear(); 349 } 350 } 351 return Error::success(); 352 } 353 354 // Basic Block Sections can be enabled for a subset of machine basic blocks. 355 // This is done by passing a file containing names of functions for which basic 356 // block sections are desired. Additionally, machine basic block ids of the 357 // functions can also be specified for a finer granularity. Moreover, a cluster 358 // of basic blocks could be assigned to the same section. 359 // Optionally, a debug-info filename can be specified for each function to allow 360 // distinguishing internal-linkage functions of the same name. 361 // A file with basic block sections for all of function main and three blocks 362 // for function foo (of which 1 and 2 are placed in a cluster) looks like this: 363 // (Profile for function foo is only loaded when its debug-info filename 364 // matches 'path/to/foo_file.cc'). 365 // ---------------------------- 366 // list.txt: 367 // !main 368 // !foo M=path/to/foo_file.cc 369 // !!1 2 370 // !!4 371 Error BasicBlockSectionsProfileReader::ReadProfile() { 372 assert(MBuf); 373 374 unsigned long long Version = 0; 375 StringRef FirstLine(*LineIt); 376 if (FirstLine.consume_front("v")) { 377 if (getAsUnsignedInteger(FirstLine, 10, Version)) { 378 return createProfileParseError(Twine("version number expected: '") + 379 FirstLine + "'"); 380 } 381 if (Version > 1) { 382 return createProfileParseError(Twine("invalid profile version: ") + 383 Twine(Version)); 384 } 385 ++LineIt; 386 } 387 388 switch (Version) { 389 case 0: 390 // TODO: Deprecate V0 once V1 is fully integrated downstream. 391 return ReadV0Profile(); 392 case 1: 393 return ReadV1Profile(); 394 default: 395 llvm_unreachable("Invalid profile version."); 396 } 397 } 398 399 bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { 400 if (!BBSPR.MBuf) 401 return false; 402 // Get the function name to debug info filename mapping. 403 BBSPR.FunctionNameToDIFilename.clear(); 404 for (const Function &F : M) { 405 SmallString<128> DIFilename; 406 if (F.isDeclaration()) 407 continue; 408 DISubprogram *Subprogram = F.getSubprogram(); 409 if (Subprogram) { 410 llvm::DICompileUnit *CU = Subprogram->getUnit(); 411 if (CU) 412 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); 413 } 414 [[maybe_unused]] bool inserted = 415 BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) 416 .second; 417 assert(inserted); 418 } 419 if (auto Err = BBSPR.ReadProfile()) 420 report_fatal_error(std::move(Err)); 421 return false; 422 } 423 424 AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; 425 426 BasicBlockSectionsProfileReader 427 BasicBlockSectionsProfileReaderAnalysis::run(Function &F, 428 FunctionAnalysisManager &AM) { 429 return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); 430 } 431 432 bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( 433 StringRef FuncName) const { 434 return BBSPR.isFunctionHot(FuncName); 435 } 436 437 std::pair<bool, SmallVector<BBClusterInfo>> 438 BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( 439 StringRef FuncName) const { 440 return BBSPR.getClusterInfoForFunction(FuncName); 441 } 442 443 SmallVector<SmallVector<unsigned>> 444 BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( 445 StringRef FuncName) const { 446 return BBSPR.getClonePathsForFunction(FuncName); 447 } 448 449 BasicBlockSectionsProfileReader & 450 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { 451 return BBSPR; 452 } 453 454 ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( 455 const MemoryBuffer *Buf) { 456 return new BasicBlockSectionsProfileReaderWrapperPass(Buf); 457 } 458