1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Defines the XRay Profile class representing the latency profile generated by 10 // XRay's profiling mode. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/XRay/Profile.h" 14 15 #include "llvm/Support/DataExtractor.h" 16 #include "llvm/Support/Error.h" 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/XRay/Trace.h" 19 #include <deque> 20 #include <memory> 21 22 namespace llvm { 23 namespace xray { 24 25 Profile::Profile(const Profile &O) { 26 // We need to re-create all the tries from the original (O), into the current 27 // Profile being initialized, through the Block instances we see. 28 for (const auto &Block : O) { 29 Blocks.push_back({Block.Thread, {}}); 30 auto &B = Blocks.back(); 31 for (const auto &PathData : Block.PathData) 32 B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))), 33 PathData.second}); 34 } 35 } 36 37 Profile &Profile::operator=(const Profile &O) { 38 Profile P = O; 39 *this = std::move(P); 40 return *this; 41 } 42 43 namespace { 44 45 struct BlockHeader { 46 uint32_t Size; 47 uint32_t Number; 48 uint64_t Thread; 49 }; 50 51 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, 52 uint32_t &Offset) { 53 BlockHeader H; 54 uint32_t CurrentOffset = Offset; 55 H.Size = Extractor.getU32(&Offset); 56 if (Offset == CurrentOffset) 57 return make_error<StringError>( 58 Twine("Error parsing block header size at offset '") + 59 Twine(CurrentOffset) + "'", 60 std::make_error_code(std::errc::invalid_argument)); 61 CurrentOffset = Offset; 62 H.Number = Extractor.getU32(&Offset); 63 if (Offset == CurrentOffset) 64 return make_error<StringError>( 65 Twine("Error parsing block header number at offset '") + 66 Twine(CurrentOffset) + "'", 67 std::make_error_code(std::errc::invalid_argument)); 68 CurrentOffset = Offset; 69 H.Thread = Extractor.getU64(&Offset); 70 if (Offset == CurrentOffset) 71 return make_error<StringError>( 72 Twine("Error parsing block header thread id at offset '") + 73 Twine(CurrentOffset) + "'", 74 std::make_error_code(std::errc::invalid_argument)); 75 return H; 76 } 77 78 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor, 79 uint32_t &Offset) { 80 // We're reading a sequence of int32_t's until we find a 0. 81 std::vector<Profile::FuncID> Path; 82 auto CurrentOffset = Offset; 83 int32_t FuncId; 84 do { 85 FuncId = Extractor.getSigned(&Offset, 4); 86 if (CurrentOffset == Offset) 87 return make_error<StringError>( 88 Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", 89 std::make_error_code(std::errc::invalid_argument)); 90 CurrentOffset = Offset; 91 Path.push_back(FuncId); 92 } while (FuncId != 0); 93 return std::move(Path); 94 } 95 96 static Expected<Profile::Data> readData(DataExtractor &Extractor, 97 uint32_t &Offset) { 98 // We expect a certain number of elements for Data: 99 // - A 64-bit CallCount 100 // - A 64-bit CumulativeLocalTime counter 101 Profile::Data D; 102 auto CurrentOffset = Offset; 103 D.CallCount = Extractor.getU64(&Offset); 104 if (CurrentOffset == Offset) 105 return make_error<StringError>( 106 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + 107 "'", 108 std::make_error_code(std::errc::invalid_argument)); 109 CurrentOffset = Offset; 110 D.CumulativeLocalTime = Extractor.getU64(&Offset); 111 if (CurrentOffset == Offset) 112 return make_error<StringError>( 113 Twine("Error parsing cumulative local time at offset '") + 114 Twine(CurrentOffset) + "'", 115 std::make_error_code(std::errc::invalid_argument)); 116 return D; 117 } 118 119 } // namespace 120 121 Error Profile::addBlock(Block &&B) { 122 if (B.PathData.empty()) 123 return make_error<StringError>( 124 "Block may not have empty path data.", 125 std::make_error_code(std::errc::invalid_argument)); 126 127 Blocks.emplace_back(std::move(B)); 128 return Error::success(); 129 } 130 131 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { 132 auto It = PathIDMap.find(P); 133 if (It == PathIDMap.end()) 134 return make_error<StringError>( 135 Twine("PathID not found: ") + Twine(P), 136 std::make_error_code(std::errc::invalid_argument)); 137 std::vector<Profile::FuncID> Path; 138 for (auto Node = It->second; Node; Node = Node->Caller) 139 Path.push_back(Node->Func); 140 return std::move(Path); 141 } 142 143 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { 144 if (P.empty()) 145 return 0; 146 147 auto RootToLeafPath = reverse(P); 148 149 // Find the root. 150 auto It = RootToLeafPath.begin(); 151 auto PathRoot = *It++; 152 auto RootIt = 153 find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); 154 155 // If we've not seen this root before, remember it. 156 TrieNode *Node = nullptr; 157 if (RootIt == Roots.end()) { 158 NodeStorage.emplace_back(); 159 Node = &NodeStorage.back(); 160 Node->Func = PathRoot; 161 Roots.push_back(Node); 162 } else { 163 Node = *RootIt; 164 } 165 166 // Now traverse the path, re-creating if necessary. 167 while (It != RootToLeafPath.end()) { 168 auto NodeFuncID = *It++; 169 auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { 170 return N->Func == NodeFuncID; 171 }); 172 if (CalleeIt == Node->Callees.end()) { 173 NodeStorage.emplace_back(); 174 auto NewNode = &NodeStorage.back(); 175 NewNode->Func = NodeFuncID; 176 NewNode->Caller = Node; 177 Node->Callees.push_back(NewNode); 178 Node = NewNode; 179 } else { 180 Node = *CalleeIt; 181 } 182 } 183 184 // At this point, Node *must* be pointing at the leaf. 185 assert(Node->Func == P.front()); 186 if (Node->ID == 0) { 187 Node->ID = NextID++; 188 PathIDMap.insert({Node->ID, Node}); 189 } 190 return Node->ID; 191 } 192 193 Profile mergeProfilesByThread(const Profile &L, const Profile &R) { 194 Profile Merged; 195 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 196 using PathDataMapPtr = std::unique_ptr<PathDataMap>; 197 using PathDataVector = decltype(Profile::Block::PathData); 198 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; 199 ThreadProfileIndexMap ThreadProfileIndex; 200 201 for (const auto &P : {std::ref(L), std::ref(R)}) 202 for (const auto &Block : P.get()) { 203 ThreadProfileIndexMap::iterator It; 204 std::tie(It, std::ignore) = ThreadProfileIndex.insert( 205 {Block.Thread, PathDataMapPtr{new PathDataMap()}}); 206 for (const auto &PathAndData : Block.PathData) { 207 auto &PathID = PathAndData.first; 208 auto &Data = PathAndData.second; 209 auto NewPathID = 210 Merged.internPath(cantFail(P.get().expandPath(PathID))); 211 PathDataMap::iterator PathDataIt; 212 bool Inserted; 213 std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); 214 if (!Inserted) { 215 auto &ExistingData = PathDataIt->second; 216 ExistingData.CallCount += Data.CallCount; 217 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 218 } 219 } 220 } 221 222 for (const auto &IndexedThreadBlock : ThreadProfileIndex) { 223 PathDataVector PathAndData; 224 PathAndData.reserve(IndexedThreadBlock.second->size()); 225 copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); 226 cantFail( 227 Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); 228 } 229 return Merged; 230 } 231 232 Profile mergeProfilesByStack(const Profile &L, const Profile &R) { 233 Profile Merged; 234 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 235 PathDataMap PathData; 236 using PathDataVector = decltype(Profile::Block::PathData); 237 for (const auto &P : {std::ref(L), std::ref(R)}) 238 for (const auto &Block : P.get()) 239 for (const auto &PathAndData : Block.PathData) { 240 auto &PathId = PathAndData.first; 241 auto &Data = PathAndData.second; 242 auto NewPathID = 243 Merged.internPath(cantFail(P.get().expandPath(PathId))); 244 PathDataMap::iterator PathDataIt; 245 bool Inserted; 246 std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); 247 if (!Inserted) { 248 auto &ExistingData = PathDataIt->second; 249 ExistingData.CallCount += Data.CallCount; 250 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 251 } 252 } 253 254 // In the end there's a single Block, for thread 0. 255 PathDataVector Block; 256 Block.reserve(PathData.size()); 257 copy(PathData, std::back_inserter(Block)); 258 cantFail(Merged.addBlock({0, std::move(Block)})); 259 return Merged; 260 } 261 262 Expected<Profile> loadProfile(StringRef Filename) { 263 Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename); 264 if (!FdOrErr) 265 return FdOrErr.takeError(); 266 267 uint64_t FileSize; 268 if (auto EC = sys::fs::file_size(Filename, FileSize)) 269 return make_error<StringError>( 270 Twine("Cannot get filesize of '") + Filename + "'", EC); 271 272 std::error_code EC; 273 sys::fs::mapped_file_region MappedFile( 274 *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, 275 EC); 276 sys::fs::closeFile(*FdOrErr); 277 if (EC) 278 return make_error<StringError>( 279 Twine("Cannot mmap profile '") + Filename + "'", EC); 280 StringRef Data(MappedFile.data(), MappedFile.size()); 281 282 Profile P; 283 uint32_t Offset = 0; 284 DataExtractor Extractor(Data, true, 8); 285 286 // For each block we get from the file: 287 while (Offset != MappedFile.size()) { 288 auto HeaderOrError = readBlockHeader(Extractor, Offset); 289 if (!HeaderOrError) 290 return HeaderOrError.takeError(); 291 292 // TODO: Maybe store this header information for each block, even just for 293 // debugging? 294 const auto &Header = HeaderOrError.get(); 295 296 // Read in the path data. 297 auto PathOrError = readPath(Extractor, Offset); 298 if (!PathOrError) 299 return PathOrError.takeError(); 300 const auto &Path = PathOrError.get(); 301 302 // For each path we encounter, we should intern it to get a PathID. 303 auto DataOrError = readData(Extractor, Offset); 304 if (!DataOrError) 305 return DataOrError.takeError(); 306 auto &Data = DataOrError.get(); 307 308 if (auto E = 309 P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, 310 {{P.internPath(Path), std::move(Data)}}})) 311 return std::move(E); 312 } 313 314 return P; 315 } 316 317 namespace { 318 319 struct StackEntry { 320 uint64_t Timestamp; 321 Profile::FuncID FuncId; 322 }; 323 324 } // namespace 325 326 Expected<Profile> profileFromTrace(const Trace &T) { 327 Profile P; 328 329 // The implementation of the algorithm re-creates the execution of 330 // the functions based on the trace data. To do this, we set up a number of 331 // data structures to track the execution context of every thread in the 332 // Trace. 333 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; 334 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> 335 ThreadPathData; 336 337 // We then do a pass through the Trace to account data on a per-thread-basis. 338 for (const auto &E : T) { 339 auto &TSD = ThreadStacks[E.TId]; 340 switch (E.Type) { 341 case RecordTypes::ENTER: 342 case RecordTypes::ENTER_ARG: 343 344 // Push entries into the function call stack. 345 TSD.push_back({E.TSC, E.FuncId}); 346 break; 347 348 case RecordTypes::EXIT: 349 case RecordTypes::TAIL_EXIT: 350 351 // Exits cause some accounting to happen, based on the state of the stack. 352 // For each function we pop off the stack, we take note of the path and 353 // record the cumulative state for this path. As we're doing this, we 354 // intern the path into the Profile. 355 while (!TSD.empty()) { 356 auto Top = TSD.back(); 357 auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); 358 SmallVector<Profile::FuncID, 16> Path; 359 transform(reverse(TSD), std::back_inserter(Path), 360 std::mem_fn(&StackEntry::FuncId)); 361 auto InternedPath = P.internPath(Path); 362 auto &TPD = ThreadPathData[E.TId][InternedPath]; 363 ++TPD.CallCount; 364 TPD.CumulativeLocalTime += FunctionLocalTime; 365 TSD.pop_back(); 366 367 // If we've matched the corresponding entry event for this function, 368 // then we exit the loop. 369 if (Top.FuncId == E.FuncId) 370 break; 371 372 // FIXME: Consider the intermediate times and the cumulative tree time 373 // as well. 374 } 375 376 break; 377 378 case RecordTypes::CUSTOM_EVENT: 379 case RecordTypes::TYPED_EVENT: 380 // TODO: Support an extension point to allow handling of custom and typed 381 // events in profiles. 382 break; 383 } 384 } 385 386 // Once we've gone through the Trace, we now create one Block per thread in 387 // the Profile. 388 for (const auto &ThreadPaths : ThreadPathData) { 389 const auto &TID = ThreadPaths.first; 390 const auto &PathsData = ThreadPaths.second; 391 if (auto E = P.addBlock({ 392 TID, 393 std::vector<std::pair<Profile::PathID, Profile::Data>>( 394 PathsData.begin(), PathsData.end()), 395 })) 396 return std::move(E); 397 } 398 399 return P; 400 } 401 402 } // namespace xray 403 } // namespace llvm 404