xref: /freebsd/contrib/llvm-project/llvm/lib/XRay/Profile.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Defines the XRay Profile class representing the latency profile generated by
10 // XRay's profiling mode.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Profile.h"
14 
15 #include "llvm/Support/DataExtractor.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/XRay/Trace.h"
19 #include <memory>
20 
21 namespace llvm {
22 namespace xray {
23 
Profile(const Profile & O)24 Profile::Profile(const Profile &O) {
25   // We need to re-create all the tries from the original (O), into the current
26   // Profile being initialized, through the Block instances we see.
27   for (const auto &Block : O) {
28     Blocks.push_back({Block.Thread, {}});
29     auto &B = Blocks.back();
30     for (const auto &PathData : Block.PathData)
31       B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
32                             PathData.second});
33   }
34 }
35 
operator =(const Profile & O)36 Profile &Profile::operator=(const Profile &O) {
37   Profile P = O;
38   *this = std::move(P);
39   return *this;
40 }
41 
42 namespace {
43 
44 struct BlockHeader {
45   uint32_t Size;
46   uint32_t Number;
47   uint64_t Thread;
48 };
49 
readBlockHeader(DataExtractor & Extractor,uint64_t & Offset)50 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
51                                              uint64_t &Offset) {
52   BlockHeader H;
53   uint64_t CurrentOffset = Offset;
54   H.Size = Extractor.getU32(&Offset);
55   if (Offset == CurrentOffset)
56     return make_error<StringError>(
57         Twine("Error parsing block header size at offset '") +
58             Twine(CurrentOffset) + "'",
59         std::make_error_code(std::errc::invalid_argument));
60   CurrentOffset = Offset;
61   H.Number = Extractor.getU32(&Offset);
62   if (Offset == CurrentOffset)
63     return make_error<StringError>(
64         Twine("Error parsing block header number at offset '") +
65             Twine(CurrentOffset) + "'",
66         std::make_error_code(std::errc::invalid_argument));
67   CurrentOffset = Offset;
68   H.Thread = Extractor.getU64(&Offset);
69   if (Offset == CurrentOffset)
70     return make_error<StringError>(
71         Twine("Error parsing block header thread id at offset '") +
72             Twine(CurrentOffset) + "'",
73         std::make_error_code(std::errc::invalid_argument));
74   return H;
75 }
76 
readPath(DataExtractor & Extractor,uint64_t & Offset)77 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
78                                                        uint64_t &Offset) {
79   // We're reading a sequence of int32_t's until we find a 0.
80   std::vector<Profile::FuncID> Path;
81   auto CurrentOffset = Offset;
82   int32_t FuncId;
83   do {
84     FuncId = Extractor.getSigned(&Offset, 4);
85     if (CurrentOffset == Offset)
86       return make_error<StringError>(
87           Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
88           std::make_error_code(std::errc::invalid_argument));
89     CurrentOffset = Offset;
90     Path.push_back(FuncId);
91   } while (FuncId != 0);
92   return std::move(Path);
93 }
94 
readData(DataExtractor & Extractor,uint64_t & Offset)95 static Expected<Profile::Data> readData(DataExtractor &Extractor,
96                                         uint64_t &Offset) {
97   // We expect a certain number of elements for Data:
98   //   - A 64-bit CallCount
99   //   - A 64-bit CumulativeLocalTime counter
100   Profile::Data D;
101   auto CurrentOffset = Offset;
102   D.CallCount = Extractor.getU64(&Offset);
103   if (CurrentOffset == Offset)
104     return make_error<StringError>(
105         Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
106             "'",
107         std::make_error_code(std::errc::invalid_argument));
108   CurrentOffset = Offset;
109   D.CumulativeLocalTime = Extractor.getU64(&Offset);
110   if (CurrentOffset == Offset)
111     return make_error<StringError>(
112         Twine("Error parsing cumulative local time at offset '") +
113             Twine(CurrentOffset) + "'",
114         std::make_error_code(std::errc::invalid_argument));
115   return D;
116 }
117 
118 } // namespace
119 
addBlock(Block && B)120 Error Profile::addBlock(Block &&B) {
121   if (B.PathData.empty())
122     return make_error<StringError>(
123         "Block may not have empty path data.",
124         std::make_error_code(std::errc::invalid_argument));
125 
126   Blocks.emplace_back(std::move(B));
127   return Error::success();
128 }
129 
expandPath(PathID P) const130 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
131   auto It = PathIDMap.find(P);
132   if (It == PathIDMap.end())
133     return make_error<StringError>(
134         Twine("PathID not found: ") + Twine(P),
135         std::make_error_code(std::errc::invalid_argument));
136   std::vector<Profile::FuncID> Path;
137   for (auto Node = It->second; Node; Node = Node->Caller)
138     Path.push_back(Node->Func);
139   return std::move(Path);
140 }
141 
internPath(ArrayRef<FuncID> P)142 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
143   if (P.empty())
144     return 0;
145 
146   auto RootToLeafPath = reverse(P);
147 
148   // Find the root.
149   auto It = RootToLeafPath.begin();
150   auto PathRoot = *It++;
151   auto RootIt =
152       find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
153 
154   // If we've not seen this root before, remember it.
155   TrieNode *Node = nullptr;
156   if (RootIt == Roots.end()) {
157     NodeStorage.emplace_back();
158     Node = &NodeStorage.back();
159     Node->Func = PathRoot;
160     Roots.push_back(Node);
161   } else {
162     Node = *RootIt;
163   }
164 
165   // Now traverse the path, re-creating if necessary.
166   while (It != RootToLeafPath.end()) {
167     auto NodeFuncID = *It++;
168     auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
169       return N->Func == NodeFuncID;
170     });
171     if (CalleeIt == Node->Callees.end()) {
172       NodeStorage.emplace_back();
173       auto NewNode = &NodeStorage.back();
174       NewNode->Func = NodeFuncID;
175       NewNode->Caller = Node;
176       Node->Callees.push_back(NewNode);
177       Node = NewNode;
178     } else {
179       Node = *CalleeIt;
180     }
181   }
182 
183   // At this point, Node *must* be pointing at the leaf.
184   assert(Node->Func == P.front());
185   if (Node->ID == 0) {
186     Node->ID = NextID++;
187     PathIDMap.insert({Node->ID, Node});
188   }
189   return Node->ID;
190 }
191 
mergeProfilesByThread(const Profile & L,const Profile & R)192 Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
193   Profile Merged;
194   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
195   using PathDataMapPtr = std::unique_ptr<PathDataMap>;
196   using PathDataVector = decltype(Profile::Block::PathData);
197   using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
198   ThreadProfileIndexMap ThreadProfileIndex;
199 
200   for (const auto &P : {std::ref(L), std::ref(R)})
201     for (const auto &Block : P.get()) {
202       ThreadProfileIndexMap::iterator It;
203       std::tie(It, std::ignore) = ThreadProfileIndex.insert(
204           {Block.Thread, std::make_unique<PathDataMap>()});
205       for (const auto &PathAndData : Block.PathData) {
206         auto &PathID = PathAndData.first;
207         auto &Data = PathAndData.second;
208         auto NewPathID =
209             Merged.internPath(cantFail(P.get().expandPath(PathID)));
210         PathDataMap::iterator PathDataIt;
211         bool Inserted;
212         std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
213         if (!Inserted) {
214           auto &ExistingData = PathDataIt->second;
215           ExistingData.CallCount += Data.CallCount;
216           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
217         }
218       }
219     }
220 
221   for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
222     PathDataVector PathAndData;
223     PathAndData.reserve(IndexedThreadBlock.second->size());
224     copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
225     cantFail(
226         Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
227   }
228   return Merged;
229 }
230 
mergeProfilesByStack(const Profile & L,const Profile & R)231 Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
232   Profile Merged;
233   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
234   PathDataMap PathData;
235   using PathDataVector = decltype(Profile::Block::PathData);
236   for (const auto &P : {std::ref(L), std::ref(R)})
237     for (const auto &Block : P.get())
238       for (const auto &PathAndData : Block.PathData) {
239         auto &PathId = PathAndData.first;
240         auto &Data = PathAndData.second;
241         auto NewPathID =
242             Merged.internPath(cantFail(P.get().expandPath(PathId)));
243         PathDataMap::iterator PathDataIt;
244         bool Inserted;
245         std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
246         if (!Inserted) {
247           auto &ExistingData = PathDataIt->second;
248           ExistingData.CallCount += Data.CallCount;
249           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
250         }
251       }
252 
253   // In the end there's a single Block, for thread 0.
254   PathDataVector Block;
255   Block.reserve(PathData.size());
256   copy(PathData, std::back_inserter(Block));
257   cantFail(Merged.addBlock({0, std::move(Block)}));
258   return Merged;
259 }
260 
loadProfile(StringRef Filename)261 Expected<Profile> loadProfile(StringRef Filename) {
262   Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
263   if (!FdOrErr)
264     return FdOrErr.takeError();
265 
266   uint64_t FileSize;
267   if (auto EC = sys::fs::file_size(Filename, FileSize))
268     return make_error<StringError>(
269         Twine("Cannot get filesize of '") + Filename + "'", EC);
270 
271   std::error_code EC;
272   sys::fs::mapped_file_region MappedFile(
273       *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
274       EC);
275   sys::fs::closeFile(*FdOrErr);
276   if (EC)
277     return make_error<StringError>(
278         Twine("Cannot mmap profile '") + Filename + "'", EC);
279   StringRef Data(MappedFile.data(), MappedFile.size());
280 
281   Profile P;
282   uint64_t Offset = 0;
283   DataExtractor Extractor(Data, true, 8);
284 
285   // For each block we get from the file:
286   while (Offset != MappedFile.size()) {
287     auto HeaderOrError = readBlockHeader(Extractor, Offset);
288     if (!HeaderOrError)
289       return HeaderOrError.takeError();
290 
291     // TODO: Maybe store this header information for each block, even just for
292     // debugging?
293     const auto &Header = HeaderOrError.get();
294 
295     // Read in the path data.
296     auto PathOrError = readPath(Extractor, Offset);
297     if (!PathOrError)
298       return PathOrError.takeError();
299     const auto &Path = PathOrError.get();
300 
301     // For each path we encounter, we should intern it to get a PathID.
302     auto DataOrError = readData(Extractor, Offset);
303     if (!DataOrError)
304       return DataOrError.takeError();
305     auto &Data = DataOrError.get();
306 
307     if (auto E =
308             P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
309                                       {{P.internPath(Path), std::move(Data)}}}))
310       return std::move(E);
311   }
312 
313   return P;
314 }
315 
316 namespace {
317 
318 struct StackEntry {
319   uint64_t Timestamp;
320   Profile::FuncID FuncId;
321 };
322 
323 } // namespace
324 
profileFromTrace(const Trace & T)325 Expected<Profile> profileFromTrace(const Trace &T) {
326   Profile P;
327 
328   // The implementation of the algorithm re-creates the execution of
329   // the functions based on the trace data. To do this, we set up a number of
330   // data structures to track the execution context of every thread in the
331   // Trace.
332   DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
333   DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
334       ThreadPathData;
335 
336   //  We then do a pass through the Trace to account data on a per-thread-basis.
337   for (const auto &E : T) {
338     auto &TSD = ThreadStacks[E.TId];
339     switch (E.Type) {
340     case RecordTypes::ENTER:
341     case RecordTypes::ENTER_ARG:
342 
343       // Push entries into the function call stack.
344       TSD.push_back({E.TSC, E.FuncId});
345       break;
346 
347     case RecordTypes::EXIT:
348     case RecordTypes::TAIL_EXIT:
349 
350       // Exits cause some accounting to happen, based on the state of the stack.
351       // For each function we pop off the stack, we take note of the path and
352       // record the cumulative state for this path. As we're doing this, we
353       // intern the path into the Profile.
354       while (!TSD.empty()) {
355         auto Top = TSD.back();
356         auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
357         SmallVector<Profile::FuncID, 16> Path;
358         transform(reverse(TSD), std::back_inserter(Path),
359                   std::mem_fn(&StackEntry::FuncId));
360         auto InternedPath = P.internPath(Path);
361         auto &TPD = ThreadPathData[E.TId][InternedPath];
362         ++TPD.CallCount;
363         TPD.CumulativeLocalTime += FunctionLocalTime;
364         TSD.pop_back();
365 
366         // If we've matched the corresponding entry event for this function,
367         // then we exit the loop.
368         if (Top.FuncId == E.FuncId)
369           break;
370 
371         // FIXME: Consider the intermediate times and the cumulative tree time
372         // as well.
373       }
374 
375       break;
376 
377     case RecordTypes::CUSTOM_EVENT:
378     case RecordTypes::TYPED_EVENT:
379       // TODO: Support an extension point to allow handling of custom and typed
380       // events in profiles.
381       break;
382     }
383   }
384 
385   // Once we've gone through the Trace, we now create one Block per thread in
386   // the Profile.
387   for (const auto &ThreadPaths : ThreadPathData) {
388     const auto &TID = ThreadPaths.first;
389     const auto &PathsData = ThreadPaths.second;
390     if (auto E = P.addBlock({
391             TID,
392             std::vector<std::pair<Profile::PathID, Profile::Data>>(
393                 PathsData.begin(), PathsData.end()),
394         }))
395       return std::move(E);
396   }
397 
398   return P;
399 }
400 
401 } // namespace xray
402 } // namespace llvm
403