xref: /freebsd/contrib/llvm-project/llvm/lib/XRay/Profile.cpp (revision a3c35da61bb201168575f1d18f4ca3e96937d35c)
1  //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // Defines the XRay Profile class representing the latency profile generated by
10  // XRay's profiling mode.
11  //
12  //===----------------------------------------------------------------------===//
13  #include "llvm/XRay/Profile.h"
14  
15  #include "llvm/Support/DataExtractor.h"
16  #include "llvm/Support/Error.h"
17  #include "llvm/Support/FileSystem.h"
18  #include "llvm/XRay/Trace.h"
19  #include <deque>
20  #include <memory>
21  
22  namespace llvm {
23  namespace xray {
24  
25  Profile::Profile(const Profile &O) {
26    // We need to re-create all the tries from the original (O), into the current
27    // Profile being initialized, through the Block instances we see.
28    for (const auto &Block : O) {
29      Blocks.push_back({Block.Thread, {}});
30      auto &B = Blocks.back();
31      for (const auto &PathData : Block.PathData)
32        B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
33                              PathData.second});
34    }
35  }
36  
37  Profile &Profile::operator=(const Profile &O) {
38    Profile P = O;
39    *this = std::move(P);
40    return *this;
41  }
42  
43  namespace {
44  
45  struct BlockHeader {
46    uint32_t Size;
47    uint32_t Number;
48    uint64_t Thread;
49  };
50  
51  static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
52                                               uint32_t &Offset) {
53    BlockHeader H;
54    uint32_t CurrentOffset = Offset;
55    H.Size = Extractor.getU32(&Offset);
56    if (Offset == CurrentOffset)
57      return make_error<StringError>(
58          Twine("Error parsing block header size at offset '") +
59              Twine(CurrentOffset) + "'",
60          std::make_error_code(std::errc::invalid_argument));
61    CurrentOffset = Offset;
62    H.Number = Extractor.getU32(&Offset);
63    if (Offset == CurrentOffset)
64      return make_error<StringError>(
65          Twine("Error parsing block header number at offset '") +
66              Twine(CurrentOffset) + "'",
67          std::make_error_code(std::errc::invalid_argument));
68    CurrentOffset = Offset;
69    H.Thread = Extractor.getU64(&Offset);
70    if (Offset == CurrentOffset)
71      return make_error<StringError>(
72          Twine("Error parsing block header thread id at offset '") +
73              Twine(CurrentOffset) + "'",
74          std::make_error_code(std::errc::invalid_argument));
75    return H;
76  }
77  
78  static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
79                                                         uint32_t &Offset) {
80    // We're reading a sequence of int32_t's until we find a 0.
81    std::vector<Profile::FuncID> Path;
82    auto CurrentOffset = Offset;
83    int32_t FuncId;
84    do {
85      FuncId = Extractor.getSigned(&Offset, 4);
86      if (CurrentOffset == Offset)
87        return make_error<StringError>(
88            Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
89            std::make_error_code(std::errc::invalid_argument));
90      CurrentOffset = Offset;
91      Path.push_back(FuncId);
92    } while (FuncId != 0);
93    return std::move(Path);
94  }
95  
96  static Expected<Profile::Data> readData(DataExtractor &Extractor,
97                                          uint32_t &Offset) {
98    // We expect a certain number of elements for Data:
99    //   - A 64-bit CallCount
100    //   - A 64-bit CumulativeLocalTime counter
101    Profile::Data D;
102    auto CurrentOffset = Offset;
103    D.CallCount = Extractor.getU64(&Offset);
104    if (CurrentOffset == Offset)
105      return make_error<StringError>(
106          Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
107              "'",
108          std::make_error_code(std::errc::invalid_argument));
109    CurrentOffset = Offset;
110    D.CumulativeLocalTime = Extractor.getU64(&Offset);
111    if (CurrentOffset == Offset)
112      return make_error<StringError>(
113          Twine("Error parsing cumulative local time at offset '") +
114              Twine(CurrentOffset) + "'",
115          std::make_error_code(std::errc::invalid_argument));
116    return D;
117  }
118  
119  } // namespace
120  
121  Error Profile::addBlock(Block &&B) {
122    if (B.PathData.empty())
123      return make_error<StringError>(
124          "Block may not have empty path data.",
125          std::make_error_code(std::errc::invalid_argument));
126  
127    Blocks.emplace_back(std::move(B));
128    return Error::success();
129  }
130  
131  Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
132    auto It = PathIDMap.find(P);
133    if (It == PathIDMap.end())
134      return make_error<StringError>(
135          Twine("PathID not found: ") + Twine(P),
136          std::make_error_code(std::errc::invalid_argument));
137    std::vector<Profile::FuncID> Path;
138    for (auto Node = It->second; Node; Node = Node->Caller)
139      Path.push_back(Node->Func);
140    return std::move(Path);
141  }
142  
143  Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
144    if (P.empty())
145      return 0;
146  
147    auto RootToLeafPath = reverse(P);
148  
149    // Find the root.
150    auto It = RootToLeafPath.begin();
151    auto PathRoot = *It++;
152    auto RootIt =
153        find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
154  
155    // If we've not seen this root before, remember it.
156    TrieNode *Node = nullptr;
157    if (RootIt == Roots.end()) {
158      NodeStorage.emplace_back();
159      Node = &NodeStorage.back();
160      Node->Func = PathRoot;
161      Roots.push_back(Node);
162    } else {
163      Node = *RootIt;
164    }
165  
166    // Now traverse the path, re-creating if necessary.
167    while (It != RootToLeafPath.end()) {
168      auto NodeFuncID = *It++;
169      auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
170        return N->Func == NodeFuncID;
171      });
172      if (CalleeIt == Node->Callees.end()) {
173        NodeStorage.emplace_back();
174        auto NewNode = &NodeStorage.back();
175        NewNode->Func = NodeFuncID;
176        NewNode->Caller = Node;
177        Node->Callees.push_back(NewNode);
178        Node = NewNode;
179      } else {
180        Node = *CalleeIt;
181      }
182    }
183  
184    // At this point, Node *must* be pointing at the leaf.
185    assert(Node->Func == P.front());
186    if (Node->ID == 0) {
187      Node->ID = NextID++;
188      PathIDMap.insert({Node->ID, Node});
189    }
190    return Node->ID;
191  }
192  
193  Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
194    Profile Merged;
195    using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
196    using PathDataMapPtr = std::unique_ptr<PathDataMap>;
197    using PathDataVector = decltype(Profile::Block::PathData);
198    using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
199    ThreadProfileIndexMap ThreadProfileIndex;
200  
201    for (const auto &P : {std::ref(L), std::ref(R)})
202      for (const auto &Block : P.get()) {
203        ThreadProfileIndexMap::iterator It;
204        std::tie(It, std::ignore) = ThreadProfileIndex.insert(
205            {Block.Thread, PathDataMapPtr{new PathDataMap()}});
206        for (const auto &PathAndData : Block.PathData) {
207          auto &PathID = PathAndData.first;
208          auto &Data = PathAndData.second;
209          auto NewPathID =
210              Merged.internPath(cantFail(P.get().expandPath(PathID)));
211          PathDataMap::iterator PathDataIt;
212          bool Inserted;
213          std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
214          if (!Inserted) {
215            auto &ExistingData = PathDataIt->second;
216            ExistingData.CallCount += Data.CallCount;
217            ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
218          }
219        }
220      }
221  
222    for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
223      PathDataVector PathAndData;
224      PathAndData.reserve(IndexedThreadBlock.second->size());
225      copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
226      cantFail(
227          Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
228    }
229    return Merged;
230  }
231  
232  Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
233    Profile Merged;
234    using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
235    PathDataMap PathData;
236    using PathDataVector = decltype(Profile::Block::PathData);
237    for (const auto &P : {std::ref(L), std::ref(R)})
238      for (const auto &Block : P.get())
239        for (const auto &PathAndData : Block.PathData) {
240          auto &PathId = PathAndData.first;
241          auto &Data = PathAndData.second;
242          auto NewPathID =
243              Merged.internPath(cantFail(P.get().expandPath(PathId)));
244          PathDataMap::iterator PathDataIt;
245          bool Inserted;
246          std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
247          if (!Inserted) {
248            auto &ExistingData = PathDataIt->second;
249            ExistingData.CallCount += Data.CallCount;
250            ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
251          }
252        }
253  
254    // In the end there's a single Block, for thread 0.
255    PathDataVector Block;
256    Block.reserve(PathData.size());
257    copy(PathData, std::back_inserter(Block));
258    cantFail(Merged.addBlock({0, std::move(Block)}));
259    return Merged;
260  }
261  
262  Expected<Profile> loadProfile(StringRef Filename) {
263    Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
264    if (!FdOrErr)
265      return FdOrErr.takeError();
266  
267    uint64_t FileSize;
268    if (auto EC = sys::fs::file_size(Filename, FileSize))
269      return make_error<StringError>(
270          Twine("Cannot get filesize of '") + Filename + "'", EC);
271  
272    std::error_code EC;
273    sys::fs::mapped_file_region MappedFile(
274        *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
275        EC);
276    sys::fs::closeFile(*FdOrErr);
277    if (EC)
278      return make_error<StringError>(
279          Twine("Cannot mmap profile '") + Filename + "'", EC);
280    StringRef Data(MappedFile.data(), MappedFile.size());
281  
282    Profile P;
283    uint32_t Offset = 0;
284    DataExtractor Extractor(Data, true, 8);
285  
286    // For each block we get from the file:
287    while (Offset != MappedFile.size()) {
288      auto HeaderOrError = readBlockHeader(Extractor, Offset);
289      if (!HeaderOrError)
290        return HeaderOrError.takeError();
291  
292      // TODO: Maybe store this header information for each block, even just for
293      // debugging?
294      const auto &Header = HeaderOrError.get();
295  
296      // Read in the path data.
297      auto PathOrError = readPath(Extractor, Offset);
298      if (!PathOrError)
299        return PathOrError.takeError();
300      const auto &Path = PathOrError.get();
301  
302      // For each path we encounter, we should intern it to get a PathID.
303      auto DataOrError = readData(Extractor, Offset);
304      if (!DataOrError)
305        return DataOrError.takeError();
306      auto &Data = DataOrError.get();
307  
308      if (auto E =
309              P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
310                                        {{P.internPath(Path), std::move(Data)}}}))
311        return std::move(E);
312    }
313  
314    return P;
315  }
316  
317  namespace {
318  
319  struct StackEntry {
320    uint64_t Timestamp;
321    Profile::FuncID FuncId;
322  };
323  
324  } // namespace
325  
326  Expected<Profile> profileFromTrace(const Trace &T) {
327    Profile P;
328  
329    // The implementation of the algorithm re-creates the execution of
330    // the functions based on the trace data. To do this, we set up a number of
331    // data structures to track the execution context of every thread in the
332    // Trace.
333    DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
334    DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
335        ThreadPathData;
336  
337    //  We then do a pass through the Trace to account data on a per-thread-basis.
338    for (const auto &E : T) {
339      auto &TSD = ThreadStacks[E.TId];
340      switch (E.Type) {
341      case RecordTypes::ENTER:
342      case RecordTypes::ENTER_ARG:
343  
344        // Push entries into the function call stack.
345        TSD.push_back({E.TSC, E.FuncId});
346        break;
347  
348      case RecordTypes::EXIT:
349      case RecordTypes::TAIL_EXIT:
350  
351        // Exits cause some accounting to happen, based on the state of the stack.
352        // For each function we pop off the stack, we take note of the path and
353        // record the cumulative state for this path. As we're doing this, we
354        // intern the path into the Profile.
355        while (!TSD.empty()) {
356          auto Top = TSD.back();
357          auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
358          SmallVector<Profile::FuncID, 16> Path;
359          transform(reverse(TSD), std::back_inserter(Path),
360                    std::mem_fn(&StackEntry::FuncId));
361          auto InternedPath = P.internPath(Path);
362          auto &TPD = ThreadPathData[E.TId][InternedPath];
363          ++TPD.CallCount;
364          TPD.CumulativeLocalTime += FunctionLocalTime;
365          TSD.pop_back();
366  
367          // If we've matched the corresponding entry event for this function,
368          // then we exit the loop.
369          if (Top.FuncId == E.FuncId)
370            break;
371  
372          // FIXME: Consider the intermediate times and the cumulative tree time
373          // as well.
374        }
375  
376        break;
377  
378      case RecordTypes::CUSTOM_EVENT:
379      case RecordTypes::TYPED_EVENT:
380        // TODO: Support an extension point to allow handling of custom and typed
381        // events in profiles.
382        break;
383      }
384    }
385  
386    // Once we've gone through the Trace, we now create one Block per thread in
387    // the Profile.
388    for (const auto &ThreadPaths : ThreadPathData) {
389      const auto &TID = ThreadPaths.first;
390      const auto &PathsData = ThreadPaths.second;
391      if (auto E = P.addBlock({
392              TID,
393              std::vector<std::pair<Profile::PathID, Profile::Data>>(
394                  PathsData.begin(), PathsData.end()),
395          }))
396        return std::move(E);
397    }
398  
399    return P;
400  }
401  
402  } // namespace xray
403  } // namespace llvm
404