1*0b57cec5SDimitry Andric //===- xray-stacks.cpp: XRay Function Call Stack Accounting ---------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file implements stack-based accounting. It takes XRay traces, and 10*0b57cec5SDimitry Andric // collates statistics across these traces to show a breakdown of time spent 11*0b57cec5SDimitry Andric // at various points of the stack to provide insight into which functions 12*0b57cec5SDimitry Andric // spend the most time in terms of a call stack. We provide a few 13*0b57cec5SDimitry Andric // sorting/filtering options for zero'ing in on the useful stacks. 14*0b57cec5SDimitry Andric // 15*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 16*0b57cec5SDimitry Andric 17*0b57cec5SDimitry Andric #include <forward_list> 18*0b57cec5SDimitry Andric #include <numeric> 19*0b57cec5SDimitry Andric 20*0b57cec5SDimitry Andric #include "func-id-helper.h" 21*0b57cec5SDimitry Andric #include "trie-node.h" 22*0b57cec5SDimitry Andric #include "xray-registry.h" 23*0b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 24*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 25*0b57cec5SDimitry Andric #include "llvm/Support/Errc.h" 26*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 27*0b57cec5SDimitry Andric #include "llvm/Support/FormatAdapters.h" 28*0b57cec5SDimitry Andric #include "llvm/Support/FormatVariadic.h" 29*0b57cec5SDimitry Andric #include "llvm/XRay/Graph.h" 30*0b57cec5SDimitry Andric #include "llvm/XRay/InstrumentationMap.h" 31*0b57cec5SDimitry Andric #include "llvm/XRay/Trace.h" 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric using namespace llvm; 34*0b57cec5SDimitry Andric using namespace llvm::xray; 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric static cl::SubCommand Stack("stack", "Call stack accounting"); 37*0b57cec5SDimitry Andric static cl::list<std::string> StackInputs(cl::Positional, 38*0b57cec5SDimitry Andric cl::desc("<xray trace>"), cl::Required, 39*0b57cec5SDimitry Andric cl::sub(Stack), cl::OneOrMore); 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric static cl::opt<bool> 42*0b57cec5SDimitry Andric StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), 43*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(false)); 44*0b57cec5SDimitry Andric static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing), 45*0b57cec5SDimitry Andric cl::desc("Alias for -keep-going"), 46*0b57cec5SDimitry Andric cl::sub(Stack)); 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric // TODO: Does there need to be an option to deduce tail or sibling calls? 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric static cl::opt<std::string> StacksInstrMap( 51*0b57cec5SDimitry Andric "instr_map", 52*0b57cec5SDimitry Andric cl::desc("instrumentation map used to identify function ids. " 53*0b57cec5SDimitry Andric "Currently supports elf file instrumentation maps."), 54*0b57cec5SDimitry Andric cl::sub(Stack), cl::init("")); 55*0b57cec5SDimitry Andric static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap), 56*0b57cec5SDimitry Andric cl::desc("Alias for -instr_map"), 57*0b57cec5SDimitry Andric cl::sub(Stack)); 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric static cl::opt<bool> 60*0b57cec5SDimitry Andric SeparateThreadStacks("per-thread-stacks", 61*0b57cec5SDimitry Andric cl::desc("Report top stacks within each thread id"), 62*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(false)); 63*0b57cec5SDimitry Andric 64*0b57cec5SDimitry Andric static cl::opt<bool> 65*0b57cec5SDimitry Andric AggregateThreads("aggregate-threads", 66*0b57cec5SDimitry Andric cl::desc("Aggregate stack times across threads"), 67*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(false)); 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric static cl::opt<bool> 70*0b57cec5SDimitry Andric DumpAllStacks("all-stacks", 71*0b57cec5SDimitry Andric cl::desc("Dump sum of timings for all stacks. " 72*0b57cec5SDimitry Andric "By default separates stacks per-thread."), 73*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(false)); 74*0b57cec5SDimitry Andric static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks), 75*0b57cec5SDimitry Andric cl::desc("Alias for -all-stacks"), 76*0b57cec5SDimitry Andric cl::sub(Stack)); 77*0b57cec5SDimitry Andric 78*0b57cec5SDimitry Andric // TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format 79*0b57cec5SDimitry Andric // possibly with aggregations or just a linear trace of timings. 80*0b57cec5SDimitry Andric enum StackOutputFormat { HUMAN, FLAMETOOL }; 81*0b57cec5SDimitry Andric 82*0b57cec5SDimitry Andric static cl::opt<StackOutputFormat> StacksOutputFormat( 83*0b57cec5SDimitry Andric "stack-format", 84*0b57cec5SDimitry Andric cl::desc("The format that output stacks should be " 85*0b57cec5SDimitry Andric "output in. Only applies with all-stacks."), 86*0b57cec5SDimitry Andric cl::values( 87*0b57cec5SDimitry Andric clEnumValN(HUMAN, "human", 88*0b57cec5SDimitry Andric "Human readable output. Only valid without -all-stacks."), 89*0b57cec5SDimitry Andric clEnumValN(FLAMETOOL, "flame", 90*0b57cec5SDimitry Andric "Format consumable by Brendan Gregg's FlameGraph tool. " 91*0b57cec5SDimitry Andric "Only valid with -all-stacks.")), 92*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(HUMAN)); 93*0b57cec5SDimitry Andric 94*0b57cec5SDimitry Andric // Types of values for each stack in a CallTrie. 95*0b57cec5SDimitry Andric enum class AggregationType { 96*0b57cec5SDimitry Andric TOTAL_TIME, // The total time spent in a stack and its callees. 97*0b57cec5SDimitry Andric INVOCATION_COUNT // The number of times the stack was invoked. 98*0b57cec5SDimitry Andric }; 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric static cl::opt<AggregationType> RequestedAggregation( 101*0b57cec5SDimitry Andric "aggregation-type", 102*0b57cec5SDimitry Andric cl::desc("The type of aggregation to do on call stacks."), 103*0b57cec5SDimitry Andric cl::values( 104*0b57cec5SDimitry Andric clEnumValN( 105*0b57cec5SDimitry Andric AggregationType::TOTAL_TIME, "time", 106*0b57cec5SDimitry Andric "Capture the total time spent in an all invocations of a stack."), 107*0b57cec5SDimitry Andric clEnumValN(AggregationType::INVOCATION_COUNT, "count", 108*0b57cec5SDimitry Andric "Capture the number of times a stack was invoked. " 109*0b57cec5SDimitry Andric "In flamegraph mode, this count also includes invocations " 110*0b57cec5SDimitry Andric "of all callees.")), 111*0b57cec5SDimitry Andric cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME)); 112*0b57cec5SDimitry Andric 113*0b57cec5SDimitry Andric /// A helper struct to work with formatv and XRayRecords. Makes it easier to 114*0b57cec5SDimitry Andric /// use instrumentation map names or addresses in formatted output. 115*0b57cec5SDimitry Andric struct format_xray_record : public FormatAdapter<XRayRecord> { 116*0b57cec5SDimitry Andric explicit format_xray_record(XRayRecord record, 117*0b57cec5SDimitry Andric const FuncIdConversionHelper &conv) 118*0b57cec5SDimitry Andric : FormatAdapter<XRayRecord>(std::move(record)), Converter(&conv) {} 119*0b57cec5SDimitry Andric void format(raw_ostream &Stream, StringRef Style) override { 120*0b57cec5SDimitry Andric Stream << formatv( 121*0b57cec5SDimitry Andric "{FuncId: \"{0}\", ThreadId: \"{1}\", RecordType: \"{2}\"}", 122*0b57cec5SDimitry Andric Converter->SymbolOrNumber(Item.FuncId), Item.TId, 123*0b57cec5SDimitry Andric DecodeRecordType(Item.RecordType)); 124*0b57cec5SDimitry Andric } 125*0b57cec5SDimitry Andric 126*0b57cec5SDimitry Andric private: 127*0b57cec5SDimitry Andric Twine DecodeRecordType(uint16_t recordType) { 128*0b57cec5SDimitry Andric switch (recordType) { 129*0b57cec5SDimitry Andric case 0: 130*0b57cec5SDimitry Andric return Twine("Fn Entry"); 131*0b57cec5SDimitry Andric case 1: 132*0b57cec5SDimitry Andric return Twine("Fn Exit"); 133*0b57cec5SDimitry Andric default: 134*0b57cec5SDimitry Andric // TODO: Add Tail exit when it is added to llvm/XRay/XRayRecord.h 135*0b57cec5SDimitry Andric return Twine("Unknown"); 136*0b57cec5SDimitry Andric } 137*0b57cec5SDimitry Andric } 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric const FuncIdConversionHelper *Converter; 140*0b57cec5SDimitry Andric }; 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric /// The stack command will take a set of XRay traces as arguments, and collects 143*0b57cec5SDimitry Andric /// information about the stacks of instrumented functions that appear in the 144*0b57cec5SDimitry Andric /// traces. We track the following pieces of information: 145*0b57cec5SDimitry Andric /// 146*0b57cec5SDimitry Andric /// - Total time: amount of time/cycles accounted for in the traces. 147*0b57cec5SDimitry Andric /// - Stack count: number of times a specific stack appears in the 148*0b57cec5SDimitry Andric /// traces. Only instrumented functions show up in stacks. 149*0b57cec5SDimitry Andric /// - Cumulative stack time: amount of time spent in a stack accumulated 150*0b57cec5SDimitry Andric /// across the invocations in the traces. 151*0b57cec5SDimitry Andric /// - Cumulative local time: amount of time spent in each instrumented 152*0b57cec5SDimitry Andric /// function showing up in a specific stack, accumulated across the traces. 153*0b57cec5SDimitry Andric /// 154*0b57cec5SDimitry Andric /// Example output for the kind of data we'd like to provide looks like the 155*0b57cec5SDimitry Andric /// following: 156*0b57cec5SDimitry Andric /// 157*0b57cec5SDimitry Andric /// Total time: 3.33234 s 158*0b57cec5SDimitry Andric /// Stack ID: ... 159*0b57cec5SDimitry Andric /// Stack Count: 2093 160*0b57cec5SDimitry Andric /// # Function Local Time (%) Stack Time (%) 161*0b57cec5SDimitry Andric /// 0 main 2.34 ms 0.07% 3.33234 s 100% 162*0b57cec5SDimitry Andric /// 1 foo() 3.30000 s 99.02% 3.33 s 99.92% 163*0b57cec5SDimitry Andric /// 2 bar() 30 ms 0.90% 30 ms 0.90% 164*0b57cec5SDimitry Andric /// 165*0b57cec5SDimitry Andric /// We can also show distributions of the function call durations with 166*0b57cec5SDimitry Andric /// statistics at each level of the stack. This works by doing the following 167*0b57cec5SDimitry Andric /// algorithm: 168*0b57cec5SDimitry Andric /// 169*0b57cec5SDimitry Andric /// 1. When unwinding, record the duration of each unwound function associated 170*0b57cec5SDimitry Andric /// with the path up to which the unwinding stops. For example: 171*0b57cec5SDimitry Andric /// 172*0b57cec5SDimitry Andric /// Step Duration (? means has start time) 173*0b57cec5SDimitry Andric /// 174*0b57cec5SDimitry Andric /// push a <start time> a = ? 175*0b57cec5SDimitry Andric /// push b <start time> a = ?, a->b = ? 176*0b57cec5SDimitry Andric /// push c <start time> a = ?, a->b = ?, a->b->c = ? 177*0b57cec5SDimitry Andric /// pop c <end time> a = ?, a->b = ?, emit duration(a->b->c) 178*0b57cec5SDimitry Andric /// pop b <end time> a = ?, emit duration(a->b) 179*0b57cec5SDimitry Andric /// push c <start time> a = ?, a->c = ? 180*0b57cec5SDimitry Andric /// pop c <end time> a = ?, emit duration(a->c) 181*0b57cec5SDimitry Andric /// pop a <end time> emit duration(a) 182*0b57cec5SDimitry Andric /// 183*0b57cec5SDimitry Andric /// 2. We then account for the various stacks we've collected, and for each of 184*0b57cec5SDimitry Andric /// them will have measurements that look like the following (continuing 185*0b57cec5SDimitry Andric /// with the above simple example): 186*0b57cec5SDimitry Andric /// 187*0b57cec5SDimitry Andric /// c : [<id("a->b->c"), [durations]>, <id("a->c"), [durations]>] 188*0b57cec5SDimitry Andric /// b : [<id("a->b"), [durations]>] 189*0b57cec5SDimitry Andric /// a : [<id("a"), [durations]>] 190*0b57cec5SDimitry Andric /// 191*0b57cec5SDimitry Andric /// This allows us to compute, for each stack id, and each function that 192*0b57cec5SDimitry Andric /// shows up in the stack, some important statistics like: 193*0b57cec5SDimitry Andric /// 194*0b57cec5SDimitry Andric /// - median 195*0b57cec5SDimitry Andric /// - 99th percentile 196*0b57cec5SDimitry Andric /// - mean + stddev 197*0b57cec5SDimitry Andric /// - count 198*0b57cec5SDimitry Andric /// 199*0b57cec5SDimitry Andric /// 3. For cases where we don't have durations for some of the higher levels 200*0b57cec5SDimitry Andric /// of the stack (perhaps instrumentation wasn't activated when the stack was 201*0b57cec5SDimitry Andric /// entered), we can mark them appropriately. 202*0b57cec5SDimitry Andric /// 203*0b57cec5SDimitry Andric /// Computing this data also allows us to implement lookup by call stack nodes, 204*0b57cec5SDimitry Andric /// so that we can find functions that show up in multiple stack traces and 205*0b57cec5SDimitry Andric /// show the statistical properties of that function in various contexts. We 206*0b57cec5SDimitry Andric /// can compute information similar to the following: 207*0b57cec5SDimitry Andric /// 208*0b57cec5SDimitry Andric /// Function: 'c' 209*0b57cec5SDimitry Andric /// Stacks: 2 / 2 210*0b57cec5SDimitry Andric /// Stack ID: ... 211*0b57cec5SDimitry Andric /// Stack Count: ... 212*0b57cec5SDimitry Andric /// # Function ... 213*0b57cec5SDimitry Andric /// 0 a ... 214*0b57cec5SDimitry Andric /// 1 b ... 215*0b57cec5SDimitry Andric /// 2 c ... 216*0b57cec5SDimitry Andric /// 217*0b57cec5SDimitry Andric /// Stack ID: ... 218*0b57cec5SDimitry Andric /// Stack Count: ... 219*0b57cec5SDimitry Andric /// # Function ... 220*0b57cec5SDimitry Andric /// 0 a ... 221*0b57cec5SDimitry Andric /// 1 c ... 222*0b57cec5SDimitry Andric /// ----------------... 223*0b57cec5SDimitry Andric /// 224*0b57cec5SDimitry Andric /// Function: 'b' 225*0b57cec5SDimitry Andric /// Stacks: 1 / 2 226*0b57cec5SDimitry Andric /// Stack ID: ... 227*0b57cec5SDimitry Andric /// Stack Count: ... 228*0b57cec5SDimitry Andric /// # Function ... 229*0b57cec5SDimitry Andric /// 0 a ... 230*0b57cec5SDimitry Andric /// 1 b ... 231*0b57cec5SDimitry Andric /// 2 c ... 232*0b57cec5SDimitry Andric /// 233*0b57cec5SDimitry Andric /// 234*0b57cec5SDimitry Andric /// To do this we require a Trie data structure that will allow us to represent 235*0b57cec5SDimitry Andric /// all the call stacks of instrumented functions in an easily traversible 236*0b57cec5SDimitry Andric /// manner when we do the aggregations and lookups. For instrumented call 237*0b57cec5SDimitry Andric /// sequences like the following: 238*0b57cec5SDimitry Andric /// 239*0b57cec5SDimitry Andric /// a() 240*0b57cec5SDimitry Andric /// b() 241*0b57cec5SDimitry Andric /// c() 242*0b57cec5SDimitry Andric /// d() 243*0b57cec5SDimitry Andric /// c() 244*0b57cec5SDimitry Andric /// 245*0b57cec5SDimitry Andric /// We will have a representation like so: 246*0b57cec5SDimitry Andric /// 247*0b57cec5SDimitry Andric /// a -> b -> c 248*0b57cec5SDimitry Andric /// | | 249*0b57cec5SDimitry Andric /// | +--> d 250*0b57cec5SDimitry Andric /// | 251*0b57cec5SDimitry Andric /// +--> c 252*0b57cec5SDimitry Andric /// 253*0b57cec5SDimitry Andric /// We maintain a sequence of durations on the leaves and in the internal nodes 254*0b57cec5SDimitry Andric /// as we go through and process every record from the XRay trace. We also 255*0b57cec5SDimitry Andric /// maintain an index of unique functions, and provide a means of iterating 256*0b57cec5SDimitry Andric /// through all the instrumented call stacks which we know about. 257*0b57cec5SDimitry Andric 258*0b57cec5SDimitry Andric struct StackDuration { 259*0b57cec5SDimitry Andric llvm::SmallVector<int64_t, 4> TerminalDurations; 260*0b57cec5SDimitry Andric llvm::SmallVector<int64_t, 4> IntermediateDurations; 261*0b57cec5SDimitry Andric }; 262*0b57cec5SDimitry Andric 263*0b57cec5SDimitry Andric StackDuration mergeStackDuration(const StackDuration &Left, 264*0b57cec5SDimitry Andric const StackDuration &Right) { 265*0b57cec5SDimitry Andric StackDuration Data{}; 266*0b57cec5SDimitry Andric Data.TerminalDurations.reserve(Left.TerminalDurations.size() + 267*0b57cec5SDimitry Andric Right.TerminalDurations.size()); 268*0b57cec5SDimitry Andric Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + 269*0b57cec5SDimitry Andric Right.IntermediateDurations.size()); 270*0b57cec5SDimitry Andric // Aggregate the durations. 271*0b57cec5SDimitry Andric for (auto duration : Left.TerminalDurations) 272*0b57cec5SDimitry Andric Data.TerminalDurations.push_back(duration); 273*0b57cec5SDimitry Andric for (auto duration : Right.TerminalDurations) 274*0b57cec5SDimitry Andric Data.TerminalDurations.push_back(duration); 275*0b57cec5SDimitry Andric 276*0b57cec5SDimitry Andric for (auto duration : Left.IntermediateDurations) 277*0b57cec5SDimitry Andric Data.IntermediateDurations.push_back(duration); 278*0b57cec5SDimitry Andric for (auto duration : Right.IntermediateDurations) 279*0b57cec5SDimitry Andric Data.IntermediateDurations.push_back(duration); 280*0b57cec5SDimitry Andric return Data; 281*0b57cec5SDimitry Andric } 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric using StackTrieNode = TrieNode<StackDuration>; 284*0b57cec5SDimitry Andric 285*0b57cec5SDimitry Andric template <AggregationType AggType> 286*0b57cec5SDimitry Andric std::size_t GetValueForStack(const StackTrieNode *Node); 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric // When computing total time spent in a stack, we're adding the timings from 289*0b57cec5SDimitry Andric // its callees and the timings from when it was a leaf. 290*0b57cec5SDimitry Andric template <> 291*0b57cec5SDimitry Andric std::size_t 292*0b57cec5SDimitry Andric GetValueForStack<AggregationType::TOTAL_TIME>(const StackTrieNode *Node) { 293*0b57cec5SDimitry Andric auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(), 294*0b57cec5SDimitry Andric Node->ExtraData.TerminalDurations.end(), 0uLL); 295*0b57cec5SDimitry Andric return std::accumulate(Node->ExtraData.IntermediateDurations.begin(), 296*0b57cec5SDimitry Andric Node->ExtraData.IntermediateDurations.end(), TopSum); 297*0b57cec5SDimitry Andric } 298*0b57cec5SDimitry Andric 299*0b57cec5SDimitry Andric // Calculates how many times a function was invoked. 300*0b57cec5SDimitry Andric // TODO: Hook up option to produce stacks 301*0b57cec5SDimitry Andric template <> 302*0b57cec5SDimitry Andric std::size_t 303*0b57cec5SDimitry Andric GetValueForStack<AggregationType::INVOCATION_COUNT>(const StackTrieNode *Node) { 304*0b57cec5SDimitry Andric return Node->ExtraData.TerminalDurations.size() + 305*0b57cec5SDimitry Andric Node->ExtraData.IntermediateDurations.size(); 306*0b57cec5SDimitry Andric } 307*0b57cec5SDimitry Andric 308*0b57cec5SDimitry Andric // Make sure there are implementations for each enum value. 309*0b57cec5SDimitry Andric template <AggregationType T> struct DependentFalseType : std::false_type {}; 310*0b57cec5SDimitry Andric 311*0b57cec5SDimitry Andric template <AggregationType AggType> 312*0b57cec5SDimitry Andric std::size_t GetValueForStack(const StackTrieNode *Node) { 313*0b57cec5SDimitry Andric static_assert(DependentFalseType<AggType>::value, 314*0b57cec5SDimitry Andric "No implementation found for aggregation type provided."); 315*0b57cec5SDimitry Andric return 0; 316*0b57cec5SDimitry Andric } 317*0b57cec5SDimitry Andric 318*0b57cec5SDimitry Andric class StackTrie { 319*0b57cec5SDimitry Andric // Avoid the magic number of 4 propagated through the code with an alias. 320*0b57cec5SDimitry Andric // We use this SmallVector to track the root nodes in a call graph. 321*0b57cec5SDimitry Andric using RootVector = SmallVector<StackTrieNode *, 4>; 322*0b57cec5SDimitry Andric 323*0b57cec5SDimitry Andric // We maintain pointers to the roots of the tries we see. 324*0b57cec5SDimitry Andric DenseMap<uint32_t, RootVector> Roots; 325*0b57cec5SDimitry Andric 326*0b57cec5SDimitry Andric // We make sure all the nodes are accounted for in this list. 327*0b57cec5SDimitry Andric std::forward_list<StackTrieNode> NodeStore; 328*0b57cec5SDimitry Andric 329*0b57cec5SDimitry Andric // A map of thread ids to pairs call stack trie nodes and their start times. 330*0b57cec5SDimitry Andric DenseMap<uint32_t, SmallVector<std::pair<StackTrieNode *, uint64_t>, 8>> 331*0b57cec5SDimitry Andric ThreadStackMap; 332*0b57cec5SDimitry Andric 333*0b57cec5SDimitry Andric StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, 334*0b57cec5SDimitry Andric StackTrieNode *Parent) { 335*0b57cec5SDimitry Andric NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}}); 336*0b57cec5SDimitry Andric auto I = NodeStore.begin(); 337*0b57cec5SDimitry Andric auto *Node = &*I; 338*0b57cec5SDimitry Andric if (!Parent) 339*0b57cec5SDimitry Andric Roots[ThreadId].push_back(Node); 340*0b57cec5SDimitry Andric return Node; 341*0b57cec5SDimitry Andric } 342*0b57cec5SDimitry Andric 343*0b57cec5SDimitry Andric StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { 344*0b57cec5SDimitry Andric const auto &RootsByThread = Roots[ThreadId]; 345*0b57cec5SDimitry Andric auto I = find_if(RootsByThread, 346*0b57cec5SDimitry Andric [&](StackTrieNode *N) { return N->FuncId == FuncId; }); 347*0b57cec5SDimitry Andric return (I == RootsByThread.end()) ? nullptr : *I; 348*0b57cec5SDimitry Andric } 349*0b57cec5SDimitry Andric 350*0b57cec5SDimitry Andric public: 351*0b57cec5SDimitry Andric enum class AccountRecordStatus { 352*0b57cec5SDimitry Andric OK, // Successfully processed 353*0b57cec5SDimitry Andric ENTRY_NOT_FOUND, // An exit record had no matching call stack entry 354*0b57cec5SDimitry Andric UNKNOWN_RECORD_TYPE 355*0b57cec5SDimitry Andric }; 356*0b57cec5SDimitry Andric 357*0b57cec5SDimitry Andric struct AccountRecordState { 358*0b57cec5SDimitry Andric // We keep track of whether the call stack is currently unwinding. 359*0b57cec5SDimitry Andric bool wasLastRecordExit; 360*0b57cec5SDimitry Andric 361*0b57cec5SDimitry Andric static AccountRecordState CreateInitialState() { return {false}; } 362*0b57cec5SDimitry Andric }; 363*0b57cec5SDimitry Andric 364*0b57cec5SDimitry Andric AccountRecordStatus accountRecord(const XRayRecord &R, 365*0b57cec5SDimitry Andric AccountRecordState *state) { 366*0b57cec5SDimitry Andric auto &TS = ThreadStackMap[R.TId]; 367*0b57cec5SDimitry Andric switch (R.Type) { 368*0b57cec5SDimitry Andric case RecordTypes::CUSTOM_EVENT: 369*0b57cec5SDimitry Andric case RecordTypes::TYPED_EVENT: 370*0b57cec5SDimitry Andric return AccountRecordStatus::OK; 371*0b57cec5SDimitry Andric case RecordTypes::ENTER: 372*0b57cec5SDimitry Andric case RecordTypes::ENTER_ARG: { 373*0b57cec5SDimitry Andric state->wasLastRecordExit = false; 374*0b57cec5SDimitry Andric // When we encounter a new function entry, we want to record the TSC for 375*0b57cec5SDimitry Andric // that entry, and the function id. Before doing so we check the top of 376*0b57cec5SDimitry Andric // the stack to see if there are callees that already represent this 377*0b57cec5SDimitry Andric // function. 378*0b57cec5SDimitry Andric if (TS.empty()) { 379*0b57cec5SDimitry Andric auto *Root = findRootNode(R.TId, R.FuncId); 380*0b57cec5SDimitry Andric TS.emplace_back(Root ? Root : createTrieNode(R.TId, R.FuncId, nullptr), 381*0b57cec5SDimitry Andric R.TSC); 382*0b57cec5SDimitry Andric return AccountRecordStatus::OK; 383*0b57cec5SDimitry Andric } 384*0b57cec5SDimitry Andric 385*0b57cec5SDimitry Andric auto &Top = TS.back(); 386*0b57cec5SDimitry Andric auto I = find_if(Top.first->Callees, 387*0b57cec5SDimitry Andric [&](StackTrieNode *N) { return N->FuncId == R.FuncId; }); 388*0b57cec5SDimitry Andric if (I == Top.first->Callees.end()) { 389*0b57cec5SDimitry Andric // We didn't find the callee in the stack trie, so we're going to 390*0b57cec5SDimitry Andric // add to the stack then set up the pointers properly. 391*0b57cec5SDimitry Andric auto N = createTrieNode(R.TId, R.FuncId, Top.first); 392*0b57cec5SDimitry Andric Top.first->Callees.emplace_back(N); 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric // Top may be invalidated after this statement. 395*0b57cec5SDimitry Andric TS.emplace_back(N, R.TSC); 396*0b57cec5SDimitry Andric } else { 397*0b57cec5SDimitry Andric // We found the callee in the stack trie, so we'll use that pointer 398*0b57cec5SDimitry Andric // instead, add it to the stack associated with the TSC. 399*0b57cec5SDimitry Andric TS.emplace_back(*I, R.TSC); 400*0b57cec5SDimitry Andric } 401*0b57cec5SDimitry Andric return AccountRecordStatus::OK; 402*0b57cec5SDimitry Andric } 403*0b57cec5SDimitry Andric case RecordTypes::EXIT: 404*0b57cec5SDimitry Andric case RecordTypes::TAIL_EXIT: { 405*0b57cec5SDimitry Andric bool wasLastRecordExit = state->wasLastRecordExit; 406*0b57cec5SDimitry Andric state->wasLastRecordExit = true; 407*0b57cec5SDimitry Andric // The exit case is more interesting, since we want to be able to deduce 408*0b57cec5SDimitry Andric // missing exit records. To do that properly, we need to look up the stack 409*0b57cec5SDimitry Andric // and see whether the exit record matches any of the entry records. If it 410*0b57cec5SDimitry Andric // does match, we attempt to record the durations as we pop the stack to 411*0b57cec5SDimitry Andric // where we see the parent. 412*0b57cec5SDimitry Andric if (TS.empty()) { 413*0b57cec5SDimitry Andric // Short circuit, and say we can't find it. 414*0b57cec5SDimitry Andric 415*0b57cec5SDimitry Andric return AccountRecordStatus::ENTRY_NOT_FOUND; 416*0b57cec5SDimitry Andric } 417*0b57cec5SDimitry Andric 418*0b57cec5SDimitry Andric auto FunctionEntryMatch = find_if( 419*0b57cec5SDimitry Andric reverse(TS), [&](const std::pair<StackTrieNode *, uint64_t> &E) { 420*0b57cec5SDimitry Andric return E.first->FuncId == R.FuncId; 421*0b57cec5SDimitry Andric }); 422*0b57cec5SDimitry Andric auto status = AccountRecordStatus::OK; 423*0b57cec5SDimitry Andric if (FunctionEntryMatch == TS.rend()) { 424*0b57cec5SDimitry Andric status = AccountRecordStatus::ENTRY_NOT_FOUND; 425*0b57cec5SDimitry Andric } else { 426*0b57cec5SDimitry Andric // Account for offset of 1 between reverse and forward iterators. We 427*0b57cec5SDimitry Andric // want the forward iterator to include the function that is exited. 428*0b57cec5SDimitry Andric ++FunctionEntryMatch; 429*0b57cec5SDimitry Andric } 430*0b57cec5SDimitry Andric auto I = FunctionEntryMatch.base(); 431*0b57cec5SDimitry Andric for (auto &E : make_range(I, TS.end() - 1)) 432*0b57cec5SDimitry Andric E.first->ExtraData.IntermediateDurations.push_back( 433*0b57cec5SDimitry Andric std::max(E.second, R.TSC) - std::min(E.second, R.TSC)); 434*0b57cec5SDimitry Andric auto &Deepest = TS.back(); 435*0b57cec5SDimitry Andric if (wasLastRecordExit) 436*0b57cec5SDimitry Andric Deepest.first->ExtraData.IntermediateDurations.push_back( 437*0b57cec5SDimitry Andric std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); 438*0b57cec5SDimitry Andric else 439*0b57cec5SDimitry Andric Deepest.first->ExtraData.TerminalDurations.push_back( 440*0b57cec5SDimitry Andric std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); 441*0b57cec5SDimitry Andric TS.erase(I, TS.end()); 442*0b57cec5SDimitry Andric return status; 443*0b57cec5SDimitry Andric } 444*0b57cec5SDimitry Andric } 445*0b57cec5SDimitry Andric return AccountRecordStatus::UNKNOWN_RECORD_TYPE; 446*0b57cec5SDimitry Andric } 447*0b57cec5SDimitry Andric 448*0b57cec5SDimitry Andric bool isEmpty() const { return Roots.empty(); } 449*0b57cec5SDimitry Andric 450*0b57cec5SDimitry Andric void printStack(raw_ostream &OS, const StackTrieNode *Top, 451*0b57cec5SDimitry Andric FuncIdConversionHelper &FN) { 452*0b57cec5SDimitry Andric // Traverse the pointers up to the parent, noting the sums, then print 453*0b57cec5SDimitry Andric // in reverse order (callers at top, callees down bottom). 454*0b57cec5SDimitry Andric SmallVector<const StackTrieNode *, 8> CurrentStack; 455*0b57cec5SDimitry Andric for (auto *F = Top; F != nullptr; F = F->Parent) 456*0b57cec5SDimitry Andric CurrentStack.push_back(F); 457*0b57cec5SDimitry Andric int Level = 0; 458*0b57cec5SDimitry Andric OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function", 459*0b57cec5SDimitry Andric "count", "sum"); 460*0b57cec5SDimitry Andric for (auto *F : 461*0b57cec5SDimitry Andric reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { 462*0b57cec5SDimitry Andric auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(), 463*0b57cec5SDimitry Andric F->ExtraData.IntermediateDurations.end(), 0LL); 464*0b57cec5SDimitry Andric auto FuncId = FN.SymbolOrNumber(F->FuncId); 465*0b57cec5SDimitry Andric OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, 466*0b57cec5SDimitry Andric FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, 467*0b57cec5SDimitry Andric F->ExtraData.IntermediateDurations.size(), Sum); 468*0b57cec5SDimitry Andric } 469*0b57cec5SDimitry Andric auto *Leaf = *CurrentStack.begin(); 470*0b57cec5SDimitry Andric auto LeafSum = 471*0b57cec5SDimitry Andric std::accumulate(Leaf->ExtraData.TerminalDurations.begin(), 472*0b57cec5SDimitry Andric Leaf->ExtraData.TerminalDurations.end(), 0LL); 473*0b57cec5SDimitry Andric auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); 474*0b57cec5SDimitry Andric OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, 475*0b57cec5SDimitry Andric LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." 476*0b57cec5SDimitry Andric : LeafFuncId, 477*0b57cec5SDimitry Andric Leaf->ExtraData.TerminalDurations.size(), LeafSum); 478*0b57cec5SDimitry Andric OS << "\n"; 479*0b57cec5SDimitry Andric } 480*0b57cec5SDimitry Andric 481*0b57cec5SDimitry Andric /// Prints top stacks for each thread. 482*0b57cec5SDimitry Andric void printPerThread(raw_ostream &OS, FuncIdConversionHelper &FN) { 483*0b57cec5SDimitry Andric for (auto iter : Roots) { 484*0b57cec5SDimitry Andric OS << "Thread " << iter.first << ":\n"; 485*0b57cec5SDimitry Andric print(OS, FN, iter.second); 486*0b57cec5SDimitry Andric OS << "\n"; 487*0b57cec5SDimitry Andric } 488*0b57cec5SDimitry Andric } 489*0b57cec5SDimitry Andric 490*0b57cec5SDimitry Andric /// Prints timing sums for each stack in each threads. 491*0b57cec5SDimitry Andric template <AggregationType AggType> 492*0b57cec5SDimitry Andric void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN, 493*0b57cec5SDimitry Andric StackOutputFormat format) { 494*0b57cec5SDimitry Andric for (auto iter : Roots) { 495*0b57cec5SDimitry Andric uint32_t threadId = iter.first; 496*0b57cec5SDimitry Andric RootVector &perThreadRoots = iter.second; 497*0b57cec5SDimitry Andric bool reportThreadId = true; 498*0b57cec5SDimitry Andric printAll<AggType>(OS, FN, perThreadRoots, threadId, reportThreadId); 499*0b57cec5SDimitry Andric } 500*0b57cec5SDimitry Andric } 501*0b57cec5SDimitry Andric 502*0b57cec5SDimitry Andric /// Prints top stacks from looking at all the leaves and ignoring thread IDs. 503*0b57cec5SDimitry Andric /// Stacks that consist of the same function IDs but were called in different 504*0b57cec5SDimitry Andric /// thread IDs are not considered unique in this printout. 505*0b57cec5SDimitry Andric void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { 506*0b57cec5SDimitry Andric RootVector RootValues; 507*0b57cec5SDimitry Andric 508*0b57cec5SDimitry Andric // Function to pull the values out of a map iterator. 509*0b57cec5SDimitry Andric using RootsType = decltype(Roots.begin())::value_type; 510*0b57cec5SDimitry Andric auto MapValueFn = [](const RootsType &Value) { return Value.second; }; 511*0b57cec5SDimitry Andric 512*0b57cec5SDimitry Andric for (const auto &RootNodeRange : 513*0b57cec5SDimitry Andric make_range(map_iterator(Roots.begin(), MapValueFn), 514*0b57cec5SDimitry Andric map_iterator(Roots.end(), MapValueFn))) { 515*0b57cec5SDimitry Andric for (auto *RootNode : RootNodeRange) 516*0b57cec5SDimitry Andric RootValues.push_back(RootNode); 517*0b57cec5SDimitry Andric } 518*0b57cec5SDimitry Andric 519*0b57cec5SDimitry Andric print(OS, FN, RootValues); 520*0b57cec5SDimitry Andric } 521*0b57cec5SDimitry Andric 522*0b57cec5SDimitry Andric /// Creates a merged list of Tries for unique stacks that disregards their 523*0b57cec5SDimitry Andric /// thread IDs. 524*0b57cec5SDimitry Andric RootVector mergeAcrossThreads(std::forward_list<StackTrieNode> &NodeStore) { 525*0b57cec5SDimitry Andric RootVector MergedByThreadRoots; 526*0b57cec5SDimitry Andric for (auto MapIter : Roots) { 527*0b57cec5SDimitry Andric const auto &RootNodeVector = MapIter.second; 528*0b57cec5SDimitry Andric for (auto *Node : RootNodeVector) { 529*0b57cec5SDimitry Andric auto MaybeFoundIter = 530*0b57cec5SDimitry Andric find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) { 531*0b57cec5SDimitry Andric return Node->FuncId == elem->FuncId; 532*0b57cec5SDimitry Andric }); 533*0b57cec5SDimitry Andric if (MaybeFoundIter == MergedByThreadRoots.end()) { 534*0b57cec5SDimitry Andric MergedByThreadRoots.push_back(Node); 535*0b57cec5SDimitry Andric } else { 536*0b57cec5SDimitry Andric MergedByThreadRoots.push_back(mergeTrieNodes( 537*0b57cec5SDimitry Andric **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration)); 538*0b57cec5SDimitry Andric MergedByThreadRoots.erase(MaybeFoundIter); 539*0b57cec5SDimitry Andric } 540*0b57cec5SDimitry Andric } 541*0b57cec5SDimitry Andric } 542*0b57cec5SDimitry Andric return MergedByThreadRoots; 543*0b57cec5SDimitry Andric } 544*0b57cec5SDimitry Andric 545*0b57cec5SDimitry Andric /// Print timing sums for all stacks merged by Thread ID. 546*0b57cec5SDimitry Andric template <AggregationType AggType> 547*0b57cec5SDimitry Andric void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, 548*0b57cec5SDimitry Andric StackOutputFormat format) { 549*0b57cec5SDimitry Andric std::forward_list<StackTrieNode> AggregatedNodeStore; 550*0b57cec5SDimitry Andric RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); 551*0b57cec5SDimitry Andric bool reportThreadId = false; 552*0b57cec5SDimitry Andric printAll<AggType>(OS, FN, MergedByThreadRoots, 553*0b57cec5SDimitry Andric /*threadId*/ 0, reportThreadId); 554*0b57cec5SDimitry Andric } 555*0b57cec5SDimitry Andric 556*0b57cec5SDimitry Andric /// Merges the trie by thread id before printing top stacks. 557*0b57cec5SDimitry Andric void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { 558*0b57cec5SDimitry Andric std::forward_list<StackTrieNode> AggregatedNodeStore; 559*0b57cec5SDimitry Andric RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); 560*0b57cec5SDimitry Andric print(OS, FN, MergedByThreadRoots); 561*0b57cec5SDimitry Andric } 562*0b57cec5SDimitry Andric 563*0b57cec5SDimitry Andric // TODO: Add a format option when more than one are supported. 564*0b57cec5SDimitry Andric template <AggregationType AggType> 565*0b57cec5SDimitry Andric void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, 566*0b57cec5SDimitry Andric RootVector RootValues, uint32_t ThreadId, bool ReportThread) { 567*0b57cec5SDimitry Andric SmallVector<const StackTrieNode *, 16> S; 568*0b57cec5SDimitry Andric for (const auto *N : RootValues) { 569*0b57cec5SDimitry Andric S.clear(); 570*0b57cec5SDimitry Andric S.push_back(N); 571*0b57cec5SDimitry Andric while (!S.empty()) { 572*0b57cec5SDimitry Andric auto *Top = S.pop_back_val(); 573*0b57cec5SDimitry Andric printSingleStack<AggType>(OS, FN, ReportThread, ThreadId, Top); 574*0b57cec5SDimitry Andric for (const auto *C : Top->Callees) 575*0b57cec5SDimitry Andric S.push_back(C); 576*0b57cec5SDimitry Andric } 577*0b57cec5SDimitry Andric } 578*0b57cec5SDimitry Andric } 579*0b57cec5SDimitry Andric 580*0b57cec5SDimitry Andric /// Prints values for stacks in a format consumable for the flamegraph.pl 581*0b57cec5SDimitry Andric /// tool. This is a line based format that lists each level in the stack 582*0b57cec5SDimitry Andric /// hierarchy in a semicolon delimited form followed by a space and a numeric 583*0b57cec5SDimitry Andric /// value. If breaking down by thread, the thread ID will be added as the 584*0b57cec5SDimitry Andric /// root level of the stack. 585*0b57cec5SDimitry Andric template <AggregationType AggType> 586*0b57cec5SDimitry Andric void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, 587*0b57cec5SDimitry Andric bool ReportThread, uint32_t ThreadId, 588*0b57cec5SDimitry Andric const StackTrieNode *Node) { 589*0b57cec5SDimitry Andric if (ReportThread) 590*0b57cec5SDimitry Andric OS << "thread_" << ThreadId << ";"; 591*0b57cec5SDimitry Andric SmallVector<const StackTrieNode *, 5> lineage{}; 592*0b57cec5SDimitry Andric lineage.push_back(Node); 593*0b57cec5SDimitry Andric while (lineage.back()->Parent != nullptr) 594*0b57cec5SDimitry Andric lineage.push_back(lineage.back()->Parent); 595*0b57cec5SDimitry Andric while (!lineage.empty()) { 596*0b57cec5SDimitry Andric OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";"; 597*0b57cec5SDimitry Andric lineage.pop_back(); 598*0b57cec5SDimitry Andric } 599*0b57cec5SDimitry Andric OS << " " << GetValueForStack<AggType>(Node) << "\n"; 600*0b57cec5SDimitry Andric } 601*0b57cec5SDimitry Andric 602*0b57cec5SDimitry Andric void print(raw_ostream &OS, FuncIdConversionHelper &FN, 603*0b57cec5SDimitry Andric RootVector RootValues) { 604*0b57cec5SDimitry Andric // Go through each of the roots, and traverse the call stack, producing the 605*0b57cec5SDimitry Andric // aggregates as you go along. Remember these aggregates and stacks, and 606*0b57cec5SDimitry Andric // show summary statistics about: 607*0b57cec5SDimitry Andric // 608*0b57cec5SDimitry Andric // - Total number of unique stacks 609*0b57cec5SDimitry Andric // - Top 10 stacks by count 610*0b57cec5SDimitry Andric // - Top 10 stacks by aggregate duration 611*0b57cec5SDimitry Andric SmallVector<std::pair<const StackTrieNode *, uint64_t>, 11> 612*0b57cec5SDimitry Andric TopStacksByCount; 613*0b57cec5SDimitry Andric SmallVector<std::pair<const StackTrieNode *, uint64_t>, 11> TopStacksBySum; 614*0b57cec5SDimitry Andric auto greater_second = 615*0b57cec5SDimitry Andric [](const std::pair<const StackTrieNode *, uint64_t> &A, 616*0b57cec5SDimitry Andric const std::pair<const StackTrieNode *, uint64_t> &B) { 617*0b57cec5SDimitry Andric return A.second > B.second; 618*0b57cec5SDimitry Andric }; 619*0b57cec5SDimitry Andric uint64_t UniqueStacks = 0; 620*0b57cec5SDimitry Andric for (const auto *N : RootValues) { 621*0b57cec5SDimitry Andric SmallVector<const StackTrieNode *, 16> S; 622*0b57cec5SDimitry Andric S.emplace_back(N); 623*0b57cec5SDimitry Andric 624*0b57cec5SDimitry Andric while (!S.empty()) { 625*0b57cec5SDimitry Andric auto *Top = S.pop_back_val(); 626*0b57cec5SDimitry Andric 627*0b57cec5SDimitry Andric // We only start printing the stack (by walking up the parent pointers) 628*0b57cec5SDimitry Andric // when we get to a leaf function. 629*0b57cec5SDimitry Andric if (!Top->ExtraData.TerminalDurations.empty()) { 630*0b57cec5SDimitry Andric ++UniqueStacks; 631*0b57cec5SDimitry Andric auto TopSum = 632*0b57cec5SDimitry Andric std::accumulate(Top->ExtraData.TerminalDurations.begin(), 633*0b57cec5SDimitry Andric Top->ExtraData.TerminalDurations.end(), 0uLL); 634*0b57cec5SDimitry Andric { 635*0b57cec5SDimitry Andric auto E = std::make_pair(Top, TopSum); 636*0b57cec5SDimitry Andric TopStacksBySum.insert( 637*0b57cec5SDimitry Andric llvm::lower_bound(TopStacksBySum, E, greater_second), E); 638*0b57cec5SDimitry Andric if (TopStacksBySum.size() == 11) 639*0b57cec5SDimitry Andric TopStacksBySum.pop_back(); 640*0b57cec5SDimitry Andric } 641*0b57cec5SDimitry Andric { 642*0b57cec5SDimitry Andric auto E = 643*0b57cec5SDimitry Andric std::make_pair(Top, Top->ExtraData.TerminalDurations.size()); 644*0b57cec5SDimitry Andric TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), 645*0b57cec5SDimitry Andric TopStacksByCount.end(), E, 646*0b57cec5SDimitry Andric greater_second), 647*0b57cec5SDimitry Andric E); 648*0b57cec5SDimitry Andric if (TopStacksByCount.size() == 11) 649*0b57cec5SDimitry Andric TopStacksByCount.pop_back(); 650*0b57cec5SDimitry Andric } 651*0b57cec5SDimitry Andric } 652*0b57cec5SDimitry Andric for (const auto *C : Top->Callees) 653*0b57cec5SDimitry Andric S.push_back(C); 654*0b57cec5SDimitry Andric } 655*0b57cec5SDimitry Andric } 656*0b57cec5SDimitry Andric 657*0b57cec5SDimitry Andric // Now print the statistics in the end. 658*0b57cec5SDimitry Andric OS << "\n"; 659*0b57cec5SDimitry Andric OS << "Unique Stacks: " << UniqueStacks << "\n"; 660*0b57cec5SDimitry Andric OS << "Top 10 Stacks by leaf sum:\n\n"; 661*0b57cec5SDimitry Andric for (const auto &P : TopStacksBySum) { 662*0b57cec5SDimitry Andric OS << "Sum: " << P.second << "\n"; 663*0b57cec5SDimitry Andric printStack(OS, P.first, FN); 664*0b57cec5SDimitry Andric } 665*0b57cec5SDimitry Andric OS << "\n"; 666*0b57cec5SDimitry Andric OS << "Top 10 Stacks by leaf count:\n\n"; 667*0b57cec5SDimitry Andric for (const auto &P : TopStacksByCount) { 668*0b57cec5SDimitry Andric OS << "Count: " << P.second << "\n"; 669*0b57cec5SDimitry Andric printStack(OS, P.first, FN); 670*0b57cec5SDimitry Andric } 671*0b57cec5SDimitry Andric OS << "\n"; 672*0b57cec5SDimitry Andric } 673*0b57cec5SDimitry Andric }; 674*0b57cec5SDimitry Andric 675*0b57cec5SDimitry Andric std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error, 676*0b57cec5SDimitry Andric const XRayRecord &Record, 677*0b57cec5SDimitry Andric const FuncIdConversionHelper &Converter) { 678*0b57cec5SDimitry Andric switch (Error) { 679*0b57cec5SDimitry Andric case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND: 680*0b57cec5SDimitry Andric return formatv("Found record {0} with no matching function entry\n", 681*0b57cec5SDimitry Andric format_xray_record(Record, Converter)); 682*0b57cec5SDimitry Andric default: 683*0b57cec5SDimitry Andric return formatv("Unknown error type for record {0}\n", 684*0b57cec5SDimitry Andric format_xray_record(Record, Converter)); 685*0b57cec5SDimitry Andric } 686*0b57cec5SDimitry Andric } 687*0b57cec5SDimitry Andric 688*0b57cec5SDimitry Andric static CommandRegistration Unused(&Stack, []() -> Error { 689*0b57cec5SDimitry Andric // Load each file provided as a command-line argument. For each one of them 690*0b57cec5SDimitry Andric // account to a single StackTrie, and just print the whole trie for now. 691*0b57cec5SDimitry Andric StackTrie ST; 692*0b57cec5SDimitry Andric InstrumentationMap Map; 693*0b57cec5SDimitry Andric if (!StacksInstrMap.empty()) { 694*0b57cec5SDimitry Andric auto InstrumentationMapOrError = loadInstrumentationMap(StacksInstrMap); 695*0b57cec5SDimitry Andric if (!InstrumentationMapOrError) 696*0b57cec5SDimitry Andric return joinErrors( 697*0b57cec5SDimitry Andric make_error<StringError>( 698*0b57cec5SDimitry Andric Twine("Cannot open instrumentation map: ") + StacksInstrMap, 699*0b57cec5SDimitry Andric std::make_error_code(std::errc::invalid_argument)), 700*0b57cec5SDimitry Andric InstrumentationMapOrError.takeError()); 701*0b57cec5SDimitry Andric Map = std::move(*InstrumentationMapOrError); 702*0b57cec5SDimitry Andric } 703*0b57cec5SDimitry Andric 704*0b57cec5SDimitry Andric if (SeparateThreadStacks && AggregateThreads) 705*0b57cec5SDimitry Andric return make_error<StringError>( 706*0b57cec5SDimitry Andric Twine("Can't specify options for per thread reporting and reporting " 707*0b57cec5SDimitry Andric "that aggregates threads."), 708*0b57cec5SDimitry Andric std::make_error_code(std::errc::invalid_argument)); 709*0b57cec5SDimitry Andric 710*0b57cec5SDimitry Andric if (!DumpAllStacks && StacksOutputFormat != HUMAN) 711*0b57cec5SDimitry Andric return make_error<StringError>( 712*0b57cec5SDimitry Andric Twine("Can't specify a non-human format without -all-stacks."), 713*0b57cec5SDimitry Andric std::make_error_code(std::errc::invalid_argument)); 714*0b57cec5SDimitry Andric 715*0b57cec5SDimitry Andric if (DumpAllStacks && StacksOutputFormat == HUMAN) 716*0b57cec5SDimitry Andric return make_error<StringError>( 717*0b57cec5SDimitry Andric Twine("You must specify a non-human format when reporting with " 718*0b57cec5SDimitry Andric "-all-stacks."), 719*0b57cec5SDimitry Andric std::make_error_code(std::errc::invalid_argument)); 720*0b57cec5SDimitry Andric 721*0b57cec5SDimitry Andric symbolize::LLVMSymbolizer Symbolizer; 722*0b57cec5SDimitry Andric FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer, 723*0b57cec5SDimitry Andric Map.getFunctionAddresses()); 724*0b57cec5SDimitry Andric // TODO: Someday, support output to files instead of just directly to 725*0b57cec5SDimitry Andric // standard output. 726*0b57cec5SDimitry Andric for (const auto &Filename : StackInputs) { 727*0b57cec5SDimitry Andric auto TraceOrErr = loadTraceFile(Filename); 728*0b57cec5SDimitry Andric if (!TraceOrErr) { 729*0b57cec5SDimitry Andric if (!StackKeepGoing) 730*0b57cec5SDimitry Andric return joinErrors( 731*0b57cec5SDimitry Andric make_error<StringError>( 732*0b57cec5SDimitry Andric Twine("Failed loading input file '") + Filename + "'", 733*0b57cec5SDimitry Andric std::make_error_code(std::errc::invalid_argument)), 734*0b57cec5SDimitry Andric TraceOrErr.takeError()); 735*0b57cec5SDimitry Andric logAllUnhandledErrors(TraceOrErr.takeError(), errs()); 736*0b57cec5SDimitry Andric continue; 737*0b57cec5SDimitry Andric } 738*0b57cec5SDimitry Andric auto &T = *TraceOrErr; 739*0b57cec5SDimitry Andric StackTrie::AccountRecordState AccountRecordState = 740*0b57cec5SDimitry Andric StackTrie::AccountRecordState::CreateInitialState(); 741*0b57cec5SDimitry Andric for (const auto &Record : T) { 742*0b57cec5SDimitry Andric auto error = ST.accountRecord(Record, &AccountRecordState); 743*0b57cec5SDimitry Andric if (error != StackTrie::AccountRecordStatus::OK) { 744*0b57cec5SDimitry Andric if (!StackKeepGoing) 745*0b57cec5SDimitry Andric return make_error<StringError>( 746*0b57cec5SDimitry Andric CreateErrorMessage(error, Record, FuncIdHelper), 747*0b57cec5SDimitry Andric make_error_code(errc::illegal_byte_sequence)); 748*0b57cec5SDimitry Andric errs() << CreateErrorMessage(error, Record, FuncIdHelper); 749*0b57cec5SDimitry Andric } 750*0b57cec5SDimitry Andric } 751*0b57cec5SDimitry Andric } 752*0b57cec5SDimitry Andric if (ST.isEmpty()) { 753*0b57cec5SDimitry Andric return make_error<StringError>( 754*0b57cec5SDimitry Andric "No instrumented calls were accounted in the input file.", 755*0b57cec5SDimitry Andric make_error_code(errc::result_out_of_range)); 756*0b57cec5SDimitry Andric } 757*0b57cec5SDimitry Andric 758*0b57cec5SDimitry Andric // Report the stacks in a long form mode for another tool to analyze. 759*0b57cec5SDimitry Andric if (DumpAllStacks) { 760*0b57cec5SDimitry Andric if (AggregateThreads) { 761*0b57cec5SDimitry Andric switch (RequestedAggregation) { 762*0b57cec5SDimitry Andric case AggregationType::TOTAL_TIME: 763*0b57cec5SDimitry Andric ST.printAllAggregatingThreads<AggregationType::TOTAL_TIME>( 764*0b57cec5SDimitry Andric outs(), FuncIdHelper, StacksOutputFormat); 765*0b57cec5SDimitry Andric break; 766*0b57cec5SDimitry Andric case AggregationType::INVOCATION_COUNT: 767*0b57cec5SDimitry Andric ST.printAllAggregatingThreads<AggregationType::INVOCATION_COUNT>( 768*0b57cec5SDimitry Andric outs(), FuncIdHelper, StacksOutputFormat); 769*0b57cec5SDimitry Andric break; 770*0b57cec5SDimitry Andric } 771*0b57cec5SDimitry Andric } else { 772*0b57cec5SDimitry Andric switch (RequestedAggregation) { 773*0b57cec5SDimitry Andric case AggregationType::TOTAL_TIME: 774*0b57cec5SDimitry Andric ST.printAllPerThread<AggregationType::TOTAL_TIME>(outs(), FuncIdHelper, 775*0b57cec5SDimitry Andric StacksOutputFormat); 776*0b57cec5SDimitry Andric break; 777*0b57cec5SDimitry Andric case AggregationType::INVOCATION_COUNT: 778*0b57cec5SDimitry Andric ST.printAllPerThread<AggregationType::INVOCATION_COUNT>( 779*0b57cec5SDimitry Andric outs(), FuncIdHelper, StacksOutputFormat); 780*0b57cec5SDimitry Andric break; 781*0b57cec5SDimitry Andric } 782*0b57cec5SDimitry Andric } 783*0b57cec5SDimitry Andric return Error::success(); 784*0b57cec5SDimitry Andric } 785*0b57cec5SDimitry Andric 786*0b57cec5SDimitry Andric // We're only outputting top stacks. 787*0b57cec5SDimitry Andric if (AggregateThreads) { 788*0b57cec5SDimitry Andric ST.printAggregatingThreads(outs(), FuncIdHelper); 789*0b57cec5SDimitry Andric } else if (SeparateThreadStacks) { 790*0b57cec5SDimitry Andric ST.printPerThread(outs(), FuncIdHelper); 791*0b57cec5SDimitry Andric } else { 792*0b57cec5SDimitry Andric ST.printIgnoringThreads(outs(), FuncIdHelper); 793*0b57cec5SDimitry Andric } 794*0b57cec5SDimitry Andric return Error::success(); 795*0b57cec5SDimitry Andric }); 796