1 //===- xray-converter.cpp: XRay Trace Conversion --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implements the trace conversion functions. 10 // 11 //===----------------------------------------------------------------------===// 12 #include "xray-converter.h" 13 14 #include "trie-node.h" 15 #include "xray-registry.h" 16 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 17 #include "llvm/Support/EndianStream.h" 18 #include "llvm/Support/FileSystem.h" 19 #include "llvm/Support/FormatVariadic.h" 20 #include "llvm/Support/ScopedPrinter.h" 21 #include "llvm/Support/YAMLTraits.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/XRay/InstrumentationMap.h" 24 #include "llvm/XRay/Trace.h" 25 #include "llvm/XRay/YAMLXRayRecord.h" 26 27 using namespace llvm; 28 using namespace xray; 29 30 // llvm-xray convert 31 // ---------------------------------------------------------------------------- 32 static cl::SubCommand Convert("convert", "Trace Format Conversion"); 33 static cl::opt<std::string> ConvertInput(cl::Positional, 34 cl::desc("<xray log file>"), 35 cl::Required, cl::sub(Convert)); 36 enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; 37 static cl::opt<ConvertFormats> ConvertOutputFormat( 38 "output-format", cl::desc("output format"), 39 cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), 40 clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), 41 clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", 42 "Output in chrome's trace event format. " 43 "May be visualized with the Catapult trace viewer.")), 44 cl::sub(Convert)); 45 static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), 46 cl::desc("Alias for -output-format"), 47 cl::sub(Convert)); 48 static cl::opt<std::string> 49 ConvertOutput("output", cl::value_desc("output file"), cl::init("-"), 50 cl::desc("output file; use '-' for stdout"), 51 cl::sub(Convert)); 52 static cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput), 53 cl::desc("Alias for -output"), 54 cl::sub(Convert)); 55 56 static cl::opt<bool> 57 ConvertSymbolize("symbolize", 58 cl::desc("symbolize function ids from the input log"), 59 cl::init(false), cl::sub(Convert)); 60 static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize), 61 cl::desc("Alias for -symbolize"), 62 cl::sub(Convert)); 63 64 static cl::opt<std::string> 65 ConvertInstrMap("instr_map", 66 cl::desc("binary with the instrumentation map, or " 67 "a separate instrumentation map"), 68 cl::value_desc("binary with xray_instr_map"), 69 cl::sub(Convert), cl::init("")); 70 static cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap), 71 cl::desc("Alias for -instr_map"), 72 cl::sub(Convert)); 73 static cl::opt<bool> ConvertSortInput( 74 "sort", 75 cl::desc("determines whether to sort input log records by timestamp"), 76 cl::sub(Convert), cl::init(true)); 77 static cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput), 78 cl::desc("Alias for -sort"), 79 cl::sub(Convert)); 80 81 using llvm::yaml::Output; 82 83 void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) { 84 YAMLXRayTrace Trace; 85 const auto &FH = Records.getFileHeader(); 86 Trace.Header = {FH.Version, FH.Type, FH.ConstantTSC, FH.NonstopTSC, 87 FH.CycleFrequency}; 88 Trace.Records.reserve(Records.size()); 89 for (const auto &R : Records) { 90 Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId, 91 Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId) 92 : llvm::to_string(R.FuncId), 93 R.TSC, R.TId, R.PId, R.CallArgs, R.Data}); 94 } 95 Output Out(OS, nullptr, 0); 96 Out.setWriteDefaultValues(false); 97 Out << Trace; 98 } 99 100 void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) { 101 // First write out the file header, in the correct endian-appropriate format 102 // (XRay assumes currently little endian). 103 support::endian::Writer Writer(OS, support::endianness::little); 104 const auto &FH = Records.getFileHeader(); 105 Writer.write(FH.Version); 106 Writer.write(FH.Type); 107 uint32_t Bitfield{0}; 108 if (FH.ConstantTSC) 109 Bitfield |= 1uL; 110 if (FH.NonstopTSC) 111 Bitfield |= 1uL << 1; 112 Writer.write(Bitfield); 113 Writer.write(FH.CycleFrequency); 114 115 // There's 16 bytes of padding at the end of the file header. 116 static constexpr uint32_t Padding4B = 0; 117 Writer.write(Padding4B); 118 Writer.write(Padding4B); 119 Writer.write(Padding4B); 120 Writer.write(Padding4B); 121 122 // Then write out the rest of the records, still in an endian-appropriate 123 // format. 124 for (const auto &R : Records) { 125 switch (R.Type) { 126 case RecordTypes::ENTER: 127 case RecordTypes::ENTER_ARG: 128 Writer.write(R.RecordType); 129 Writer.write(static_cast<uint8_t>(R.CPU)); 130 Writer.write(uint8_t{0}); 131 break; 132 case RecordTypes::EXIT: 133 Writer.write(R.RecordType); 134 Writer.write(static_cast<uint8_t>(R.CPU)); 135 Writer.write(uint8_t{1}); 136 break; 137 case RecordTypes::TAIL_EXIT: 138 Writer.write(R.RecordType); 139 Writer.write(static_cast<uint8_t>(R.CPU)); 140 Writer.write(uint8_t{2}); 141 break; 142 case RecordTypes::CUSTOM_EVENT: 143 case RecordTypes::TYPED_EVENT: 144 // Skip custom and typed event records for v1 logs. 145 continue; 146 } 147 Writer.write(R.FuncId); 148 Writer.write(R.TSC); 149 Writer.write(R.TId); 150 151 if (FH.Version >= 3) 152 Writer.write(R.PId); 153 else 154 Writer.write(Padding4B); 155 156 Writer.write(Padding4B); 157 Writer.write(Padding4B); 158 } 159 } 160 161 namespace { 162 163 // A structure that allows building a dictionary of stack ids for the Chrome 164 // trace event format. 165 struct StackIdData { 166 // Each Stack of function calls has a unique ID. 167 unsigned id; 168 169 // Bookkeeping so that IDs can be maintained uniquely across threads. 170 // Traversal keeps sibling pointers to other threads stacks. This is helpful 171 // to determine when a thread encounters a new stack and should assign a new 172 // unique ID. 173 SmallVector<TrieNode<StackIdData> *, 4> siblings; 174 }; 175 176 using StackTrieNode = TrieNode<StackIdData>; 177 178 // A helper function to find the sibling nodes for an encountered function in a 179 // thread of execution. Relies on the invariant that each time a new node is 180 // traversed in a thread, sibling bidirectional pointers are maintained. 181 SmallVector<StackTrieNode *, 4> 182 findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, 183 const DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> 184 &StackRootsByThreadId) { 185 186 SmallVector<StackTrieNode *, 4> Siblings{}; 187 188 if (parent == nullptr) { 189 for (auto map_iter : StackRootsByThreadId) { 190 // Only look for siblings in other threads. 191 if (map_iter.first != TId) 192 for (auto node_iter : map_iter.second) { 193 if (node_iter->FuncId == FnId) 194 Siblings.push_back(node_iter); 195 } 196 } 197 return Siblings; 198 } 199 200 for (auto *ParentSibling : parent->ExtraData.siblings) 201 for (auto node_iter : ParentSibling->Callees) 202 if (node_iter->FuncId == FnId) 203 Siblings.push_back(node_iter); 204 205 return Siblings; 206 } 207 208 // Given a function being invoked in a thread with id TId, finds and returns the 209 // StackTrie representing the function call stack. If no node exists, creates 210 // the node. Assigns unique IDs to stacks newly encountered among all threads 211 // and keeps sibling links up to when creating new nodes. 212 StackTrieNode *findOrCreateStackNode( 213 StackTrieNode *Parent, int32_t FuncId, uint32_t TId, 214 DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId, 215 DenseMap<unsigned, StackTrieNode *> &StacksByStackId, unsigned *id_counter, 216 std::forward_list<StackTrieNode> &NodeStore) { 217 SmallVector<StackTrieNode *, 4> &ParentCallees = 218 Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees; 219 auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { 220 return FuncId == ParentCallee->FuncId; 221 }); 222 if (match != ParentCallees.end()) 223 return *match; 224 225 SmallVector<StackTrieNode *, 4> siblings = 226 findSiblings(Parent, FuncId, TId, StackRootsByThreadId); 227 if (siblings.empty()) { 228 NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); 229 StackTrieNode *CurrentStack = &NodeStore.front(); 230 StacksByStackId[*id_counter - 1] = CurrentStack; 231 ParentCallees.push_back(CurrentStack); 232 return CurrentStack; 233 } 234 unsigned stack_id = siblings[0]->ExtraData.id; 235 NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); 236 StackTrieNode *CurrentStack = &NodeStore.front(); 237 for (auto *sibling : CurrentStack->ExtraData.siblings) 238 sibling->ExtraData.siblings.push_back(CurrentStack); 239 ParentCallees.push_back(CurrentStack); 240 return CurrentStack; 241 } 242 243 void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId, 244 uint32_t TId, uint32_t PId, bool Symbolize, 245 const FuncIdConversionHelper &FuncIdHelper, 246 double EventTimestampUs, 247 const StackTrieNode &StackCursor, 248 StringRef FunctionPhenotype) { 249 OS << " "; 250 if (Version >= 3) { 251 OS << llvm::formatv( 252 R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )" 253 R"("ts" : "{4:f4}", "sf" : "{5}" })", 254 (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) 255 : llvm::to_string(FuncId)), 256 FunctionPhenotype, TId, PId, EventTimestampUs, 257 StackCursor.ExtraData.id); 258 } else { 259 OS << llvm::formatv( 260 R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" 261 R"("ts" : "{3:f3}", "sf" : "{4}" })", 262 (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) 263 : llvm::to_string(FuncId)), 264 FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); 265 } 266 } 267 268 } // namespace 269 270 void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, 271 raw_ostream &OS) { 272 const auto &FH = Records.getFileHeader(); 273 auto Version = FH.Version; 274 auto CycleFreq = FH.CycleFrequency; 275 276 unsigned id_counter = 0; 277 278 OS << "{\n \"traceEvents\": ["; 279 DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{}; 280 DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{}; 281 DenseMap<unsigned, StackTrieNode *> StacksByStackId{}; 282 std::forward_list<StackTrieNode> NodeStore{}; 283 int loop_count = 0; 284 for (const auto &R : Records) { 285 if (loop_count++ == 0) 286 OS << "\n"; 287 else 288 OS << ",\n"; 289 290 // Chrome trace event format always wants data in micros. 291 // CyclesPerMicro = CycleHertz / 10^6 292 // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp 293 // Could lose some precision here by converting the TSC to a double to 294 // multiply by the period in micros. 52 bit mantissa is a good start though. 295 // TODO: Make feature request to Chrome Trace viewer to accept ticks and a 296 // frequency or do some more involved calculation to avoid dangers of 297 // conversion. 298 double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); 299 StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; 300 switch (R.Type) { 301 case RecordTypes::CUSTOM_EVENT: 302 case RecordTypes::TYPED_EVENT: 303 // TODO: Support typed and custom event rendering on Chrome Trace Viewer. 304 break; 305 case RecordTypes::ENTER: 306 case RecordTypes::ENTER_ARG: 307 StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, 308 StackRootsByThreadId, StacksByStackId, 309 &id_counter, NodeStore); 310 // Each record is represented as a json dictionary with function name, 311 // type of B for begin or E for end, thread id, process id, 312 // timestamp in microseconds, and a stack frame id. The ids are logged 313 // in an id dictionary after the events. 314 writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize, 315 FuncIdHelper, EventTimestampUs, *StackCursor, "B"); 316 break; 317 case RecordTypes::EXIT: 318 case RecordTypes::TAIL_EXIT: 319 // No entries to record end for. 320 if (StackCursor == nullptr) 321 break; 322 // Should we emit an END record anyway or account this condition? 323 // (And/Or in loop termination below) 324 StackTrieNode *PreviousCursor = nullptr; 325 do { 326 if (PreviousCursor != nullptr) { 327 OS << ",\n"; 328 } 329 writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId, 330 Symbolize, FuncIdHelper, EventTimestampUs, 331 *StackCursor, "E"); 332 PreviousCursor = StackCursor; 333 StackCursor = StackCursor->Parent; 334 } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); 335 break; 336 } 337 } 338 OS << "\n ],\n"; // Close the Trace Events array. 339 OS << " " 340 << "\"displayTimeUnit\": \"ns\",\n"; 341 342 // The stackFrames dictionary substantially reduces size of the output file by 343 // avoiding repeating the entire call stack of function names for each entry. 344 OS << R"( "stackFrames": {)"; 345 int stack_frame_count = 0; 346 for (auto map_iter : StacksByStackId) { 347 if (stack_frame_count++ == 0) 348 OS << "\n"; 349 else 350 OS << ",\n"; 351 OS << " "; 352 OS << llvm::formatv( 353 R"("{0}" : { "name" : "{1}")", map_iter.first, 354 (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) 355 : llvm::to_string(map_iter.second->FuncId))); 356 if (map_iter.second->Parent != nullptr) 357 OS << llvm::formatv(R"(, "parent": "{0}")", 358 map_iter.second->Parent->ExtraData.id); 359 OS << " }"; 360 } 361 OS << "\n }\n"; // Close the stack frames map. 362 OS << "}\n"; // Close the JSON entry. 363 } 364 365 namespace llvm { 366 namespace xray { 367 368 static CommandRegistration Unused(&Convert, []() -> Error { 369 // FIXME: Support conversion to BINARY when upgrading XRay trace versions. 370 InstrumentationMap Map; 371 if (!ConvertInstrMap.empty()) { 372 auto InstrumentationMapOrError = loadInstrumentationMap(ConvertInstrMap); 373 if (!InstrumentationMapOrError) 374 return joinErrors(make_error<StringError>( 375 Twine("Cannot open instrumentation map '") + 376 ConvertInstrMap + "'", 377 std::make_error_code(std::errc::invalid_argument)), 378 InstrumentationMapOrError.takeError()); 379 Map = std::move(*InstrumentationMapOrError); 380 } 381 382 const auto &FunctionAddresses = Map.getFunctionAddresses(); 383 symbolize::LLVMSymbolizer Symbolizer; 384 llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, 385 FunctionAddresses); 386 llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize); 387 std::error_code EC; 388 raw_fd_ostream OS(ConvertOutput, EC, 389 ConvertOutputFormat == ConvertFormats::BINARY 390 ? sys::fs::OpenFlags::F_None 391 : sys::fs::OpenFlags::F_Text); 392 if (EC) 393 return make_error<StringError>( 394 Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); 395 396 auto TraceOrErr = loadTraceFile(ConvertInput, ConvertSortInput); 397 if (!TraceOrErr) 398 return joinErrors( 399 make_error<StringError>( 400 Twine("Failed loading input file '") + ConvertInput + "'.", 401 std::make_error_code(std::errc::executable_format_error)), 402 TraceOrErr.takeError()); 403 404 auto &T = *TraceOrErr; 405 switch (ConvertOutputFormat) { 406 case ConvertFormats::YAML: 407 TC.exportAsYAML(T, OS); 408 break; 409 case ConvertFormats::BINARY: 410 TC.exportAsRAWv1(T, OS); 411 break; 412 case ConvertFormats::CHROME_TRACE_EVENT: 413 TC.exportAsChromeTraceEventFormat(T, OS); 414 break; 415 } 416 return Error::success(); 417 }); 418 419 } // namespace xray 420 } // namespace llvm 421