//===-- DecodedThread.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DecodedThread.h"
#include "TraceCursorIntelPT.h"
#include <intel-pt.h>
#include <memory>
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

char IntelPTError::ID;

IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}

void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << pt_errstr(pt_errcode(m_libipt_error_code));
  if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
    OS << formatv(": {0:x+16}", m_address);
}

bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

double DecodedThread::NanosecondsRange::GetInterpolatedTime(
    uint64_t item_index, uint64_t begin_of_time_nanos,
    const LinuxPerfZeroTscConversion &tsc_conversion) const {
  uint64_t items_since_last_tsc = item_index - first_item_index;

  auto interpolate = [&](uint64_t next_range_start_ns) {
    if (next_range_start_ns == nanos) {
      // If the resolution of the conversion formula is bad enough to consider
      // these two timestamps as equal, then we just increase the next one by 1
      // for correction.
      next_range_start_ns++;
    }
    long double item_duration =
        static_cast<long double>(next_range_start_ns - nanos) / items_count;
    return (nanos - begin_of_time_nanos) +
           items_since_last_tsc * item_duration;
  };

  if (!next_range) {
    // If this is the last TSC range, we have to extrapolate. In this case, we
    // assume that each instruction took one TSC, which is what an instruction
    // would take if no parallelism is achieved and the frequency multiplier is
    // 1.
    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  }
  if (items_count < (next_range->tsc - tsc)) {
    // If the number of items in this range is less than the total TSC duration
    // of this range, i.e. each instruction took longer than 1 TSC, then we can
    // assume that something else happened between these TSCs (e.g. a context
    // switch, change to kernel, decoding errors, etc). In this case, we also
    // assume that each instruction took 1 TSC. A proper way to improve this
    // would be to analyze the next events in the trace looking for context
    // switches or trace disablement events, but for now, as we only want an
    // approximation, we keep it simple. We are also guaranteed that the time
    // in nanos of the next range is different from the current one, just
    // because of the definition of a NanosecondsRange.
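    //
    // As a rough illustration (hypothetical numbers): if this range starts at
    // nanos = 5'000 with tsc = 1'000 and holds items_count = 10 items, while
    // the next range starts at tsc = 1'100, we ignore the 100-TSC gap and
    // interpolate as if the 10 items spanned only 10 TSCs, clamping so we
    // never go past the start time of the next range.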
    return interpolate(
        std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
  }

  // In this case, each item took less than 1 TSC, so some parallelism was
  // achieved, which is an indication that we didn't suffer from any kind of
  // interruption.
  return interpolate(next_range->nanos);
}

uint64_t DecodedThread::GetItemsCount() const { return m_item_data.size(); }

lldb::addr_t
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return std::get<lldb::addr_t>(m_item_data[item_index]);
}

lldb::addr_t
DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  return m_psb_offsets.find(item_index)->second;
}

ThreadSP DecodedThread::GetThread() { return m_thread_sp; }

template <typename Data>
DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind, Data &&data) {
  m_item_data.emplace_back(data);

  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;

  return m_item_data.back();
}

void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
  m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
  AppendEvent(lldb::eTraceEventSyncPoint);
}

void DecodedThread::NotifyTsc(TSC tsc) {
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;
  if (m_last_tsc)
    assert(tsc >= (*m_last_tsc)->second.tsc &&
           "We can't have decreasing times");

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
    m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
  auto it = m_cpus.upper_bound(item_index);
  return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
}
std::optional<DecodedThread::TSCRange>
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
  auto next_it = m_tscs.upper_bound(item_index);
  if (next_it == m_tscs.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

std::optional<DecodedThread::NanosecondsRange>
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
  auto next_it = m_nanoseconds.upper_bound(item_index);
  if (next_it == m_nanoseconds.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

uint64_t DecodedThread::GetTotalInstructionCount() const {
  return m_insn_count;
}

void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent, event);
  m_events_stats.RecordEvent(event);
}

void DecodedThread::AppendInstruction(const pt_insn &insn) {
  CreateNewTraceItem(lldb::eTraceItemKindInstruction, insn.ip);
  m_insn_count++;
}

void DecodedThread::AppendError(const IntelPTError &error) {
  CreateNewTraceItem(lldb::eTraceItemKindError, error.message());
  m_error_stats.RecordError(/*fatal=*/false);
}

void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
  CreateNewTraceItem(lldb::eTraceItemKindError, err.str());
  m_error_stats.RecordError(fatal);
}

lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
  return std::get<lldb::TraceEvent>(m_item_data[item_index]);
}

const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
  return m_events_stats;
}

void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  events_counts[event]++;
  total_count++;
}

uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
  uint64_t total = 0;
  for (const auto &[kind, count] : libipt_errors)
    total += count;

  return total + other_errors + fatal_errors;
}

void DecodedThread::ErrorStats::RecordError(bool fatal) {
  if (fatal)
    fatal_errors++;
  else
    other_errors++;
}

void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
}

const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
  return m_error_stats;
}

lldb::TraceItemKind
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return std::visit(
      llvm::makeVisitor(
          [](const std::string &) { return lldb::eTraceItemKindError; },
          [](lldb::TraceEvent) { return lldb::eTraceItemKindEvent; },
          [](lldb::addr_t) { return lldb::eTraceItemKindInstruction; }),
      m_item_data[item_index]);
}

llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  if (item_index >= m_item_data.size())
    return llvm::StringRef();
  return std::get<std::string>(m_item_data[item_index]);
}

DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}