//===-- DecodedThread.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DecodedThread.h"
#include "TraceCursorIntelPT.h"
#include <intel-pt.h>
#include <memory>
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

char IntelPTError::ID;

IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}

void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << pt_errstr(pt_errcode(m_libipt_error_code));
  if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
    OS << formatv(": {0:x+16}", m_address);
}

bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

double DecodedThread::NanosecondsRange::GetInterpolatedTime(
    uint64_t item_index, uint64_t begin_of_time_nanos,
    const LinuxPerfZeroTscConversion &tsc_conversion) const {
  uint64_t items_since_last_tsc = item_index - first_item_index;

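  // The wall-clock time of an item is interpolated linearly between the start
  // of this range and the start of the next one, assuming items are evenly
  // spaced. For example, if this range starts at 1000 ns with 10 items and the
  // next range starts at 1100 ns, each item is assumed to last 10 ns, so the
  // item with items_since_last_tsc == 3 maps to 1030 ns relative to the
  // beginning of the trace.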
  auto interpolate = [&](uint64_t next_range_start_ns) {
    if (next_range_start_ns == nanos) {
      // If the resolution of the conversion formula is bad enough to consider
      // these two timestamps as equal, then we just increase the next one by 1
      // for correction.
      next_range_start_ns++;
    }
    // The duration of each item is the nanosecond span of this range divided
    // by the number of items it contains.
    long double item_duration =
        static_cast<long double>(next_range_start_ns - nanos) / items_count;
    return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
  };

  if (!next_range) {
    // This is the last TSC range, so we have to extrapolate. In this case, we
    // assume that each instruction took one TSC, which is what an instruction
    // would take if no parallelism is achieved and the frequency multiplier is
    // 1.
    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  }
  if (items_count < (next_range->tsc - tsc)) {
    // If the number of items in this range is less than the total TSC duration
    // of this range, i.e. each instruction took longer than 1 TSC, then we can
    // assume that something else happened between these TSCs (e.g. a context
    // switch, change to kernel, decoding errors, etc). In this case, we also
    // assume that each instruction took 1 TSC. A proper way to improve this
    // would be to analyze the next events in the trace looking for context
    // switches or trace disablement events, but for now, as we only want an
    // approximation, we keep it simple. We are also guaranteed that the time in
    // nanos of the next range is different from the current one, just because
    // of the definition of a NanosecondsRange.
    return interpolate(
        std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
  }

  // In this case, each item took less than 1 TSC, so some parallelism was
  // achieved, which is an indication that we didn't suffer any kind of
  // interruption.
  return interpolate(next_range->nanos);
}

uint64_t DecodedThread::GetItemsCount() const { return m_item_data.size(); }

lldb::addr_t
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return std::get<lldb::addr_t>(m_item_data[item_index]);
}

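// item_index is expected to be the index of a synchronization point event; its
// PSB offset was recorded by NotifySyncPoint when that event was appended.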
lldb::addr_t
DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  return m_psb_offsets.find(item_index)->second;
}

ThreadSP DecodedThread::GetThread() { return m_thread_sp; }

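// Appends the storage for a new trace item and attributes it to the TSC and
// nanoseconds ranges that are currently open, if any, by growing their item
// counts.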
template <typename Data>
DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind, Data &&data) {
  m_item_data.emplace_back(data);

  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;

  return m_item_data.back();
}

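// Record the raw trace offset of a PSB packet (a synchronization point) keyed
// by the index of the sync point event that is appended right after it.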
void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
  m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
  AppendEvent(lldb::eTraceEventSyncPoint);
}

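// Record a new TSC read from the trace at the current item index. Repeated
// identical TSCs are collapsed into a single range, and, when a conversion
// formula is available, a parallel range of wall-clock nanoseconds is kept in
// sync with it.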
void DecodedThread::NotifyTsc(TSC tsc) {
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;
  if (m_last_tsc)
    assert(tsc >= (*m_last_tsc)->second.tsc &&
           "We can't have decreasing times");

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
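      // Link the previous range to the one we just created so that
      // GetInterpolatedTime can interpolate up to the next timestamp.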
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
    m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
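  // m_cpus is keyed by the item index at which each CPU became active.
  // upper_bound finds the first entry after item_index, so its predecessor, if
  // any, holds the CPU that was in effect at item_index.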
  auto it = m_cpus.upper_bound(item_index);
  return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
}

std::optional<DecodedThread::TSCRange>
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
  auto next_it = m_tscs.upper_bound(item_index);
  if (next_it == m_tscs.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

std::optional<DecodedThread::NanosecondsRange>
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
  auto next_it = m_nanoseconds.upper_bound(item_index);
  if (next_it == m_nanoseconds.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

uint64_t DecodedThread::GetTotalInstructionCount() const {
  return m_insn_count;
}

void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent, event);
  m_events_stats.RecordEvent(event);
}

void DecodedThread::AppendInstruction(const pt_insn &insn) {
  CreateNewTraceItem(lldb::eTraceItemKindInstruction, insn.ip);
  m_insn_count++;
}

void DecodedThread::AppendError(const IntelPTError &error) {
  CreateNewTraceItem(lldb::eTraceItemKindError, error.message());
  // Record the libipt error code so it is counted in the per-code statistics.
  m_error_stats.RecordError(error.GetLibiptErrorCode());
}

void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
  CreateNewTraceItem(lldb::eTraceItemKindError, err.str());
  m_error_stats.RecordError(fatal);
}

lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
  return std::get<lldb::TraceEvent>(m_item_data[item_index]);
}

const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
  return m_events_stats;
}

void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  events_counts[event]++;
  total_count++;
}

uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
  uint64_t total = 0;
  for (const auto &[kind, count] : libipt_errors)
    total += count;

  return total + other_errors + fatal_errors;
}

void DecodedThread::ErrorStats::RecordError(bool fatal) {
  if (fatal)
    fatal_errors++;
  else
    other_errors++;
}

void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
}

const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
  return m_error_stats;
}

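// The kind of an item is derived from the active alternative of the
// TraceItemStorage variant: std::string means an error, TraceEvent an event,
// and addr_t the load address of an instruction.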
lldb::TraceItemKind
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return std::visit(
      llvm::makeVisitor(
          [](const std::string &) { return lldb::eTraceItemKindError; },
          [](lldb::TraceEvent) { return lldb::eTraceItemKindEvent; },
          [](lldb::addr_t) { return lldb::eTraceItemKindInstruction; }),
      m_item_data[item_index]);
}

llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  if (item_index >= m_item_data.size())
    return llvm::StringRef();
  return std::get<std::string>(m_item_data[item_index]);
}

DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

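// Rough lower-bound estimate: each map entry is charged only the size of its
// key and mapped value; node and allocator overhead, as well as the heap
// buffers of error strings stored in m_item_data, are not counted.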
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}