xref: /titanic_50/usr/src/cmd/latencytop/common/latencytop.d (revision de3d2ce46fc25c7b67ccbae4afe5f15e5357568f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
26 #pragma D option aggsize=8m
27 #pragma D option bufsize=16m
28 #pragma D option dynvarsize=16m
29 #pragma D option aggrate=0
30 #pragma D option stackframes=64
31 
32 #if defined(ENABLE_SCHED)
33 #define TRACE_FILTER
34 #define TRACE_FILTER_COND(a)	/ (a) /
35 #else
36 #define TRACE_FILTER	/ pid != 0 /
37 #define TRACE_FILTER_COND(a)	/ pid != 0 && (a) /
38 #endif
39 
40 #define FILTER_THRESHOLD	5000000
41 /* From thread.h */
42 #define T_WAKEABLE		2
43 
44 /*
45  * This array is used to store the timestamp when threads are enqueued
46  * to dispq.
47  * self-> is not accessible when enqueue happens.
48  */
49 unsigned long long lt_timestamps[int, int];
50 
51 self unsigned int lt_is_block_wakeable;
52 self unsigned long long lt_sleep_start;
53 self unsigned long long lt_sleep_duration;
54 self unsigned long long lt_sch_delay;
55 self unsigned int lt_counter;		/* only used in low overhead */
56 self unsigned long long lt_timestamp;	/* only used in low overhead */
57 
58 /*
59  * Make sure we leave nothing behind,
60  * otherwise memory will eventually run out.
61  */
62 proc:::lwp-exit
63 {
64 	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
65 	self->lt_sleep_start = 0;
66 	self->lt_is_block_wakeable = 0;
67 	self->lt_counter = 0;
68 	self->lt_timestamp = 0;
69 }
70 
71 #if !defined(ENABLE_LOW_OVERHEAD)
72 /*
73  * Log timestamp when a thread is off CPU.
74  */
75 sched::resume:off-cpu
76 TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
77 {
78 	self->lt_sleep_start = timestamp;
79 	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
80 	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] =
81 	    self->lt_sleep_start;
82 }
83 
84 /*
85  * Log timestamp when a thread is put on a dispatch queue and becomes runnable.
86  */
87 sched:::enqueue
88 / lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0 /
89 {
90 	lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp;
91 }
92 
93 /*
94  * Calculate latencies when the thread is actually on CPU.
95  * This is necessary to get the right stack().
96  */
97 this unsigned long long end;
98 this unsigned long long now;
99 sched::resume:on-cpu
100 / self->lt_sleep_start != 0 /
101 {
102 	this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
103 	this->now = timestamp;
104 	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
105 	this->end = (this->end != 0 && this->end != self->lt_sleep_start)
106 	    ? this->end : this->now;
107 	self->lt_sch_delay = this->now - this->end;
108 	self->lt_sleep_duration = this->end - self->lt_sleep_start;
109 	self->lt_sleep_start = 0;
110 }
111 
112 /*
113  * Filter: drop all "large" latencies when it is wakeable,
114  * trying to filter sleep() etc.
115  */
116 #if defined(ENABLE_FILTER)
117 sched::resume:on-cpu
118 / self->lt_sleep_duration > FILTER_THRESHOLD &&
119   self->lt_is_block_wakeable != 0 /
120 {
121 	self->lt_sch_delay = 0;
122 	self->lt_sleep_duration = 0;
123 	self->lt_is_block_wakeable = 0;
124 }
125 #endif /* defined(ENABLE_FILTER) */
126 
127 /*
128  * Write sleep time to the aggregation.
129  * lt_sleep_duration is from thread off cpu to it is enqueued again.
130  */
131 sched::resume:on-cpu
132 / self->lt_sleep_duration != 0 /
133 {
134 	@lt_call_count[pid, tid, stack()] = count();
135 	@lt_call_sum[pid, tid, stack()] = sum(self->lt_sleep_duration);
136 	@lt_call_max[pid, tid, stack()] = max(self->lt_sleep_duration);
137 	self->lt_is_block_wakeable = 0;	/* Clean the flag to avoid leak */
138 	self->lt_sleep_duration = 0;
139 }
140 
141 /*
142  * Write time spent in queue to the aggregation.
143  * lt_sch_delay: the interval between "thread runnable" and "thread on cpu".
144  */
145 sched::resume:on-cpu
146 / self->lt_sch_delay != 0 /
147 {
148 	@lt_named_count[pid, tid, "Wait for available CPU"] = count();
149 	@lt_named_sum[pid, tid, "Wait for available CPU"] =
150 	    sum(self->lt_sch_delay);
151 	@lt_named_max[pid, tid, "Wait for available CPU"] =
152 	    max(self->lt_sch_delay);
153 	self->lt_sch_delay = 0;
154 }
155 
156 /*
157  * Probes that tracks lock spinning
158  */
159 lockstat:::adaptive-spin
160 TRACE_FILTER
161 {
162 	@lt_named_count[pid, tid, "Adapt. lock spin"] = count();
163 	@lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
164 	@lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
165 }
166 
167 lockstat:::spin-spin
168 TRACE_FILTER
169 {
170 	@lt_named_count[pid, tid, "Spinlock spin"] = count();
171 	@lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
172 	@lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
173 }
174 
175 /*
176  * Probes that tracks lock blocking
177  */
178 lockstat:::adaptive-block
179 TRACE_FILTER
180 {
181 	@lt_named_count[pid, tid, "#Adapt. lock block"] = count();
182 	@lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
183 	@lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
184 }
185 
186 lockstat:::rw-block
187 TRACE_FILTER
188 {
189 	@lt_named_count[pid, tid, "#RW. lock block"] = count();
190 	@lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
191 	@lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
192 }
193 
194 #if defined(ENABLE_SYNCOBJ)
195 /*
196  * Probes that tracks synchronization objects.
197  */
198 this int stype;
199 this unsigned long long wchan;
200 this unsigned long long wtime;
201 sched:::wakeup
202 /*
203  * Currently we are not able to track wakeup from sched, because all lwpid
204  * are zero for when we trace sched. That makes lt_timestamps not usable.
205  */
206 / args[1]->pr_pid != 0 &&
207   lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0 /
208 {
209 	this->stype = args[0]->pr_stype;
210 	this->wchan = args[0]->pr_wchan;
211 	/*
212 	 * We can use lt_timestamps[] here, because
213 	 * wakeup is always fired before enqueue.
214 	 * After enqueue, lt_timestamps[] will be overwritten.
215 	 */
216 	this->wtime = timestamp -
217 	    lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid];
218 	@lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
219 	    this->wchan] = count();
220 	@lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
221 	    this->wchan] = sum(this->wtime);
222 	@lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
223 	    this->wchan] = max(this->wtime);
224 }
225 #endif  /* defined(ENABLE_SYNCOBJ) */
226 
227 #else /* !defined(ENABLE_LOW_OVERHEAD) */
228 
229 /*
230  * This is the low overhead mode.
231  * In order to reduce the number of instructions executed during each
232  * off-cpu and on-cpu event, we do:
233  * 1. Use sampling, only update aggregations roughly 1/100 times (SAMPLE_TIMES).
234  * 2. Do not track anything other than needed for "main" window.
235  * 3. Use as few thread local variables as possible.
236  */
237 
238 #define SAMPLE_TIMES		100
239 #define SAMPLE_THRESHOLD	50000000
240 
241 /*
242  * Log timestamp when a thread is off CPU.
243  */
244 sched::resume:off-cpu
245 TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
246 {
247 	self->lt_timestamp = timestamp;
248 #if defined(ENABLE_FILTER)
249 	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
250 #endif /* defined(ENABLE_FILTER) */
251 }
252 
253 /*
254  * Calculate latencies when the thread is actually on CPU.
255  */
256 this int need_skip;
257 sched::resume:on-cpu
258 / self->lt_timestamp != 0 /
259 {
260 	self->lt_timestamp = timestamp - self->lt_timestamp;
261 
262 #if defined(ENABLE_FILTER)
263 	self->lt_timestamp =
264 	    (self->lt_timestamp > FILTER_THRESHOLD &&
265 	    self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
266 	self->lt_is_block_wakeable = 0;
267 #endif /* defined(ENABLE_FILTER) */
268 
269 	this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
270 	    self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
271 	self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
272 	self->lt_counter += this->need_skip;
273 }
274 
275 /*
276  * Log large ones first.
277  */
278 sched::resume:on-cpu
279 / self->lt_timestamp > SAMPLE_THRESHOLD /
280 {
281 	@lt_call_count[pid, tid, stack()] = sum(1);
282 	@lt_call_sum[pid, tid, stack()] = sum(self->lt_timestamp);
283 	@lt_call_max[pid, tid, stack()] = max(self->lt_timestamp);
284 
285 	self->lt_timestamp = 0;
286 }
287 
288 /*
289  * If we fall to this probe, this must be a small latency and counter
290  * reaches SAMPLE_TIMES.
291  */
292 sched::resume:on-cpu
293 / self->lt_timestamp != 0 /
294 {
295 	/* Need +1 because lt_counter has not been updated in this cycle. */
296 	@lt_call_count[pid, tid, stack()] = sum(self->lt_counter + 1);
297 	@lt_call_sum[pid, tid, stack()] =
298 	    sum((self->lt_counter + 1) * self->lt_timestamp);
299 	@lt_call_max[pid, tid, stack()] = max(self->lt_timestamp);
300 
301 	self->lt_timestamp = 0;
302 	self->lt_counter = 0;
303 }
304 
305 #endif /* !defined(ENABLE_LOW_OVERHEAD) */
306