/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008-2009, Intel Corporation.
 * All Rights Reserved.
 */

#define MAX_TAG 8
#define MAX_STACK 64

#pragma D option aggsize=8m
#pragma D option bufsize=16m
#pragma D option dynvarsize=16m
#pragma D option aggrate=0
#pragma D option stackframes=MAX_STACK
/*
 * Our D script needs to compile even if some of the TRANSLATE probes cannot
 * be found. Missing probes can be caused by an older kernel, a different
 * architecture, unloaded modules, etc.
 */
#pragma D option zdefs

#if defined(ENABLE_SCHED)
#if defined(TRACE_PID)
#define TRACE_FILTER / pid == 0 || pid == TRACE_PID /
#define TRACE_FILTER_COND(a) / (pid == 0 || pid == TRACE_PID) && (a) /
#elif defined(TRACE_PGID)
#define TRACE_FILTER / pid == 0 || curpsinfo->pr_pgid == TRACE_PGID /
#define TRACE_FILTER_COND(a) \
	/ (pid == 0 || curpsinfo->pr_pgid == TRACE_PGID) && (a) /
#else
#define TRACE_FILTER
#define TRACE_FILTER_COND(a) / (a) /
#endif
#else /* ENABLE_SCHED */
#if defined(TRACE_PID)
#define TRACE_FILTER / pid == TRACE_PID /
#define TRACE_FILTER_COND(a) / (pid == TRACE_PID) && (a) /
#elif defined(TRACE_PGID)
#define TRACE_FILTER / curpsinfo->pr_pgid == TRACE_PGID /
#define TRACE_FILTER_COND(a) / (curpsinfo->pr_pgid == TRACE_PGID) && (a) /
#else
#define TRACE_FILTER / pid != 0 /
#define TRACE_FILTER_COND(a) / (pid != 0) && (a) /
#endif
#endif /* ENABLE_SCHED */
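/*
 * Example (illustrative only): the TRACE_FILTER* macros above stand in for
 * probe predicates.  Preprocessing this script with -DTRACE_PID=1234 and
 * without ENABLE_SCHED turns a clause such as
 *
 *	sched::resume:off-cpu
 *	TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
 *
 * into the plain D clause
 *
 *	sched::resume:off-cpu
 *	/ (pid == 1234) && (curlwpsinfo->pr_state == SSLEEP) /
 *
 * The pid 1234 is a placeholder; the program driving this script supplies
 * the real value.
 */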
/* Threshold to filter WAKEABLE latencies. */
#define FILTER_THRESHOLD 5000000
/* From thread.h */
#define T_WAKEABLE 2

/*
 * This array is used to store the timestamp of when threads are enqueued
 * to a dispatch queue; self-> variables are not accessible when the
 * enqueue happens.
 */
unsigned long long lt_timestamps[int, int];

self unsigned int lt_is_block_wakeable;
self unsigned long long lt_sleep_start;
self unsigned long long lt_sleep_duration;
self unsigned long long lt_sch_delay;
self unsigned int lt_counter;		/* only used in low overhead mode */
self unsigned long long lt_timestamp;	/* only used in low overhead mode */
self unsigned int lt_stackp;
self unsigned int lt_prio[int];
self string lt_cause[int];

this unsigned int priority;
this string cause;

/*
 * Clean up everything; otherwise we would run out of dynamic variable space.
 */
proc:::lwp-exit
{
	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;

	self->lt_sleep_start = 0;
	self->lt_is_block_wakeable = 0;
	self->lt_counter = 0;
	self->lt_timestamp = 0;

	/*
	 * Workaround: there is no way to clear an associative array,
	 * so we have to clear entries 0 ~ (MAX_TAG-1) manually.
	 */
	self->lt_prio[0] = 0;
	self->lt_prio[1] = 0;
	self->lt_prio[2] = 0;
	self->lt_prio[3] = 0;
	self->lt_prio[4] = 0;
	self->lt_prio[5] = 0;
	self->lt_prio[6] = 0;
	self->lt_prio[7] = 0;

	self->lt_cause[0] = 0;
	self->lt_cause[1] = 0;
	self->lt_cause[2] = 0;
	self->lt_cause[3] = 0;
	self->lt_cause[4] = 0;
	self->lt_cause[5] = 0;
	self->lt_cause[6] = 0;
	self->lt_cause[7] = 0;
}

#if !defined(ENABLE_LOW_OVERHEAD)
/*
 * Log the timestamp when a thread is taken off the CPU.
 */
sched::resume:off-cpu
TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
{
	self->lt_sleep_start = timestamp;
	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;

	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] =
	    self->lt_sleep_start;
}

/*
 * Log the timestamp when a thread is put on a dispatch queue and becomes
 * runnable.
 */
sched:::enqueue
/lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
{
	lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp;
}

/*
 * Calculate the latencies once the thread is back on the CPU; this is
 * necessary in order to get the right stack.
 */
this unsigned long long end;
this unsigned long long now;
sched::resume:on-cpu
/self->lt_sleep_start != 0/
{
	this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid];
	this->now = timestamp;
	lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0;
	this->end = (this->end != 0 && this->end != self->lt_sleep_start)
	    ? this->end : this->now;

	self->lt_sch_delay = this->now - this->end;
	self->lt_sleep_duration = this->end - self->lt_sleep_start;
	self->lt_sleep_start = 0;
}
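/*
 * Worked example (illustrative only) of the arithmetic above: suppose a
 * thread blocks at t0 = 100 (off-cpu, lt_sleep_start), is woken up and
 * enqueued at t1 = 160 (enqueue, lt_timestamps[]) and gets the CPU back
 * at t2 = 170 (on-cpu).  Then lt_sleep_duration = t1 - t0 = 60 and
 * lt_sch_delay = t2 - t1 = 10.  If the enqueue probe never fired,
 * this->end is either 0 or still holds lt_sleep_start, so the fallback
 * attributes the whole interval t2 - t0 to sleep time and the dispatch
 * queue delay becomes 0.
 */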
/*
 * Filter: drop all "large" latencies when the sleep is interruptible
 * (T_WAKEABLE), e.g., sleep().
 */
#if defined(ENABLE_FILTER)
sched::resume:on-cpu
/self->lt_sleep_duration > FILTER_THRESHOLD &&
    self->lt_is_block_wakeable != 0/
{
	self->lt_sch_delay = 0;
	self->lt_sleep_duration = 0;
	self->lt_is_block_wakeable = 0;
}
#endif /* defined(ENABLE_FILTER) */

/*
 * Write the sleep time to the aggregations.
 * lt_sleep_duration is the duration between the time when a thread is taken
 * off the CPU and the time when it is enqueued again.
 */
sched::resume:on-cpu
/self->lt_sleep_duration != 0/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = count();
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_sleep_duration);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_sleep_duration);

	self->lt_is_block_wakeable = 0;	/* Clear the flag to avoid a leak */
	self->lt_sleep_duration = 0;
}

/*
 * Write the time spent on the dispatch queue to the aggregations.
 * lt_sch_delay is the interval between the time when a thread becomes
 * runnable and the time when it is actually on the CPU.
 */
sched::resume:on-cpu
/self->lt_sch_delay != 0/
{
	@lt_named_count[pid, tid, "Wait for available CPU"] = count();
	@lt_named_sum[pid, tid, "Wait for available CPU"] =
	    sum(self->lt_sch_delay);
	@lt_named_max[pid, tid, "Wait for available CPU"] =
	    max(self->lt_sch_delay);

	self->lt_sch_delay = 0;
}

/*
 * Probes to track latency caused by spinning on a lock.
 */
lockstat:::adaptive-spin
TRACE_FILTER
{
	@lt_named_count[pid, tid, "Adapt. lock spin"] = count();
	@lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1);
	@lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1);
}

lockstat:::spin-spin
TRACE_FILTER
{
	@lt_named_count[pid, tid, "Spinlock spin"] = count();
	@lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1);
	@lt_named_max[pid, tid, "Spinlock spin"] = max(arg1);
}

/*
 * Probes to track latency caused by blocking on a lock.
 */
lockstat:::adaptive-block
TRACE_FILTER
{
	@lt_named_count[pid, tid, "#Adapt. lock block"] = count();
	@lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1);
	@lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1);
}

lockstat:::rw-block
TRACE_FILTER
{
	@lt_named_count[pid, tid, "#RW. lock block"] = count();
	@lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1);
	@lt_named_max[pid, tid, "#RW. lock block"] = max(arg1);
}

#if defined(ENABLE_SYNCOBJ)
/*
 * Probes to track latency caused by synchronization objects.
 */
this int stype;
this unsigned long long wchan;
this unsigned long long wtime;

sched:::wakeup
/*
 * Currently we are unable to track wakeups from sched, because all of its
 * LWP IDs are zero when we trace it, which makes lt_timestamps[] unusable.
 */
/args[1]->pr_pid != 0 &&
    lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/
{
	this->stype = args[0]->pr_stype;
	this->wchan = args[0]->pr_wchan;
	/*
	 * We can use lt_timestamps[] here because wakeup always fires
	 * before enqueue; after enqueue, lt_timestamps[] is overwritten.
	 */
	this->wtime = timestamp - lt_timestamps[args[1]->pr_pid,
	    args[0]->pr_lwpid];

	@lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = count();
	@lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = sum(this->wtime);
	@lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype,
	    this->wchan] = max(this->wtime);
}
#endif /* defined(ENABLE_SYNCOBJ) */

#else /* !defined(ENABLE_LOW_OVERHEAD) */

/*
 * This is the low overhead mode.
 * In order to reduce the number of instructions executed during each
 * off-cpu and on-cpu event, we do the following:
 *
 * 1. Use sampling and update aggregations only on roughly 1 out of
 *    SAMPLE_TIMES (100) events.
 * 2. Do not track anything other than what is needed for the "main" window.
 * 3. Use as few thread-local variables as possible.
 */
#define SAMPLE_TIMES		100
#define SAMPLE_THRESHOLD	50000000
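/*
 * Worked example (illustrative only) of the sampling used below: any
 * latency larger than SAMPLE_THRESHOLD (50 ms) is recorded individually.
 * Smaller latencies only bump lt_counter until it reaches
 * SAMPLE_TIMES - 1 (99); the next small latency is then recorded with a
 * count of lt_counter + 1 = 100 and a sum of 100 times its own duration,
 * i.e., the one sampled event stands in for the 99 that were skipped.
 */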
/*
 * Log the timestamp when a thread goes off CPU.
 */
sched::resume:off-cpu
TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP)
{
	self->lt_timestamp = timestamp;
#if defined(ENABLE_FILTER)
	self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE;
#endif /* defined(ENABLE_FILTER) */
}

/*
 * Calculate the latency once the thread is back on the CPU.
 */
this int need_skip;
sched::resume:on-cpu
/self->lt_timestamp != 0/
{
	self->lt_timestamp = timestamp - self->lt_timestamp;

#if defined(ENABLE_FILTER)
	self->lt_timestamp =
	    (self->lt_timestamp > FILTER_THRESHOLD &&
	    self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp;
	self->lt_is_block_wakeable = 0;
#endif /* defined(ENABLE_FILTER) */

	this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) &&
	    self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0;
	self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp;
	self->lt_counter += this->need_skip;
}

/*
 * Track large latencies first.
 */
sched::resume:on-cpu
/self->lt_timestamp > SAMPLE_THRESHOLD/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = sum(1);
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_timestamp);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_timestamp);

	self->lt_timestamp = 0;
}

/*
 * If we fall through to this probe, the latency is small and the counter
 * has reached SAMPLE_TIMES.
 */
sched::resume:on-cpu
/self->lt_timestamp != 0/
{
	this->cause = self->lt_stackp > 0 ?
	    self->lt_cause[self->lt_stackp - 1] : "";
	this->priority = self->lt_stackp > 0 ?
	    self->lt_prio[self->lt_stackp - 1] : 0;

	/* Need +1 because lt_counter has not been updated in this cycle. */
	@lt_call_count[pid, tid, stack(), this->cause,
	    this->priority] = sum(self->lt_counter + 1);
	@lt_call_sum[pid, tid, stack(), this->cause,
	    this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp);
	@lt_call_max[pid, tid, stack(), this->cause,
	    this->priority] = max(self->lt_timestamp);

	self->lt_timestamp = 0;
	self->lt_counter = 0;
}

#endif /* !defined(ENABLE_LOW_OVERHEAD) */

#define TRANSLATE(entryprobe, returnprobe, cause, priority) \
entryprobe \
TRACE_FILTER_COND(self->lt_stackp == 0 || \
    (self->lt_stackp < MAX_TAG && \
    self->lt_prio[self->lt_stackp - 1] <= priority)) \
{ \
	self->lt_prio[self->lt_stackp] = priority; \
	self->lt_cause[self->lt_stackp] = cause; \
	++self->lt_stackp; \
} \
returnprobe \
TRACE_FILTER_COND(self->lt_stackp > 0 && \
    self->lt_cause[self->lt_stackp - 1] == cause) \
{ \
	--self->lt_stackp; \
	self->lt_cause[self->lt_stackp] = NULL; \
}

/*
 * Syscalls have a priority of 10. This makes sure that latency is traced
 * to one of the syscalls only if nothing else matches.
 * We put this special probe here because it uses the probefunc variable
 * instead of a constant string.
 */

TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10)
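/*
 * Illustrative sketch (not part of the original script): additional
 * TRANSLATE() rules can tag latencies with more specific causes.  The
 * probe pair, the label and the priority below are examples only; a
 * priority greater than 10 lets the label win over the generic syscall
 * tag.  The guard macro ENABLE_TRANSLATE_EXAMPLE is hypothetical, so the
 * clause is compiled out unless it is defined explicitly, e.g. via
 * dtrace(1M)'s built-in preprocessor:
 *
 *	dtrace -C -DENABLE_TRANSLATE_EXAMPLE -s latencytop.d
 */
#if defined(ENABLE_TRANSLATE_EXAMPLE)
TRANSLATE(fbt:genunix:cv_wait:entry, fbt:genunix:cv_wait:return,
    "Condition variable wait", 11)
#endif /* defined(ENABLE_TRANSLATE_EXAMPLE) */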