1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2008-2009, Intel Corporation. 23 * All Rights Reserved. 24 */ 25 26#define MAX_TAG 8 27#define MAX_STACK 64 28 29#pragma D option aggsize=8m 30#pragma D option bufsize=16m 31#pragma D option dynvarsize=16m 32#pragma D option aggrate=0 33#pragma D option stackframes=MAX_STACK 34/* 35 * Our D script needs to compile even if some of the TRANSLATE probes cannot 36 * be found. Missing probes can be caused by older kernel, different 37 * architecture, unloaded modules etc. 38 */ 39#pragma D option zdefs 40 41#if defined(ENABLE_SCHED) 42#if defined(TRACE_PID) 43#define TRACE_FILTER / pid == 0 || pid == TRACE_PID / 44#define TRACE_FILTER_COND(a) / (pid == 0 || pid == TRACE_PID) && (a) / 45#elif defined(TRACE_PGID) 46#define TRACE_FILTER / pid == 0 || curpsinfo->pr_pgid == TRACE_PGID / 47#define TRACE_FILTER_COND(a) 48 / (pid == 0 || curpsinfo->pr_pgid == TRACE_PGID) && (a) / 49#else 50#define TRACE_FILTER 51#define TRACE_FILTER_COND(a) / (a) / 52#endif 53#else /* ENABLE_SCHED */ 54#if defined(TRACE_PID) 55#define TRACE_FILTER / pid == TRACE_PID / 56#define TRACE_FILTER_COND(a) / (pid == TRACE_PID) && (a) / 57#elif defined(TRACE_PGID) 58#define TRACE_FILTER / curpsinfo->pr_pgid == TRACE_PGID / 59#define TRACE_FILTER_COND(a) / (curpsinfo->pr_pgid == TRACE_PGID) && (a) / 60#else 61#define TRACE_FILTER / pid != 0 / 62#define TRACE_FILTER_COND(a) / (pid != 0) && (a) / 63#endif 64#endif /* ENABLE_SCHED */ 65 66/* Threshold to filter WAKEABLE latencies. */ 67#define FILTER_THRESHOLD 5000000 68/* From thread.h */ 69#define T_WAKEABLE 2 70 71/* 72 * This array is used to store timestamp of when threads are enqueued 73 * to dispatch queue. 74 * self-> is not accessible when enqueue happens. 75 */ 76unsigned long long lt_timestamps[int, int]; 77 78self unsigned int lt_is_block_wakeable; 79self unsigned long long lt_sleep_start; 80self unsigned long long lt_sleep_duration; 81self unsigned long long lt_sch_delay; 82self unsigned int lt_counter; /* only used in low overhead */ 83self unsigned long long lt_timestamp; /* only used in low overhead */ 84self unsigned int lt_stackp; 85self unsigned int lt_prio[int]; 86self string lt_cause[int]; 87 88this unsigned int priority; 89this string cause; 90 91/* 92 * Clean up everything, otherwise we will run out of memory. 93 */ 94proc:::lwp-exit 95{ 96 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0; 97 98 self->lt_sleep_start = 0; 99 self->lt_is_block_wakeable = 0; 100 self->lt_counter = 0; 101 self->lt_timestamp = 0; 102 103 /* 104 * Workaround: no way to clear associative array. 105 * We have to manually clear 0 ~ (MAX_TAG-1). 106 */ 107 108 self->lt_prio[0] = 0; 109 self->lt_prio[1] = 0; 110 self->lt_prio[2] = 0; 111 self->lt_prio[3] = 0; 112 self->lt_prio[4] = 0; 113 self->lt_prio[5] = 0; 114 self->lt_prio[6] = 0; 115 self->lt_prio[7] = 0; 116 117 self->lt_cause[0] = 0; 118 self->lt_cause[1] = 0; 119 self->lt_cause[2] = 0; 120 self->lt_cause[3] = 0; 121 self->lt_cause[4] = 0; 122 self->lt_cause[5] = 0; 123 self->lt_cause[6] = 0; 124 self->lt_cause[7] = 0; 125} 126 127#if !defined(ENABLE_LOW_OVERHEAD) 128/* 129 * Log timestamp when a thread is taken off the CPU. 130 */ 131sched::resume:off-cpu 132TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP) 133{ 134 self->lt_sleep_start = timestamp; 135 self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE; 136 137 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 138 self->lt_sleep_start; 139} 140 141/* 142 * Log timestamp when a thread is put on a dispatch queue and becomes runnable. 143 */ 144sched:::enqueue 145/lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/ 146{ 147 lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp; 148} 149 150/* 151 * Calculate latency when the thread is actually on the CPU. 152 * This is necessary in order to get the right stack. 153 */ 154this unsigned long long end; 155this unsigned long long now; 156sched::resume:on-cpu 157/self->lt_sleep_start != 0/ 158{ 159 this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid]; 160 this->now = timestamp; 161 lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0; 162 this->end = (this->end != 0 && this->end != self->lt_sleep_start) 163 ? this->end : this->now; 164 165 self->lt_sch_delay = this->now - this->end; 166 self->lt_sleep_duration = this->end - self->lt_sleep_start; 167 self->lt_sleep_start = 0; 168} 169 170/* 171 * Filter: drop all "large" latency when it is interruptible, i.e., sleep() 172 * etc. 173 */ 174#if defined(ENABLE_FILTER) 175sched::resume:on-cpu 176/self->lt_sleep_duration > FILTER_THRESHOLD && 177 self->lt_is_block_wakeable != 0/ 178{ 179 self->lt_sch_delay = 0; 180 self->lt_sleep_duration = 0; 181 self->lt_is_block_wakeable = 0; 182} 183#endif /* defined(ENABLE_FILTER) */ 184 185/* 186 * Write sleep time to the aggregation. 187 * lt_sleep_duration is the duration between the time when a thread is taken 188 * off the CPU and the time when it is enqueued again. 189 */ 190sched::resume:on-cpu 191/self->lt_sleep_duration != 0/ 192{ 193 this->cause = self->lt_stackp > 0 ? 194 self->lt_cause[self->lt_stackp - 1] : ""; 195 this->priority = self->lt_stackp > 0 ? 196 self->lt_prio[self->lt_stackp - 1] : 0; 197 198 @lt_call_count[pid, tid, stack(), this->cause, 199 this->priority] = count(); 200 @lt_call_sum[pid, tid, stack(), this->cause, 201 this->priority] = sum(self->lt_sleep_duration); 202 @lt_call_max[pid, tid, stack(), this->cause, 203 this->priority] = max(self->lt_sleep_duration); 204 205 self->lt_is_block_wakeable = 0; /* Clear the flag to avoid leak */ 206 self->lt_sleep_duration = 0; 207} 208 209/* 210 * Write time spent in queue to the aggregation. 211 * lt_sch_delay is the interval between the time when a thread becomes 212 * runnable and the time when it is actually on the CPU. 213 */ 214sched::resume:on-cpu 215/self->lt_sch_delay != 0/ 216{ 217 @lt_named_count[pid, tid, "Wait for available CPU"] = count(); 218 @lt_named_sum[pid, tid, "Wait for available CPU"] = 219 sum(self->lt_sch_delay); 220 @lt_named_max[pid, tid, "Wait for available CPU"] = 221 max(self->lt_sch_delay); 222 223 self->lt_sch_delay = 0; 224} 225 226/* 227 * Probes to track latency caused by spinning on a lock. 228 */ 229lockstat:::adaptive-spin 230TRACE_FILTER 231{ 232 @lt_named_count[pid, tid, "Adapt. lock spin"] = count(); 233 @lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1); 234 @lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1); 235} 236 237lockstat:::spin-spin 238TRACE_FILTER 239{ 240 @lt_named_count[pid, tid, "Spinlock spin"] = count(); 241 @lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1); 242 @lt_named_max[pid, tid, "Spinlock spin"] = max(arg1); 243} 244 245/* 246 * Probes to track latency caused by blocking on a lock. 247 */ 248lockstat:::adaptive-block 249TRACE_FILTER 250{ 251 @lt_named_count[pid, tid, "#Adapt. lock block"] = count(); 252 @lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1); 253 @lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1); 254} 255 256lockstat:::rw-block 257TRACE_FILTER 258{ 259 @lt_named_count[pid, tid, "#RW. lock block"] = count(); 260 @lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1); 261 @lt_named_max[pid, tid, "#RW. lock block"] = max(arg1); 262} 263 264#if defined(ENABLE_SYNCOBJ) 265/* 266 * Probes to track latency caused by synchronization objects. 267 */ 268this int stype; 269this unsigned long long wchan; 270this unsigned long long wtime; 271 272sched:::wakeup 273/* 274 * Currently we are unable to track wakeup from sched, because all its LWP IDs 275 * are zero when we trace it and that makes lt_timestamps unusable. 276 */ 277/args[1]->pr_pid != 0 && 278 lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/ 279{ 280 this->stype = args[0]->pr_stype; 281 this->wchan = args[0]->pr_wchan; 282 /* 283 * We can use lt_timestamps[] here, because 284 * wakeup is always fired before enqueue. 285 * After enqueue, lt_timestamps[] will be overwritten. 286 */ 287 this->wtime = timestamp - lt_timestamps[args[1]->pr_pid, 288 args[0]->pr_lwpid]; 289 290 @lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, 291 this->wchan] = count(); 292 @lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, 293 this->wchan] = sum(this->wtime); 294 @lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, 295 this->wchan] = max(this->wtime); 296} 297#endif /* defined(ENABLE_SYNCOBJ) */ 298 299#else /* !defined(ENABLE_LOW_OVERHEAD) */ 300 301/* 302 * This is the low overhead mode. 303 * In order to reduce the number of instructions executed during each 304 * off-cpu and on-cpu event, we do the following: 305 * 306 * 1. Use sampling and update aggregations only roughly 1/100 times 307 * (SAMPLE_TIMES). 308 * 2. Do not track anything other than what is needed for "main" window. 309 * 3. Use as few thread local variables as possible. 310 */ 311 312#define SAMPLE_TIMES 100 313#define SAMPLE_THRESHOLD 50000000 314 315/* 316 * Log timestamp when a thread is off CPU. 317 */ 318sched::resume:off-cpu 319TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP) 320{ 321 self->lt_timestamp = timestamp; 322#if defined(ENABLE_FILTER) 323 self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE; 324#endif /* defined(ENABLE_FILTER) */ 325} 326 327/* 328 * Calculate latency when a thread is actually on the CPU. 329 */ 330this int need_skip; 331sched::resume:on-cpu 332/self->lt_timestamp != 0/ 333{ 334 self->lt_timestamp = timestamp - self->lt_timestamp; 335 336#if defined(ENABLE_FILTER) 337 self->lt_timestamp = 338 (self->lt_timestamp > FILTER_THRESHOLD && 339 self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp; 340 self->lt_is_block_wakeable = 0; 341#endif /* defined(ENABLE_FILTER) */ 342 343 this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) && 344 self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0; 345 self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp; 346 self->lt_counter += this->need_skip; 347} 348 349/* 350 * Track large latency first. 351 */ 352sched::resume:on-cpu 353/self->lt_timestamp > SAMPLE_THRESHOLD/ 354{ 355 this->cause = self->lt_stackp > 0 ? 356 self->lt_cause[self->lt_stackp - 1] : ""; 357 this->priority = self->lt_stackp > 0 ? 358 self->lt_prio[self->lt_stackp - 1] : 0; 359 360 @lt_call_count[pid, tid, stack(), this->cause, 361 this->priority] = sum(1); 362 @lt_call_sum[pid, tid, stack(), this->cause, 363 this->priority] = sum(self->lt_timestamp); 364 @lt_call_max[pid, tid, stack(), this->cause, 365 this->priority] = max(self->lt_timestamp); 366 367 self->lt_timestamp = 0; 368} 369 370/* 371 * If we fall back to this probe, that means the latency is small and counter 372 * has reached SAMPLE_TIMES. 373 */ 374sched::resume:on-cpu 375/self->lt_timestamp != 0/ 376{ 377 this->cause = self->lt_stackp > 0 ? 378 self->lt_cause[self->lt_stackp - 1] : ""; 379 this->priority = self->lt_stackp > 0 ? 380 self->lt_prio[self->lt_stackp - 1] : 0; 381 382 /* Need +1 because lt_counter has not been updated in this cycle. */ 383 @lt_call_count[pid, tid, stack(), this->cause, 384 this->priority] = sum(self->lt_counter + 1); 385 @lt_call_sum[pid, tid, stack(), this->cause, 386 this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp); 387 @lt_call_max[pid, tid, stack(), this->cause, 388 this->priority] = max(self->lt_timestamp); 389 390 self->lt_timestamp = 0; 391 self->lt_counter = 0; 392} 393 394#endif /* !defined(ENABLE_LOW_OVERHEAD) */ 395 396#define TRANSLATE(entryprobe, returnprobe, cause, priority) \ 397entryprobe \ 398TRACE_FILTER_COND(self->lt_stackp == 0 || \ 399 (self->lt_stackp < MAX_TAG && \ 400 self->lt_prio[self->lt_stackp - 1] <= priority) ) \ 401{ \ 402 self->lt_prio[self->lt_stackp] = priority; \ 403 self->lt_cause[self->lt_stackp] = cause; \ 404 ++self->lt_stackp; \ 405} \ 406returnprobe \ 407TRACE_FILTER_COND(self->lt_stackp > 0 && \ 408 self->lt_cause[self->lt_stackp - 1] == cause) \ 409{ \ 410 --self->lt_stackp; \ 411 self->lt_cause[self->lt_stackp] = NULL; \ 412} 413 414/* 415 * Syscalls have a priority of 10. This is to make sure that latency is 416 * traced to one of the syscalls only if nothing else matches. 417 * We put this special probe here because it uses "probefunc" variable instead 418 * of a constant string. 419 */ 420 421TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10) 422