xref: /illumos-gate/usr/src/cmd/plockstat/plockstat.c (revision 9a411307f0d1eedbc81618ec290e0685284d8a2b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <dtrace.h>
31 #include <limits.h>
32 #include <link.h>
33 #include <priv.h>
34 #include <signal.h>
35 #include <stdlib.h>
36 #include <stdarg.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <strings.h>
40 #include <errno.h>
41 #include <sys/wait.h>
42 #include <libgen.h>
43 #include <libproc.h>
44 
/* Program name (basename of argv[0]); used as the prefix of diagnostics. */
static char *g_pname;
/* libdtrace handle; assigned from dtrace_open() in main(). */
static dtrace_hdl_t *g_dtp;
/* Handle on the traced process; grabbed (-p) or created in main(). */
struct ps_prochandle *g_pr;

/* Process exit codes. */
#define	E_SUCCESS	0	/* normal completion */
#define	E_ERROR		1	/* fatal()/dfatal() exit status */
#define	E_USAGE		2	/* usage() exit status */
52 
/*
 * For hold times we use a global associative array since for mutexes, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases as they don't truly
 * affect lock contention.
 */
/*
 * D clauses that record the acquisition time of each lock, keyed by the lock
 * address (arg0).  rwlock timestamps go in a thread-local array; mutex
 * timestamps go in a global array and are only recorded when arg1 == 0.
 * NOTE(review): arg1 == 0 presumably selects non-recursive acquisitions --
 * confirm against the plockstat provider documentation.
 */
static const char *g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
71 
/*
 * D clauses used for hold events when -s is given: each release quantize()s
 * the elapsed hold time into an aggregation keyed by lock address and full
 * ustack().  For rw-release, the arg1 == 1 clause feeds the "writer"
 * aggregation; it zeroes self->rwhold[arg0], so the following (reader)
 * clause's predicate is then false for the same firing.  The *_found flags
 * gate the END clauses so that a title and aggregation are only emitted for
 * categories that actually saw events.
 */
static const char *g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
/* Output: one trace()d title followed by a printa() per category. */
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
115 
/*
 * D clauses used for hold events when -s is not given: each release adds the
 * elapsed hold time to a sum() aggregation and bumps a parallel count()
 * aggregation, both keyed by lock address and a five-frame ustack(5).  The
 * END clauses print each sum/count pair with a single two-aggregation
 * printa(), preceded by a trace()d title.
 */
static const char *g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
/* Output: title, then the joined (sum, count) aggregations. */
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
160 
161 
/*
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 */
/*
 * D clauses that record, per thread and per lock address (arg0), the time at
 * which the thread started blocking on an rwlock, blocking on a mutex, or
 * spinning on a mutex.
 */
static const char *g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
180 
/*
 * D clauses used for contention events when -s is given: the elapsed block or
 * spin time is quantize()d into an aggregation keyed by lock address and full
 * ustack().  Clauses for the same probe are ordered from most to least
 * specific; each one zeroes the thread-local start time, which makes the
 * predicates of the later clauses false for that firing.  The final
 * rw-blocked and mutex-blocked clauses only discard the start time.
 * NOTE(review): arg2 != 0 (rw-blocked) and arg1 != 0 (mutex-blocked) look
 * like "lock was acquired" indicators -- confirm against the plockstat
 * provider documentation.
 */
static const char *g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
/* arg1 != 0 feeds "Mutex spin"; otherwise "Mutex unsuccessful spin". */
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
/* Output: one trace()d title followed by a printa() per category. */
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
263 
264 
/*
 * D clauses used for contention events when -s is not given: the elapsed
 * block or spin time is added to a sum() aggregation and a parallel count()
 * aggregation is incremented, both keyed by lock address and ustack(5).
 * Clause ordering and the clearing of the thread-local start times follow
 * the same most-specific-first scheme as the histogram variant.  The END
 * clauses print each sum/count pair with one two-aggregation printa().
 */
static const char *g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
/* Output: title, then the joined (sum, count) aggregations. */
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
352 
353 static char g_prog[4096];
354 static size_t g_proglen;
355 static int g_opt_V, g_opt_s;
356 static int g_intr;
357 static int g_exited;
358 static dtrace_optval_t g_nframes;
359 static ulong_t g_nent = ULONG_MAX;
360 
361 #define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
362 
363 static void
364 usage(void)
365 {
366 	(void) fprintf(stderr, "Usage:\n"
367 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
368 	    "\t    command [arg...]\n"
369 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
370 	    "\t    -p pid\n", g_pname, g_pname);
371 
372 	exit(E_USAGE);
373 }
374 
375 static void
376 verror(const char *fmt, va_list ap)
377 {
378 	int error = errno;
379 
380 	(void) fprintf(stderr, "%s: ", g_pname);
381 	(void) vfprintf(stderr, fmt, ap);
382 
383 	if (fmt[strlen(fmt) - 1] != '\n')
384 		(void) fprintf(stderr, ": %s\n", strerror(error));
385 }
386 
387 /*PRINTFLIKE1*/
388 static void
389 fatal(const char *fmt, ...)
390 {
391 	va_list ap;
392 
393 	va_start(ap, fmt);
394 	verror(fmt, ap);
395 	va_end(ap);
396 
397 	if (g_pr != NULL && g_dtp != NULL)
398 		dtrace_proc_release(g_dtp, g_pr);
399 
400 	exit(E_ERROR);
401 }
402 
403 /*PRINTFLIKE1*/
404 static void
405 dfatal(const char *fmt, ...)
406 {
407 	va_list ap;
408 
409 	va_start(ap, fmt);
410 
411 	(void) fprintf(stderr, "%s: ", g_pname);
412 	if (fmt != NULL)
413 		(void) vfprintf(stderr, fmt, ap);
414 
415 	va_end(ap);
416 
417 	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
418 		(void) fprintf(stderr, ": %s\n",
419 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
420 	} else if (fmt == NULL) {
421 		(void) fprintf(stderr, "%s\n",
422 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
423 	}
424 
425 	if (g_pr != NULL) {
426 		dtrace_proc_continue(g_dtp, g_pr);
427 		dtrace_proc_release(g_dtp, g_pr);
428 	}
429 
430 	exit(E_ERROR);
431 }
432 
/*
 * Emit a non-fatal diagnostic through verror() and return to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
443 
444 static void
445 dprog_add(const char *prog)
446 {
447 	size_t len = strlen(prog);
448 	bcopy(prog, g_prog + g_proglen, len + 1);
449 	g_proglen += len;
450 	assert(g_proglen < sizeof (g_prog));
451 }
452 
453 static void
454 dprog_compile(void)
455 {
456 	dtrace_prog_t *prog;
457 	dtrace_proginfo_t info;
458 
459 	if (g_opt_V) {
460 		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
461 		(void) fputs(g_prog, stderr);
462 		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
463 	}
464 
465 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
466 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
467 		dfatal("failed to compile program");
468 
469 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
470 		dfatal("failed to enable probes");
471 }
472 
/*
 * Print the column headings of the summary table to stdout.
 */
void
print_legend(void)
{
	(void) fprintf(stdout, "%5s %8s %-28s %s\n",
	    "Count", "nsec", "Lock", "Caller");
}
478 
/*
 * Print the horizontal rule that separates table sections to stdout.
 */
void
print_bar(void)
{
	(void) fputs("---------------------------------------"
	    "----------------------------------------\n", stdout);
}
485 
/*
 * Print the heading line for a time-distribution histogram to stdout,
 * preceded by a blank line.
 */
void
print_histogram_header(void)
{
	(void) fprintf(stdout, "\n%10s ---- Time Distribution --- %5s %s\n",
	    "nsec", "count", "Stack");
}
492 
493 /*
494  * Convert an address to a symbolic string or a numeric string. If nolocks
495  * is set, we return an error code if this symbol appears to be a mutex- or
496  * rwlock-related symbol in libc so the caller has a chance to find a more
497  * helpful symbol.
498  */
499 static int
500 getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
501     int nolocks)
502 {
503 	char name[256];
504 	GElf_Sym sym;
505 	prsyminfo_t info;
506 	size_t len;
507 
508 	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
509 	    &sym, &info) != 0) {
510 		(void) snprintf(buf, size, "%#lx", addr);
511 		return (0);
512 	}
513 	if (info.prs_object == NULL)
514 		info.prs_object = "<unknown>";
515 
516 	if (info.prs_lmid != LM_ID_BASE) {
517 		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
518 		buf += len;
519 		size -= len;
520 	}
521 
522 	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
523 	buf += len;
524 	size -= len;
525 
526 	if (sym.st_value != addr)
527 		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
528 
529 	if (nolocks && strcmp("libc.so.1", info.prs_object) == 0 &&
530 	    (strstr("mutex", info.prs_name) == 0 ||
531 	    strstr("rw", info.prs_name) == 0))
532 		return (-1);
533 
534 	return (0);
535 }
536 
537 /*ARGSUSED*/
538 static int
539 process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
540 {
541 	const dtrace_recdesc_t *rec;
542 	uintptr_t lock;
543 	uint64_t *stack;
544 	caddr_t data;
545 	pid_t pid;
546 	struct ps_prochandle *P;
547 	char buf[256];
548 	int i, j;
549 	uint64_t sum, count, avg;
550 
551 	if ((*(uint_t *)arg)++ >= g_nent)
552 		return (DTRACE_AGGWALK_NEXT);
553 
554 	rec = aggsdata[0]->dtada_desc->dtagd_rec;
555 	data = aggsdata[0]->dtada_data;
556 
557 	/*LINTED - alignment*/
558 	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
559 	/*LINTED - alignment*/
560 	stack = (uint64_t *)(data + rec[2].dtrd_offset);
561 
562 	if (!g_opt_s) {
563 		/*LINTED - alignment*/
564 		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
565 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
566 		/*LINTED - alignment*/
567 		count = *(uint64_t *)(aggsdata[2]->dtada_data +
568 		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
569 	} else {
570 		uint64_t *a;
571 
572 		/*LINTED - alignment*/
573 		a = (uint64_t *)(aggsdata[1]->dtada_data +
574 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
575 
576 		print_bar();
577 		print_legend();
578 
579 		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
580 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
581 			count += a[i];
582 			sum += a[i] << (j - 64);
583 		}
584 	}
585 
586 	avg = sum / count;
587 	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
588 
589 	pid = stack[0];
590 	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
591 
592 	(void) getsym(P, lock, buf, sizeof (buf), 0);
593 	(void) printf("%-28s ", buf);
594 
595 	for (i = 2; i <= 5; i++) {
596 		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
597 			break;
598 	}
599 	(void) printf("%s\n", buf);
600 
601 	if (g_opt_s) {
602 		int stack_done = 0;
603 		int quant_done = 0;
604 		int first_bin, last_bin;
605 		uint64_t bin_size, *a;
606 
607 		/*LINTED - alignment*/
608 		a = (uint64_t *)(aggsdata[1]->dtada_data +
609 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
610 
611 		print_histogram_header();
612 
613 		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
614 		    a[first_bin] == 0; first_bin++)
615 			continue;
616 		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
617 		    a[last_bin] == 0; last_bin--)
618 			continue;
619 
620 		for (i = 0; !stack_done || !quant_done; i++) {
621 			if (!stack_done) {
622 				(void) getsym(P, stack[i + 2], buf,
623 				    sizeof (buf), 0);
624 			} else {
625 				buf[0] = '\0';
626 			}
627 
628 			if (!quant_done) {
629 				bin_size = a[first_bin];
630 
631 				(void) printf("%10llu |%-24.*s| %5llu %s\n",
632 				    1ULL <<
633 				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
634 				    (int)(24.0 * bin_size / count),
635 				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
636 				    (u_longlong_t)bin_size, buf);
637 			} else {
638 				(void) printf("%43s %s\n", "", buf);
639 			}
640 
641 			if (i + 1 >= g_nframes || stack[i + 3] == 0)
642 				stack_done = 1;
643 
644 			if (first_bin++ == last_bin)
645 				quant_done = 1;
646 		}
647 	}
648 
649 	dtrace_proc_release(g_dtp, P);
650 
651 	return (DTRACE_AGGWALK_NEXT);
652 }
653 
654 /*ARGSUSED*/
655 static void
656 prochandler(struct ps_prochandle *P, const char *msg, void *arg)
657 {
658 	const psinfo_t *prp = Ppsinfo(P);
659 	int pid = Pstatus(P)->pr_pid;
660 	char name[SIG2STR_MAX];
661 
662 	if (msg != NULL) {
663 		notice("pid %d: %s\n", pid, msg);
664 		return;
665 	}
666 
667 	switch (Pstate(P)) {
668 	case PS_UNDEAD:
669 		/*
670 		 * Ideally we would like to always report pr_wstat here, but it
671 		 * isn't possible given current /proc semantics.  If we grabbed
672 		 * the process, Ppsinfo() will either fail or return a zeroed
673 		 * psinfo_t depending on how far the parent is in reaping it.
674 		 * When /proc provides a stable pr_wstat in the status file,
675 		 * this code can be improved by examining this new pr_wstat.
676 		 */
677 		if (prp != NULL && WIFSIGNALED(prp->pr_wstat)) {
678 			notice("pid %d terminated by %s\n", pid,
679 			    proc_signame(WTERMSIG(prp->pr_wstat),
680 			    name, sizeof (name)));
681 		} else if (prp != NULL && WEXITSTATUS(prp->pr_wstat) != 0) {
682 			notice("pid %d exited with status %d\n",
683 			    pid, WEXITSTATUS(prp->pr_wstat));
684 		} else {
685 			notice("pid %d has exited\n", pid);
686 		}
687 		g_exited = 1;
688 		break;
689 
690 	case PS_LOST:
691 		notice("pid %d exec'd a set-id or unobservable program\n", pid);
692 		g_exited = 1;
693 		break;
694 	}
695 }
696 
697 /*ARGSUSED*/
698 static int
699 chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
700 {
701 	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
702 	dtrace_aggvarid_t aggvars[2];
703 	const void *buf;
704 	int i, nagv;
705 
706 	/*
707 	 * A NULL rec indicates that we've processed the last record.
708 	 */
709 	if (rec == NULL)
710 		return (DTRACE_CONSUME_NEXT);
711 
712 	buf = data->dtpda_data - rec->dtrd_offset;
713 
714 	switch (rec->dtrd_action) {
715 	case DTRACEACT_DIFEXPR:
716 		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
717 		if (!g_opt_s) {
718 			print_legend();
719 			print_bar();
720 		}
721 		return (DTRACE_CONSUME_NEXT);
722 
723 	case DTRACEACT_PRINTA:
724 		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
725 			const dtrace_recdesc_t *nrec = &rec[i];
726 
727 			if (nrec->dtrd_uarg != rec->dtrd_uarg)
728 				break;
729 
730 			/*LINTED - alignment*/
731 			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
732 			    nrec->dtrd_offset);
733 		}
734 
735 		if (nagv == (g_opt_s ? 1 : 2)) {
736 			uint_t nent = 0;
737 			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
738 			    process_aggregate, &nent) != 0)
739 				dfatal("failed to walk aggregate");
740 		}
741 
742 		return (DTRACE_CONSUME_NEXT);
743 	}
744 
745 	return (DTRACE_CONSUME_THIS);
746 }
747 
748 /*ARGSUSED*/
749 static void
750 intr(int signo)
751 {
752 	g_intr = 1;
753 }
754 
755 int
756 main(int argc, char **argv)
757 {
758 	ucred_t *ucp;
759 	int err;
760 	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
761 	char c, *p, *end;
762 	struct sigaction act;
763 	int done = 0;
764 
765 	g_pname = basename(argv[0]);
766 	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
767 
768 	/*
769 	 * Make sure we have the required dtrace_proc privilege.
770 	 */
771 	if ((ucp = ucred_get(getpid())) != NULL) {
772 		const priv_set_t *psp;
773 		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
774 		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
775 			fatal("dtrace_proc privilege required\n");
776 		}
777 
778 		ucred_free(ucp);
779 	}
780 
781 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
782 		switch (c) {
783 		case 'n':
784 			errno = 0;
785 			g_nent = strtoul(optarg, &end, 10);
786 			if (*end != '\0' || errno != 0) {
787 				(void) fprintf(stderr, "%s: invalid count "
788 				    "'%s'\n", g_pname, optarg);
789 				usage();
790 			}
791 			break;
792 
793 		case 'p':
794 			opt_p = 1;
795 			break;
796 
797 		case 'v':
798 			opt_v = 1;
799 			break;
800 
801 		case 'A':
802 			opt_C = opt_H = 1;
803 			break;
804 
805 		case 'C':
806 			opt_C = 1;
807 			break;
808 
809 		case 'H':
810 			opt_H = 1;
811 			break;
812 
813 		case 'V':
814 			g_opt_V = 1;
815 			break;
816 
817 		default:
818 			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
819 				usage();
820 		}
821 	}
822 
823 	/*
824 	 * We need a command or at least one pid.
825 	 */
826 	if (argc == optind)
827 		usage();
828 
829 	if (opt_C == 0 && opt_H == 0)
830 		opt_C = 1;
831 
832 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
833 		fatal("failed to initialize dtrace: %s\n",
834 		    dtrace_errmsg(NULL, err));
835 
836 	/*
837 	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
838 	 */
839 	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
840 		dfatal("failed to set 'strsize'");
841 
842 	/*
843 	 * 1k should be more than enough for all trace() and printa() actions.
844 	 */
845 	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
846 		dfatal("failed to set 'bufsize'");
847 
848 	/*
849 	 * The table we produce has the hottest locks at the top.
850 	 */
851 	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
852 		dfatal("failed to set 'aggsortrev'");
853 
854 	/*
855 	 * These are two reasonable defaults which should suffice.
856 	 */
857 	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
858 		dfatal("failed to set 'aggsize'");
859 	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
860 		dfatal("failed to set 'aggrate'");
861 
862 	/*
863 	 * Take a second pass through to look for options that set options now
864 	 * that we have an open dtrace handle.
865 	 */
866 	optind = 1;
867 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
868 		switch (c) {
869 		case 's':
870 			g_opt_s = 1;
871 			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
872 				dfatal("failed to set 'ustackframes'");
873 			break;
874 
875 		case 'x':
876 			if ((p = strchr(optarg, '=')) != NULL)
877 				*p++ = '\0';
878 
879 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
880 				dfatal("failed to set -x %s", optarg);
881 			break;
882 
883 		case 'e':
884 			errno = 0;
885 			(void) strtoul(optarg, &end, 10);
886 			if (*optarg == '-' || *end != '\0' || errno != 0) {
887 				(void) fprintf(stderr, "%s: invalid timeout "
888 				    "'%s'\n", g_pname, optarg);
889 				usage();
890 			}
891 
892 			/*
893 			 * Construct a DTrace enabling that will exit after
894 			 * the specified number of seconds.
895 			 */
896 			dprog_add("BEGIN\n{\n\tend = timestamp + ");
897 			dprog_add(optarg);
898 			dprog_add(" * 1000000000;\n}\n");
899 			dprog_add("tick-10hz\n/timestamp >= end/\n");
900 			dprog_add("{\n\texit(0);\n}\n");
901 			break;
902 		}
903 	}
904 
905 	argc -= optind;
906 	argv += optind;
907 
908 	if (opt_H) {
909 		dprog_add(g_hold_init);
910 		if (g_opt_s == NULL)
911 			dprog_add(g_hold_times);
912 		else
913 			dprog_add(g_hold_histogram);
914 	}
915 
916 	if (opt_C) {
917 		dprog_add(g_ctnd_init);
918 		if (g_opt_s == NULL)
919 			dprog_add(g_ctnd_times);
920 		else
921 			dprog_add(g_ctnd_histogram);
922 	}
923 
924 	if (opt_p) {
925 		ulong_t pid;
926 
927 		if (argc > 1) {
928 			(void) fprintf(stderr, "%s: only one pid is allowed\n",
929 			    g_pname);
930 			usage();
931 		}
932 
933 		errno = 0;
934 		pid = strtoul(argv[0], &end, 10);
935 		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
936 			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
937 			    g_pname, argv[0]);
938 			usage();
939 		}
940 
941 		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
942 			dfatal(NULL);
943 	} else {
944 		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv)) == NULL)
945 			dfatal(NULL);
946 	}
947 
948 	dprog_compile();
949 
950 	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
951 		dfatal("failed to establish proc handler");
952 
953 	(void) sigemptyset(&act.sa_mask);
954 	act.sa_flags = 0;
955 	act.sa_handler = intr;
956 	(void) sigaction(SIGINT, &act, NULL);
957 	(void) sigaction(SIGTERM, &act, NULL);
958 
959 	if (dtrace_go(g_dtp) != 0)
960 		dfatal("dtrace_go()");
961 
962 	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
963 		dfatal("failed to get 'ustackframes'");
964 
965 	dtrace_proc_continue(g_dtp, g_pr);
966 
967 	if (opt_v)
968 		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
969 		    (int)Pstatus(g_pr)->pr_pid);
970 
971 	do {
972 		if (!g_intr && !done)
973 			dtrace_sleep(g_dtp);
974 
975 		if (done || g_intr || g_exited) {
976 			done = 1;
977 			if (dtrace_stop(g_dtp) == -1)
978 				dfatal("couldn't stop tracing");
979 		}
980 
981 		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
982 		case DTRACE_WORKSTATUS_DONE:
983 			done = 1;
984 			break;
985 		case DTRACE_WORKSTATUS_OKAY:
986 			break;
987 		default:
988 			dfatal("processing aborted");
989 		}
990 
991 	} while (!done);
992 
993 	dtrace_close(g_dtp);
994 
995 	return (0);
996 }
997