xref: /illumos-gate/usr/src/cmd/plockstat/plockstat.c (revision 86ef0a63e1cfa5dc98606efef379365acca98063)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <dtrace.h>
29 #include <limits.h>
30 #include <link.h>
31 #include <priv.h>
32 #include <signal.h>
33 #include <stdlib.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <errno.h>
39 #include <sys/wait.h>
40 #include <libgen.h>
41 #include <libproc.h>
42 
/* Program name (basename of argv[0]); prefixes every diagnostic message. */
static char *g_pname;
/* libdtrace handle obtained from dtrace_open() in main(). */
static dtrace_hdl_t *g_dtp;
/* Traced process, from dtrace_proc_grab() (-p) or dtrace_proc_create(). */
struct ps_prochandle *g_pr;

/* Values passed to exit(). */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
50 
/*
 * D clauses that record when a lock was acquired, for hold-time accounting.
 *
 * Mutex acquisition times are kept in a global associative array because, in
 * user-land, it is not invalid for one thread to release a synchronization
 * primitive that another thread acquired.  Reader/writer lock times need a
 * thread-local associative array since multiple threads can hold the same
 * lock for reading.  Recursive mutex acquisitions and releases are ignored
 * (the arg1 == 0 predicates) as they don't truly affect lock contention.
 */
static const char *g_hold_init =
	/* rwlock acquired: stamp the time in this thread's table */
	"plockstat$target:::rw-acquire\n"
	"{\n"
	"\tself->rwhold[arg0] = timestamp;\n"
	"}\n"
	/* mutex acquired with arg1 == 0: stamp the time in the global table */
	"plockstat$target:::mutex-acquire\n"
	"/arg1 == 0/\n"
	"{\n"
	"\tmtxhold[arg0] = timestamp;\n"
	"}\n";
69 
/*
 * Hold-time clauses used in histogram (-s) mode: each release feeds the
 * elapsed time since the matching acquire into a quantize() aggregation keyed
 * by lock address and full user stack.  The *_found flags let the END clauses
 * print only the sections that actually collected data.
 */
static const char *g_hold_histogram =
	/* rwlock released with arg1 == 1: accounted as a writer hold */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0] && arg1 == 1/\n"
	"{\n"
	"\t@rw_w_hold[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwhold[arg0]);\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_w_hold_found = 1;\n"
	"}\n"
	/*
	 * Any other rwlock release is a reader hold; the clause above has
	 * already zeroed self->rwhold[arg0] when it fired.
	 */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0]/\n"
	"{\n"
	"\t@rw_r_hold[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwhold[arg0]);\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_r_hold_found = 1;\n"
	"}\n"
	/* mutex released with arg1 == 0 */
	"plockstat$target:::mutex-release\n"
	"/mtxhold[arg0] && arg1 == 0/\n"
	"{\n"
	"\t@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
	"\tmtxhold[arg0] = 0;\n"
	"\tmtx_hold_found = 1;\n"
	"}\n"
	"\n"
	/* reporting: a trace() title followed by the aggregation */
	"END\n"
	"/mtx_hold_found/\n"
	"{\n"
	"\ttrace(\"Mutex hold\");\n"
	"\tprinta(@mtx_hold);\n"
	"}\n"
	"END\n"
	"/rw_r_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W reader hold\");\n"
	"\tprinta(@rw_r_hold);\n"
	"}\n"
	"END\n"
	"/rw_w_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W writer hold\");\n"
	"\tprinta(@rw_w_hold);\n"
	"}\n";
113 
/*
 * Hold-time clauses used in the default (non-histogram) mode: each release
 * adds the elapsed time to a sum() aggregation and bumps a parallel count()
 * aggregation, both keyed by lock address and a five-frame user stack.  The
 * END clauses print each sum/count pair with a single joined printa().
 */
static const char *g_hold_times =
	/* rwlock released with arg1 == 1: accounted as a writer hold */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0] && arg1 == 1/\n"
	"{\n"
	"\t@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
	"\t@rw_w_hold_count[arg0, ustack(5)] = count();\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_w_hold_found = 1;\n"
	"}\n"
	/*
	 * Any other rwlock release is a reader hold; the clause above has
	 * already zeroed self->rwhold[arg0] when it fired.
	 */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0]/\n"
	"{\n"
	"\t@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
	"\t@rw_r_hold_count[arg0, ustack(5)] = count();\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_r_hold_found = 1;\n"
	"}\n"
	/* mutex released with arg1 == 0 */
	"plockstat$target:::mutex-release\n"
	"/mtxhold[arg0] && arg1 == 0/\n"
	"{\n"
	"\t@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
	"\t@mtx_hold_count[arg0, ustack(5)] = count();\n"
	"\tmtxhold[arg0] = 0;\n"
	"\tmtx_hold_found = 1;\n"
	"}\n"
	"\n"
	/* reporting: a trace() title followed by the joined aggregations */
	"END\n"
	"/mtx_hold_found/\n"
	"{\n"
	"\ttrace(\"Mutex hold\");\n"
	"\tprinta(@mtx_hold, @mtx_hold_count);\n"
	"}\n"
	"END\n"
	"/rw_r_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W reader hold\");\n"
	"\tprinta(@rw_r_hold, @rw_r_hold_count);\n"
	"}\n"
	"END\n"
	"/rw_w_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W writer hold\");\n"
	"\tprinta(@rw_w_hold, @rw_w_hold_count);\n"
	"}\n";
158 
159 
/*
 * D clauses that record when a thread starts blocking or spinning on a lock.
 *
 * For contention, thread-local associative arrays are used since we're
 * tracing a single thread's activity in libc, and multiple threads can be
 * blocking or spinning on the same synchronization primitive.
 */
static const char *g_ctnd_init =
	/* thread starts blocking on an rwlock */
	"plockstat$target:::rw-block\n"
	"{\n"
	"\tself->rwblock[arg0] = timestamp;\n"
	"}\n"
	/* thread starts blocking on a mutex */
	"plockstat$target:::mutex-block\n"
	"{\n"
	"\tself->mtxblock[arg0] = timestamp;\n"
	"}\n"
	/* thread starts spinning on a mutex */
	"plockstat$target:::mutex-spin\n"
	"{\n"
	"\tself->mtxspin[arg0] = timestamp;\n"
	"}\n";
178 
/*
 * Contention clauses used in histogram (-s) mode: when blocking or spinning
 * ends, the elapsed time goes into a quantize() aggregation keyed by lock
 * address and full user stack.  Clauses for the same probe are ordered from
 * most to least specific; each one zeroes the thread-local start time so the
 * later clauses for that probe do not fire as well.
 */
static const char *g_ctnd_histogram =
	/* rwlock block ended with arg1 == 1 and arg2 != 0: writer block */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
	"{\n"
	"\t@rw_w_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwblock[arg0]);\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_w_block_found = 1;\n"
	"}\n"
	/* remaining rwlock blocks with arg2 != 0: reader block */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg2 != 0/\n"
	"{\n"
	"\t@rw_r_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwblock[arg0]);\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_r_block_found = 1;\n"
	"}\n"
	/* anything left (arg2 == 0) is discarded without being recorded */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0]/\n"
	"{\n"
	"\tself->rwblock[arg0] = 0;\n"
	"}\n"
	/* mutex spin ended with arg1 != 0: reported as \"Mutex spin\" */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_spin[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxspin[arg0]);\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_spin_found = 1;\n"
	"}\n"
	/* remaining spins: reported as \"Mutex unsuccessful spin\" */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0]/\n"
	"{\n"
	"\t@mtx_vain_spin[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxspin[arg0]);\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_vain_spin_found = 1;\n"
	"}\n"
	/* mutex block ended with arg1 != 0 */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxblock[arg0]);\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"\tmtx_block_found = 1;\n"
	"}\n"
	/* anything left (arg1 == 0) is discarded without being recorded */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0]/\n"
	"{\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"}\n"
	"\n"
	/* reporting: a trace() title followed by the aggregation */
	"END\n"
	"/mtx_block_found/\n"
	"{\n"
	"\ttrace(\"Mutex block\");\n"
	"\tprinta(@mtx_block);\n"
	"}\n"
	"END\n"
	"/mtx_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex spin\");\n"
	"\tprinta(@mtx_spin);\n"
	"}\n"
	"END\n"
	"/mtx_vain_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex unsuccessful spin\");\n"
	"\tprinta(@mtx_vain_spin);\n"
	"}\n"
	"END\n"
	"/rw_r_block_found/\n"
	"{\n"
	"\ttrace(\"R/W reader block\");\n"
	"\tprinta(@rw_r_block);\n"
	"}\n"
	"END\n"
	"/rw_w_block_found/\n"
	"{\n"
	"\ttrace(\"R/W writer block\");\n"
	"\tprinta(@rw_w_block);\n"
	"}\n";
261 
262 
/*
 * Contention clauses used in the default (non-histogram) mode: when blocking
 * or spinning ends, the elapsed time is added to a sum() aggregation and a
 * parallel count() aggregation is bumped, both keyed by lock address and a
 * five-frame user stack.  Clauses for the same probe are ordered from most to
 * least specific; each one zeroes the thread-local start time so the later
 * clauses for that probe do not fire as well.
 */
static const char *g_ctnd_times =
	/* rwlock block ended with arg1 == 1 and arg2 != 0: writer block */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
	"{\n"
	"\t@rw_w_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->rwblock[arg0]);\n"
	"\t@rw_w_block_count[arg0, ustack(5)] = count();\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_w_block_found = 1;\n"
	"}\n"
	/* remaining rwlock blocks with arg2 != 0: reader block */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg2 != 0/\n"
	"{\n"
	"\t@rw_r_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->rwblock[arg0]);\n"
	"\t@rw_r_block_count[arg0, ustack(5)] = count();\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_r_block_found = 1;\n"
	"}\n"
	/* anything left (arg2 == 0) is discarded without being recorded */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0]/\n"
	"{\n"
	"\tself->rwblock[arg0] = 0;\n"
	"}\n"
	/* mutex spin ended with arg1 != 0: reported as \"Mutex spin\" */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_spin[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxspin[arg0]);\n"
	"\t@mtx_spin_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_spin_found = 1;\n"
	"}\n"
	/* remaining spins: reported as \"Mutex unsuccessful spin\" */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0]/\n"
	"{\n"
	"\t@mtx_vain_spin[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxspin[arg0]);\n"
	"\t@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_vain_spin_found = 1;\n"
	"}\n"
	/* mutex block ended with arg1 != 0 */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxblock[arg0]);\n"
	"\t@mtx_block_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"\tmtx_block_found = 1;\n"
	"}\n"
	/* anything left (arg1 == 0) is discarded without being recorded */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0]/\n"
	"{\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"}\n"
	"\n"
	/* reporting: a trace() title followed by the joined aggregations */
	"END\n"
	"/mtx_block_found/\n"
	"{\n"
	"\ttrace(\"Mutex block\");\n"
	"\tprinta(@mtx_block, @mtx_block_count);\n"
	"}\n"
	"END\n"
	"/mtx_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex spin\");\n"
	"\tprinta(@mtx_spin, @mtx_spin_count);\n"
	"}\n"
	"END\n"
	"/mtx_vain_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex unsuccessful spin\");\n"
	"\tprinta(@mtx_vain_spin, @mtx_vain_spin_count);\n"
	"}\n"
	"END\n"
	"/rw_r_block_found/\n"
	"{\n"
	"\ttrace(\"R/W reader block\");\n"
	"\tprinta(@rw_r_block, @rw_r_block_count);\n"
	"}\n"
	"END\n"
	"/rw_w_block_found/\n"
	"{\n"
	"\ttrace(\"R/W writer block\");\n"
	"\tprinta(@rw_w_block, @rw_w_block_count);\n"
	"}\n";
350 
351 static char g_prog[4096];
352 static size_t g_proglen;
353 static int g_opt_V, g_opt_s;
354 static int g_intr;
355 static int g_exited;
356 static dtrace_optval_t g_nframes;
357 static ulong_t g_nent = ULONG_MAX;
358 
359 #define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
360 
361 static void
362 usage(void)
363 {
364 	(void) fprintf(stderr, "Usage:\n"
365 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
366 	    "\t    command [arg...]\n"
367 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
368 	    "\t    -p pid\n", g_pname, g_pname);
369 
370 	exit(E_USAGE);
371 }
372 
373 static void
374 verror(const char *fmt, va_list ap)
375 {
376 	int error = errno;
377 
378 	(void) fprintf(stderr, "%s: ", g_pname);
379 	(void) vfprintf(stderr, fmt, ap);
380 
381 	if (fmt[strlen(fmt) - 1] != '\n')
382 		(void) fprintf(stderr, ": %s\n", strerror(error));
383 }
384 
385 /*PRINTFLIKE1*/
386 static void
387 fatal(const char *fmt, ...)
388 {
389 	va_list ap;
390 
391 	va_start(ap, fmt);
392 	verror(fmt, ap);
393 	va_end(ap);
394 
395 	if (g_pr != NULL && g_dtp != NULL)
396 		dtrace_proc_release(g_dtp, g_pr);
397 
398 	exit(E_ERROR);
399 }
400 
401 /*PRINTFLIKE1*/
402 static void
403 dfatal(const char *fmt, ...)
404 {
405 	va_list ap;
406 
407 	va_start(ap, fmt);
408 
409 	(void) fprintf(stderr, "%s: ", g_pname);
410 	if (fmt != NULL)
411 		(void) vfprintf(stderr, fmt, ap);
412 
413 	va_end(ap);
414 
415 	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
416 		(void) fprintf(stderr, ": %s\n",
417 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
418 	} else if (fmt == NULL) {
419 		(void) fprintf(stderr, "%s\n",
420 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
421 	}
422 
423 	if (g_pr != NULL) {
424 		dtrace_proc_continue(g_dtp, g_pr);
425 		dtrace_proc_release(g_dtp, g_pr);
426 	}
427 
428 	exit(E_ERROR);
429 }
430 
/*
 * Emit a non-fatal diagnostic: forwards the format and its arguments to
 * verror() and returns to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
441 
442 static void
443 dprog_add(const char *prog)
444 {
445 	size_t len = strlen(prog);
446 	bcopy(prog, g_prog + g_proglen, len + 1);
447 	g_proglen += len;
448 	assert(g_proglen < sizeof (g_prog));
449 }
450 
451 static void
452 dprog_compile(void)
453 {
454 	dtrace_prog_t *prog;
455 	dtrace_proginfo_t info;
456 
457 	if (g_opt_V) {
458 		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
459 		(void) fputs(g_prog, stderr);
460 		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
461 	}
462 
463 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
464 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
465 		dfatal("failed to compile program");
466 
467 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
468 		dfatal("failed to enable probes");
469 }
470 
/*
 * Print the column headings of the lock table to stdout.
 */
void
print_legend(void)
{
	static const char *const heading[] = {
		"Count", "nsec", "Lock", "Caller"
	};

	(void) printf("%5s %8s %-28s %s\n",
	    heading[0], heading[1], heading[2], heading[3]);
}
476 
/*
 * Print the horizontal rule that separates table sections to stdout.
 */
void
print_bar(void)
{
	(void) fputs("---------------------------------------"
	    "----------------------------------------\n", stdout);
}
483 
/*
 * Print the heading that precedes a per-lock time-distribution histogram:
 * a blank line, then the bucket, distribution, count and stack columns.
 */
void
print_histogram_header(void)
{
	const char *bucket = "nsec";
	const char *tally = "count";
	const char *frames = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    bucket, tally, frames);
}
490 
491 /*
492  * Convert an address to a symbolic string or a numeric string. If nolocks
493  * is set, we return an error code if this symbol appears to be a mutex- or
494  * rwlock-related symbol in libc so the caller has a chance to find a more
495  * helpful symbol.
496  */
497 static int
498 getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
499     int nolocks)
500 {
501 	char name[256];
502 	GElf_Sym sym;
503 	prsyminfo_t info;
504 	size_t len;
505 
506 	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
507 	    &sym, &info) != 0) {
508 		(void) snprintf(buf, size, "%#lx", addr);
509 		return (0);
510 	}
511 	if (info.prs_object == NULL)
512 		info.prs_object = "<unknown>";
513 
514 	if (info.prs_lmid != LM_ID_BASE) {
515 		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
516 		buf += len;
517 		size -= len;
518 	}
519 
520 	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
521 	buf += len;
522 	size -= len;
523 
524 	if (sym.st_value != addr)
525 		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
526 
527 	if (nolocks && strcmp("libc.so.1", info.prs_object) == 0 &&
528 	    (strstr("mutex", info.prs_name) == 0 ||
529 	    strstr("rw", info.prs_name) == 0))
530 		return (-1);
531 
532 	return (0);
533 }
534 
535 /*ARGSUSED*/
536 static int
537 process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
538 {
539 	const dtrace_recdesc_t *rec;
540 	uintptr_t lock;
541 	uint64_t *stack;
542 	caddr_t data;
543 	pid_t pid;
544 	struct ps_prochandle *P;
545 	char buf[256];
546 	int i, j;
547 	uint64_t sum, count, avg;
548 
549 	if ((*(uint_t *)arg)++ >= g_nent)
550 		return (DTRACE_AGGWALK_NEXT);
551 
552 	rec = aggsdata[0]->dtada_desc->dtagd_rec;
553 	data = aggsdata[0]->dtada_data;
554 
555 	/*LINTED - alignment*/
556 	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
557 	/*LINTED - alignment*/
558 	stack = (uint64_t *)(data + rec[2].dtrd_offset);
559 
560 	if (!g_opt_s) {
561 		/*LINTED - alignment*/
562 		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
563 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
564 		/*LINTED - alignment*/
565 		count = *(uint64_t *)(aggsdata[2]->dtada_data +
566 		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
567 	} else {
568 		uint64_t *a;
569 
570 		/*LINTED - alignment*/
571 		a = (uint64_t *)(aggsdata[1]->dtada_data +
572 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
573 
574 		print_bar();
575 		print_legend();
576 
577 		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
578 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
579 			count += a[i];
580 			sum += a[i] << (j - 64);
581 		}
582 	}
583 
584 	avg = sum / count;
585 	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
586 
587 	pid = stack[0];
588 	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
589 
590 	(void) getsym(P, lock, buf, sizeof (buf), 0);
591 	(void) printf("%-28s ", buf);
592 
593 	for (i = 2; i <= 5; i++) {
594 		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
595 			break;
596 	}
597 	(void) printf("%s\n", buf);
598 
599 	if (g_opt_s) {
600 		int stack_done = 0;
601 		int quant_done = 0;
602 		int first_bin, last_bin;
603 		uint64_t bin_size, *a;
604 
605 		/*LINTED - alignment*/
606 		a = (uint64_t *)(aggsdata[1]->dtada_data +
607 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
608 
609 		print_histogram_header();
610 
611 		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
612 		    a[first_bin] == 0; first_bin++)
613 			continue;
614 		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
615 		    a[last_bin] == 0; last_bin--)
616 			continue;
617 
618 		for (i = 0; !stack_done || !quant_done; i++) {
619 			if (!stack_done) {
620 				(void) getsym(P, stack[i + 2], buf,
621 				    sizeof (buf), 0);
622 			} else {
623 				buf[0] = '\0';
624 			}
625 
626 			if (!quant_done) {
627 				bin_size = a[first_bin];
628 
629 				(void) printf("%10llu |%-24.*s| %5llu %s\n",
630 				    1ULL <<
631 				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
632 				    (int)(24.0 * bin_size / count),
633 				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
634 				    (u_longlong_t)bin_size, buf);
635 			} else {
636 				(void) printf("%43s %s\n", "", buf);
637 			}
638 
639 			if (i + 1 >= g_nframes || stack[i + 3] == 0)
640 				stack_done = 1;
641 
642 			if (first_bin++ == last_bin)
643 				quant_done = 1;
644 		}
645 	}
646 
647 	dtrace_proc_release(g_dtp, P);
648 
649 	return (DTRACE_AGGWALK_NEXT);
650 }
651 
652 /*ARGSUSED*/
653 static void
654 prochandler(struct ps_prochandle *P, const char *msg, void *arg)
655 {
656 	const psinfo_t *prp = Ppsinfo(P);
657 	int pid = Pstatus(P)->pr_pid;
658 	char name[SIG2STR_MAX];
659 
660 	if (msg != NULL) {
661 		notice("pid %d: %s\n", pid, msg);
662 		return;
663 	}
664 
665 	switch (Pstate(P)) {
666 	case PS_UNDEAD:
667 		/*
668 		 * Ideally we would like to always report pr_wstat here, but it
669 		 * isn't possible given current /proc semantics.  If we grabbed
670 		 * the process, Ppsinfo() will either fail or return a zeroed
671 		 * psinfo_t depending on how far the parent is in reaping it.
672 		 * When /proc provides a stable pr_wstat in the status file,
673 		 * this code can be improved by examining this new pr_wstat.
674 		 */
675 		if (prp != NULL && WIFSIGNALED(prp->pr_wstat)) {
676 			notice("pid %d terminated by %s\n", pid,
677 			    proc_signame(WTERMSIG(prp->pr_wstat),
678 			    name, sizeof (name)));
679 		} else if (prp != NULL && WEXITSTATUS(prp->pr_wstat) != 0) {
680 			notice("pid %d exited with status %d\n",
681 			    pid, WEXITSTATUS(prp->pr_wstat));
682 		} else {
683 			notice("pid %d has exited\n", pid);
684 		}
685 		g_exited = 1;
686 		break;
687 
688 	case PS_LOST:
689 		notice("pid %d exec'd a set-id or unobservable program\n", pid);
690 		g_exited = 1;
691 		break;
692 	}
693 }
694 
695 /*ARGSUSED*/
696 static int
697 chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
698 {
699 	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
700 	dtrace_aggvarid_t aggvars[2];
701 	const void *buf;
702 	int i, nagv;
703 
704 	/*
705 	 * A NULL rec indicates that we've processed the last record.
706 	 */
707 	if (rec == NULL)
708 		return (DTRACE_CONSUME_NEXT);
709 
710 	buf = data->dtpda_data - rec->dtrd_offset;
711 
712 	switch (rec->dtrd_action) {
713 	case DTRACEACT_DIFEXPR:
714 		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
715 		if (!g_opt_s) {
716 			print_legend();
717 			print_bar();
718 		}
719 		return (DTRACE_CONSUME_NEXT);
720 
721 	case DTRACEACT_PRINTA:
722 		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
723 			const dtrace_recdesc_t *nrec = &rec[i];
724 
725 			if (nrec->dtrd_uarg != rec->dtrd_uarg)
726 				break;
727 
728 			/*LINTED - alignment*/
729 			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
730 			    nrec->dtrd_offset);
731 		}
732 
733 		if (nagv == (g_opt_s ? 1 : 2)) {
734 			uint_t nent = 0;
735 			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
736 			    process_aggregate, &nent) != 0)
737 				dfatal("failed to walk aggregate");
738 		}
739 
740 		return (DTRACE_CONSUME_NEXT);
741 	}
742 
743 	return (DTRACE_CONSUME_THIS);
744 }
745 
/*
 * Signal handler installed by main() for SIGINT and SIGTERM.  It only sets
 * g_intr; the main loop notices the flag and stops tracing.
 */
/*ARGSUSED*/
static void
intr(int signo)
{
	g_intr = 1;
}
752 
753 int
754 main(int argc, char **argv)
755 {
756 	ucred_t *ucp;
757 	int err;
758 	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
759 	char c, *p, *end;
760 	struct sigaction act;
761 	int done = 0;
762 
763 	g_pname = basename(argv[0]);
764 	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
765 
766 	/*
767 	 * Make sure we have the required dtrace_proc privilege.
768 	 */
769 	if ((ucp = ucred_get(getpid())) != NULL) {
770 		const priv_set_t *psp;
771 		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
772 		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
773 			fatal("dtrace_proc privilege required\n");
774 		}
775 
776 		ucred_free(ucp);
777 	}
778 
779 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
780 		switch (c) {
781 		case 'n':
782 			errno = 0;
783 			g_nent = strtoul(optarg, &end, 10);
784 			if (*end != '\0' || errno != 0) {
785 				(void) fprintf(stderr, "%s: invalid count "
786 				    "'%s'\n", g_pname, optarg);
787 				usage();
788 			}
789 			break;
790 
791 		case 'p':
792 			opt_p = 1;
793 			break;
794 
795 		case 'v':
796 			opt_v = 1;
797 			break;
798 
799 		case 'A':
800 			opt_C = opt_H = 1;
801 			break;
802 
803 		case 'C':
804 			opt_C = 1;
805 			break;
806 
807 		case 'H':
808 			opt_H = 1;
809 			break;
810 
811 		case 'V':
812 			g_opt_V = 1;
813 			break;
814 
815 		default:
816 			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
817 				usage();
818 		}
819 	}
820 
821 	/*
822 	 * We need a command or at least one pid.
823 	 */
824 	if (argc == optind)
825 		usage();
826 
827 	if (opt_C == 0 && opt_H == 0)
828 		opt_C = 1;
829 
830 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
831 		fatal("failed to initialize dtrace: %s\n",
832 		    dtrace_errmsg(NULL, err));
833 
834 	/*
835 	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
836 	 */
837 	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
838 		dfatal("failed to set 'strsize'");
839 
840 	/*
841 	 * 1k should be more than enough for all trace() and printa() actions.
842 	 */
843 	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
844 		dfatal("failed to set 'bufsize'");
845 
846 	/*
847 	 * The table we produce has the hottest locks at the top.
848 	 */
849 	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
850 		dfatal("failed to set 'aggsortrev'");
851 
852 	/*
853 	 * These are two reasonable defaults which should suffice.
854 	 */
855 	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
856 		dfatal("failed to set 'aggsize'");
857 	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
858 		dfatal("failed to set 'aggrate'");
859 
860 	/*
861 	 * Take a second pass through to look for options that set options now
862 	 * that we have an open dtrace handle.
863 	 */
864 	optind = 1;
865 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
866 		switch (c) {
867 		case 's':
868 			g_opt_s = 1;
869 			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
870 				dfatal("failed to set 'ustackframes'");
871 			break;
872 
873 		case 'x':
874 			if ((p = strchr(optarg, '=')) != NULL)
875 				*p++ = '\0';
876 
877 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
878 				dfatal("failed to set -x %s", optarg);
879 			break;
880 
881 		case 'e':
882 			errno = 0;
883 			(void) strtoul(optarg, &end, 10);
884 			if (*optarg == '-' || *end != '\0' || errno != 0) {
885 				(void) fprintf(stderr, "%s: invalid timeout "
886 				    "'%s'\n", g_pname, optarg);
887 				usage();
888 			}
889 
890 			/*
891 			 * Construct a DTrace enabling that will exit after
892 			 * the specified number of seconds.
893 			 */
894 			dprog_add("BEGIN\n{\n\tend = timestamp + ");
895 			dprog_add(optarg);
896 			dprog_add(" * 1000000000;\n}\n");
897 			dprog_add("tick-10hz\n/timestamp >= end/\n");
898 			dprog_add("{\n\texit(0);\n}\n");
899 			break;
900 		}
901 	}
902 
903 	argc -= optind;
904 	argv += optind;
905 
906 	if (opt_H) {
907 		dprog_add(g_hold_init);
908 		if (g_opt_s == 0)
909 			dprog_add(g_hold_times);
910 		else
911 			dprog_add(g_hold_histogram);
912 	}
913 
914 	if (opt_C) {
915 		dprog_add(g_ctnd_init);
916 		if (g_opt_s == 0)
917 			dprog_add(g_ctnd_times);
918 		else
919 			dprog_add(g_ctnd_histogram);
920 	}
921 
922 	if (opt_p) {
923 		ulong_t pid;
924 
925 		if (argc > 1) {
926 			(void) fprintf(stderr, "%s: only one pid is allowed\n",
927 			    g_pname);
928 			usage();
929 		}
930 
931 		errno = 0;
932 		pid = strtoul(argv[0], &end, 10);
933 		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
934 			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
935 			    g_pname, argv[0]);
936 			usage();
937 		}
938 
939 		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
940 			dfatal(NULL);
941 	} else {
942 		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv)) == NULL)
943 			dfatal(NULL);
944 	}
945 
946 	dprog_compile();
947 
948 	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
949 		dfatal("failed to establish proc handler");
950 
951 	(void) sigemptyset(&act.sa_mask);
952 	act.sa_flags = 0;
953 	act.sa_handler = intr;
954 	(void) sigaction(SIGINT, &act, NULL);
955 	(void) sigaction(SIGTERM, &act, NULL);
956 
957 	if (dtrace_go(g_dtp) != 0)
958 		dfatal("dtrace_go()");
959 
960 	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
961 		dfatal("failed to get 'ustackframes'");
962 
963 	dtrace_proc_continue(g_dtp, g_pr);
964 
965 	if (opt_v)
966 		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
967 		    (int)Pstatus(g_pr)->pr_pid);
968 
969 	do {
970 		if (!g_intr && !done)
971 			dtrace_sleep(g_dtp);
972 
973 		if (done || g_intr || g_exited) {
974 			done = 1;
975 			if (dtrace_stop(g_dtp) == -1)
976 				dfatal("couldn't stop tracing");
977 		}
978 
979 		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
980 		case DTRACE_WORKSTATUS_DONE:
981 			done = 1;
982 			break;
983 		case DTRACE_WORKSTATUS_OKAY:
984 			break;
985 		default:
986 			dfatal("processing aborted");
987 		}
988 
989 	} while (!done);
990 
991 	dtrace_close(g_dtp);
992 
993 	return (0);
994 }
995