xref: /freebsd/cddl/contrib/opensolaris/cmd/plockstat/plockstat.c (revision f2b7bf8afcfd630e0fbd8417f1ce974de79feaf0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifdef illumos
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 #endif
30 
31 #include <assert.h>
32 #include <dtrace.h>
33 #include <limits.h>
34 #include <link.h>
35 #include <priv.h>
36 #include <signal.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <strings.h>
42 #include <errno.h>
43 #include <sys/wait.h>
44 #include <libgen.h>
45 #include <libproc.h>
46 #include <libproc_compat.h>
47 
48 static char *g_pname;
49 static dtrace_hdl_t *g_dtp;
50 struct ps_prochandle *g_pr;
51 
52 #define	E_SUCCESS	0
53 #define	E_ERROR		1
54 #define	E_USAGE		2
55 
/*
 * Hold-time bookkeeping.  Mutex acquisitions are stamped in a global
 * associative array because, in user-land, it is not invalid for one thread
 * to release a synchronization primitive that another thread acquired.
 * Rwlock acquisitions are stamped in a thread-local associative array because
 * multiple threads can hold the same lock for reading.  Recursive mutex
 * acquisitions and releases are ignored as they don't truly affect lock
 * contention.
 */
static const char *g_hold_init =
	/* Rwlock acquired: remember when, per thread and per lock. */
	"plockstat$target:::rw-acquire\n"
	"{\n"
	"\tself->rwhold[arg0] = timestamp;\n"
	"}\n"
	/* Mutex acquired (arg1 == 0 only): remember when, per lock. */
	"plockstat$target:::mutex-acquire\n"
	"/arg1 == 0/\n"
	"{\n"
	"\tmtxhold[arg0] = timestamp;\n"
	"}\n";
74 
/*
 * Hold-time clauses that build quantize() distributions keyed by lock address
 * and the full user stack.  Each release clause zeroes the acquisition stamp
 * it consumed and raises a "found" flag; the END clauses print only the
 * aggregations whose flag was raised.
 */
static const char *g_hold_histogram =
	/* Rwlock released with arg1 == 1: recorded as a writer hold. */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0] && arg1 == 1/\n"
	"{\n"
	"\t@rw_w_hold[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwhold[arg0]);\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_w_hold_found = 1;\n"
	"}\n"
	/* Any rwlock release whose stamp is still set: a reader hold. */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0]/\n"
	"{\n"
	"\t@rw_r_hold[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwhold[arg0]);\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_r_hold_found = 1;\n"
	"}\n"
	/* Mutex released (arg1 == 0 only). */
	"plockstat$target:::mutex-release\n"
	"/mtxhold[arg0] && arg1 == 0/\n"
	"{\n"
	"\t@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
	"\tmtxhold[arg0] = 0;\n"
	"\tmtx_hold_found = 1;\n"
	"}\n"
	"\n"
	/* Reporting: a title via trace(), then the aggregation. */
	"END\n"
	"/mtx_hold_found/\n"
	"{\n"
	"\ttrace(\"Mutex hold\");\n"
	"\tprinta(@mtx_hold);\n"
	"}\n"
	"END\n"
	"/rw_r_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W reader hold\");\n"
	"\tprinta(@rw_r_hold);\n"
	"}\n"
	"END\n"
	"/rw_w_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W writer hold\");\n"
	"\tprinta(@rw_w_hold);\n"
	"}\n";
118 
/*
 * Hold-time clauses that keep a sum() of hold durations and a matching
 * count() per lock address and five-frame user stack.  The END clauses print
 * each sum aggregation joined with its count aggregation.
 */
static const char *g_hold_times =
	/* Rwlock released with arg1 == 1: recorded as a writer hold. */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0] && arg1 == 1/\n"
	"{\n"
	"\t@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
	"\t@rw_w_hold_count[arg0, ustack(5)] = count();\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_w_hold_found = 1;\n"
	"}\n"
	/* Any rwlock release whose stamp is still set: a reader hold. */
	"plockstat$target:::rw-release\n"
	"/self->rwhold[arg0]/\n"
	"{\n"
	"\t@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
	"\t@rw_r_hold_count[arg0, ustack(5)] = count();\n"
	"\tself->rwhold[arg0] = 0;\n"
	"\trw_r_hold_found = 1;\n"
	"}\n"
	/* Mutex released (arg1 == 0 only). */
	"plockstat$target:::mutex-release\n"
	"/mtxhold[arg0] && arg1 == 0/\n"
	"{\n"
	"\t@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
	"\t@mtx_hold_count[arg0, ustack(5)] = count();\n"
	"\tmtxhold[arg0] = 0;\n"
	"\tmtx_hold_found = 1;\n"
	"}\n"
	"\n"
	/* Reporting: a title via trace(), then sum and count together. */
	"END\n"
	"/mtx_hold_found/\n"
	"{\n"
	"\ttrace(\"Mutex hold\");\n"
	"\tprinta(@mtx_hold, @mtx_hold_count);\n"
	"}\n"
	"END\n"
	"/rw_r_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W reader hold\");\n"
	"\tprinta(@rw_r_hold, @rw_r_hold_count);\n"
	"}\n"
	"END\n"
	"/rw_w_hold_found/\n"
	"{\n"
	"\ttrace(\"R/W writer hold\");\n"
	"\tprinta(@rw_w_hold, @rw_w_hold_count);\n"
	"}\n";
163 
164 
/*
 * Contention bookkeeping.  Thread-local associative arrays are used because
 * we are tracing a single thread's activity in libc, and multiple threads can
 * be blocking or spinning on the same synchronization primitive.
 */
static const char *g_ctnd_init =
	/* Thread starts blocking on an rwlock. */
	"plockstat$target:::rw-block\n"
	"{\n"
	"\tself->rwblock[arg0] = timestamp;\n"
	"}\n"
	/* Thread starts blocking on a mutex. */
	"plockstat$target:::mutex-block\n"
	"{\n"
	"\tself->mtxblock[arg0] = timestamp;\n"
	"}\n"
	/* Thread starts spinning on a mutex. */
	"plockstat$target:::mutex-spin\n"
	"{\n"
	"\tself->mtxspin[arg0] = timestamp;\n"
	"}\n";
183 
/*
 * Contention clauses that build quantize() distributions keyed by lock
 * address and the full user stack.  Every clause zeroes the stamp it
 * consumed; the END clauses print only the aggregations that saw data.
 */
static const char *g_ctnd_histogram =
	/* Rwlock block ended with arg1 == 1 and arg2 != 0: writer block. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
	"{\n"
	"\t@rw_w_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwblock[arg0]);\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_w_block_found = 1;\n"
	"}\n"
	/* Remaining rwlock blocks with arg2 != 0: reader block. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg2 != 0/\n"
	"{\n"
	"\t@rw_r_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->rwblock[arg0]);\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_r_block_found = 1;\n"
	"}\n"
	/* Anything still stamped is discarded without being recorded. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0]/\n"
	"{\n"
	"\tself->rwblock[arg0] = 0;\n"
	"}\n"
	/* Mutex spin ended with arg1 != 0. */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_spin[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxspin[arg0]);\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_spin_found = 1;\n"
	"}\n"
	/* Remaining spins are reported as unsuccessful. */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0]/\n"
	"{\n"
	"\t@mtx_vain_spin[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxspin[arg0]);\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_vain_spin_found = 1;\n"
	"}\n"
	/* Mutex block ended with arg1 != 0. */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_block[arg0, ustack()] =\n"
	"\t    quantize(timestamp - self->mtxblock[arg0]);\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"\tmtx_block_found = 1;\n"
	"}\n"
	/* Anything still stamped is discarded without being recorded. */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0]/\n"
	"{\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"}\n"
	"\n"
	/* Reporting: a title via trace(), then the aggregation. */
	"END\n"
	"/mtx_block_found/\n"
	"{\n"
	"\ttrace(\"Mutex block\");\n"
	"\tprinta(@mtx_block);\n"
	"}\n"
	"END\n"
	"/mtx_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex spin\");\n"
	"\tprinta(@mtx_spin);\n"
	"}\n"
	"END\n"
	"/mtx_vain_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex unsuccessful spin\");\n"
	"\tprinta(@mtx_vain_spin);\n"
	"}\n"
	"END\n"
	"/rw_r_block_found/\n"
	"{\n"
	"\ttrace(\"R/W reader block\");\n"
	"\tprinta(@rw_r_block);\n"
	"}\n"
	"END\n"
	"/rw_w_block_found/\n"
	"{\n"
	"\ttrace(\"R/W writer block\");\n"
	"\tprinta(@rw_w_block);\n"
	"}\n";
266 
267 
/*
 * Contention clauses that keep a sum() of wait durations and a matching
 * count() per lock address and five-frame user stack.  The END clauses print
 * each sum aggregation joined with its count aggregation.
 */
static const char *g_ctnd_times =
	/* Rwlock block ended with arg1 == 1 and arg2 != 0: writer block. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
	"{\n"
	"\t@rw_w_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->rwblock[arg0]);\n"
	"\t@rw_w_block_count[arg0, ustack(5)] = count();\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_w_block_found = 1;\n"
	"}\n"
	/* Remaining rwlock blocks with arg2 != 0: reader block. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0] && arg2 != 0/\n"
	"{\n"
	"\t@rw_r_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->rwblock[arg0]);\n"
	"\t@rw_r_block_count[arg0, ustack(5)] = count();\n"
	"\tself->rwblock[arg0] = 0;\n"
	"\trw_r_block_found = 1;\n"
	"}\n"
	/* Anything still stamped is discarded without being recorded. */
	"plockstat$target:::rw-blocked\n"
	"/self->rwblock[arg0]/\n"
	"{\n"
	"\tself->rwblock[arg0] = 0;\n"
	"}\n"
	/* Mutex spin ended with arg1 != 0. */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_spin[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxspin[arg0]);\n"
	"\t@mtx_spin_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_spin_found = 1;\n"
	"}\n"
	/* Remaining spins are reported as unsuccessful. */
	"plockstat$target:::mutex-spun\n"
	"/self->mtxspin[arg0]/\n"
	"{\n"
	"\t@mtx_vain_spin[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxspin[arg0]);\n"
	"\t@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxspin[arg0] = 0;\n"
	"\tmtx_vain_spin_found = 1;\n"
	"}\n"
	/* Mutex block ended with arg1 != 0. */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0] && arg1 != 0/\n"
	"{\n"
	"\t@mtx_block[arg0, ustack(5)] =\n"
	"\t    sum(timestamp - self->mtxblock[arg0]);\n"
	"\t@mtx_block_count[arg0, ustack(5)] = count();\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"\tmtx_block_found = 1;\n"
	"}\n"
	/* Anything still stamped is discarded without being recorded. */
	"plockstat$target:::mutex-blocked\n"
	"/self->mtxblock[arg0]/\n"
	"{\n"
	"\tself->mtxblock[arg0] = 0;\n"
	"}\n"
	"\n"
	/* Reporting: a title via trace(), then sum and count together. */
	"END\n"
	"/mtx_block_found/\n"
	"{\n"
	"\ttrace(\"Mutex block\");\n"
	"\tprinta(@mtx_block, @mtx_block_count);\n"
	"}\n"
	"END\n"
	"/mtx_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex spin\");\n"
	"\tprinta(@mtx_spin, @mtx_spin_count);\n"
	"}\n"
	"END\n"
	"/mtx_vain_spin_found/\n"
	"{\n"
	"\ttrace(\"Mutex unsuccessful spin\");\n"
	"\tprinta(@mtx_vain_spin, @mtx_vain_spin_count);\n"
	"}\n"
	"END\n"
	"/rw_r_block_found/\n"
	"{\n"
	"\ttrace(\"R/W reader block\");\n"
	"\tprinta(@rw_r_block, @rw_r_block_count);\n"
	"}\n"
	"END\n"
	"/rw_w_block_found/\n"
	"{\n"
	"\ttrace(\"R/W writer block\");\n"
	"\tprinta(@rw_w_block, @rw_w_block_count);\n"
	"}\n";
355 
356 static char g_prog[4096];
357 static size_t g_proglen;
358 static int g_opt_V, g_opt_s;
359 static int g_intr;
360 static int g_exited;
361 static dtrace_optval_t g_nframes;
362 static ulong_t g_nent = ULONG_MAX;
363 
364 #define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
365 
366 static void
367 usage(void)
368 {
369 	(void) fprintf(stderr, "Usage:\n"
370 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
371 	    "\t    command [arg...]\n"
372 	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
373 	    "\t    -p pid\n", g_pname, g_pname);
374 
375 	exit(E_USAGE);
376 }
377 
378 static void
379 verror(const char *fmt, va_list ap)
380 {
381 	int error = errno;
382 
383 	(void) fprintf(stderr, "%s: ", g_pname);
384 	(void) vfprintf(stderr, fmt, ap);
385 
386 	if (fmt[strlen(fmt) - 1] != '\n')
387 		(void) fprintf(stderr, ": %s\n", strerror(error));
388 }
389 
390 /*PRINTFLIKE1*/
391 static void
392 fatal(const char *fmt, ...)
393 {
394 	va_list ap;
395 
396 	va_start(ap, fmt);
397 	verror(fmt, ap);
398 	va_end(ap);
399 
400 	if (g_pr != NULL && g_dtp != NULL)
401 		dtrace_proc_release(g_dtp, g_pr);
402 
403 	exit(E_ERROR);
404 }
405 
406 /*PRINTFLIKE1*/
407 static void
408 dfatal(const char *fmt, ...)
409 {
410 	va_list ap;
411 
412 	va_start(ap, fmt);
413 
414 	(void) fprintf(stderr, "%s: ", g_pname);
415 	if (fmt != NULL)
416 		(void) vfprintf(stderr, fmt, ap);
417 
418 	va_end(ap);
419 
420 	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
421 		(void) fprintf(stderr, ": %s\n",
422 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
423 	} else if (fmt == NULL) {
424 		(void) fprintf(stderr, "%s\n",
425 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
426 	}
427 
428 	if (g_pr != NULL) {
429 		dtrace_proc_continue(g_dtp, g_pr);
430 		dtrace_proc_release(g_dtp, g_pr);
431 	}
432 
433 	exit(E_ERROR);
434 }
435 
/*
 * Print a non-fatal message through verror() and return to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
446 
447 static void
448 dprog_add(const char *prog)
449 {
450 	size_t len = strlen(prog);
451 	bcopy(prog, g_prog + g_proglen, len + 1);
452 	g_proglen += len;
453 	assert(g_proglen < sizeof (g_prog));
454 }
455 
456 static void
457 dprog_compile(void)
458 {
459 	dtrace_prog_t *prog;
460 	dtrace_proginfo_t info;
461 
462 	if (g_opt_V) {
463 		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
464 		(void) fputs(g_prog, stderr);
465 		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
466 	}
467 
468 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
469 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
470 		dfatal("failed to compile program");
471 
472 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
473 		dfatal("failed to enable probes");
474 }
475 
/*
 * Print the column headings of the lock table on stdout.
 */
void
print_legend(void)
{
	static const char *const heading[] = {
		"Count", "nsec", "Lock", "Caller"
	};

	(void) printf("%5s %8s %-28s %s\n",
	    heading[0], heading[1], heading[2], heading[3]);
}
481 
/*
 * Print a horizontal rule of dashes, followed by a newline, on stdout.
 */
void
print_bar(void)
{
	static const char rule[] =
	    "---------------------------------------"
	    "----------------------------------------\n";

	(void) fputs(rule, stdout);
}
488 
/*
 * Print the heading that precedes a time-distribution histogram: a blank
 * line, then the "nsec", distribution, "count" and "Stack" column titles.
 */
void
print_histogram_header(void)
{
	const char *value_col = "nsec";
	const char *count_col = "count";
	const char *stack_col = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    value_col, count_col, stack_col);
}
495 
496 /*
497  * Convert an address to a symbolic string or a numeric string. If nolocks
498  * is set, we return an error code if this symbol appears to be a mutex- or
499  * rwlock-related symbol in libc so the caller has a chance to find a more
500  * helpful symbol.
501  */
502 static int
503 getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
504     int nolocks)
505 {
506 	char name[256];
507 	GElf_Sym sym;
508 #ifdef illumos
509 	prsyminfo_t info;
510 #else
511 	prmap_t *map;
512 	int info; /* XXX unused */
513 #endif
514 	size_t len;
515 
516 	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
517 	    &sym, &info) != 0) {
518 		(void) snprintf(buf, size, "%#lx", (unsigned long)addr);
519 		return (0);
520 	}
521 #ifdef illumos
522 	if (info.prs_object == NULL)
523 		info.prs_object = "<unknown>";
524 
525 	if (info.prs_lmid != LM_ID_BASE) {
526 		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
527 		buf += len;
528 		size -= len;
529 	}
530 
531 	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
532 #else
533 	map = proc_addr2map(P, addr);
534 	len = snprintf(buf, size, "%s`%s", map->pr_mapname, name);
535 #endif
536 	buf += len;
537 	size -= len;
538 
539 	if (sym.st_value != addr)
540 		len = snprintf(buf, size, "+%#lx", (unsigned long)(addr - sym.st_value));
541 
542 	if (nolocks && strcmp("libc.so.1", map->pr_mapname) == 0 &&
543 	    (strstr("mutex", name) == 0 ||
544 	    strstr("rw", name) == 0))
545 		return (-1);
546 
547 	return (0);
548 }
549 
550 /*ARGSUSED*/
551 static int
552 process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
553 {
554 	const dtrace_recdesc_t *rec;
555 	uintptr_t lock;
556 	uint64_t *stack;
557 	caddr_t data;
558 	pid_t pid;
559 	struct ps_prochandle *P;
560 	char buf[256];
561 	int i, j;
562 	uint64_t sum, count, avg;
563 
564 	if ((*(uint_t *)arg)++ >= g_nent)
565 		return (DTRACE_AGGWALK_NEXT);
566 
567 	rec = aggsdata[0]->dtada_desc->dtagd_rec;
568 	data = aggsdata[0]->dtada_data;
569 
570 	/*LINTED - alignment*/
571 	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
572 	/*LINTED - alignment*/
573 	stack = (uint64_t *)(data + rec[2].dtrd_offset);
574 
575 	if (!g_opt_s) {
576 		/*LINTED - alignment*/
577 		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
578 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
579 		/*LINTED - alignment*/
580 		count = *(uint64_t *)(aggsdata[2]->dtada_data +
581 		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
582 	} else {
583 		uint64_t *a;
584 
585 		/*LINTED - alignment*/
586 		a = (uint64_t *)(aggsdata[1]->dtada_data +
587 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
588 
589 		print_bar();
590 		print_legend();
591 
592 		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
593 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
594 			count += a[i];
595 			sum += a[i] << (j - 64);
596 		}
597 	}
598 
599 	avg = sum / count;
600 	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
601 
602 	pid = stack[0];
603 	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
604 
605 	(void) getsym(P, lock, buf, sizeof (buf), 0);
606 	(void) printf("%-28s ", buf);
607 
608 	for (i = 2; i <= 5; i++) {
609 		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
610 			break;
611 	}
612 	(void) printf("%s\n", buf);
613 
614 	if (g_opt_s) {
615 		int stack_done = 0;
616 		int quant_done = 0;
617 		int first_bin, last_bin;
618 		uint64_t bin_size, *a;
619 
620 		/*LINTED - alignment*/
621 		a = (uint64_t *)(aggsdata[1]->dtada_data +
622 		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
623 
624 		print_histogram_header();
625 
626 		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
627 		    a[first_bin] == 0; first_bin++)
628 			continue;
629 		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
630 		    a[last_bin] == 0; last_bin--)
631 			continue;
632 
633 		for (i = 0; !stack_done || !quant_done; i++) {
634 			if (!stack_done) {
635 				(void) getsym(P, stack[i + 2], buf,
636 				    sizeof (buf), 0);
637 			} else {
638 				buf[0] = '\0';
639 			}
640 
641 			if (!quant_done) {
642 				bin_size = a[first_bin];
643 
644 				(void) printf("%10llu |%-24.*s| %5llu %s\n",
645 				    1ULL <<
646 				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
647 				    (int)(24.0 * bin_size / count),
648 				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
649 				    (u_longlong_t)bin_size, buf);
650 			} else {
651 				(void) printf("%43s %s\n", "", buf);
652 			}
653 
654 			if (i + 1 >= g_nframes || stack[i + 3] == 0)
655 				stack_done = 1;
656 
657 			if (first_bin++ == last_bin)
658 				quant_done = 1;
659 		}
660 	}
661 
662 	dtrace_proc_release(g_dtp, P);
663 
664 	return (DTRACE_AGGWALK_NEXT);
665 }
666 
667 /*ARGSUSED*/
668 static void
669 prochandler(struct ps_prochandle *P, const char *msg, void *arg)
670 {
671 #ifdef illumos
672 	const psinfo_t *prp = Ppsinfo(P);
673 	int pid = Pstatus(P)->pr_pid;
674 #else
675 	int pid = proc_getpid(P);
676 	int wstat = proc_getwstat(P);
677 #endif
678 	char name[SIG2STR_MAX];
679 
680 	if (msg != NULL) {
681 		notice("pid %d: %s\n", pid, msg);
682 		return;
683 	}
684 
685 	switch (Pstate(P)) {
686 	case PS_UNDEAD:
687 		/*
688 		 * Ideally we would like to always report pr_wstat here, but it
689 		 * isn't possible given current /proc semantics.  If we grabbed
690 		 * the process, Ppsinfo() will either fail or return a zeroed
691 		 * psinfo_t depending on how far the parent is in reaping it.
692 		 * When /proc provides a stable pr_wstat in the status file,
693 		 * this code can be improved by examining this new pr_wstat.
694 		 */
695 		if (WIFSIGNALED(wstat)) {
696 			notice("pid %d terminated by %s\n", pid,
697 			    proc_signame(WTERMSIG(wstat),
698 			    name, sizeof (name)));
699 		} else if (WEXITSTATUS(wstat) != 0) {
700 			notice("pid %d exited with status %d\n",
701 			    pid, WEXITSTATUS(wstat));
702 		} else {
703 			notice("pid %d has exited\n", pid);
704 		}
705 		g_exited = 1;
706 		break;
707 
708 	case PS_LOST:
709 		notice("pid %d exec'd a set-id or unobservable program\n", pid);
710 		g_exited = 1;
711 		break;
712 	}
713 }
714 
715 /*ARGSUSED*/
716 static int
717 chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
718 {
719 	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
720 	dtrace_aggvarid_t aggvars[2];
721 	const void *buf;
722 	int i, nagv;
723 
724 	/*
725 	 * A NULL rec indicates that we've processed the last record.
726 	 */
727 	if (rec == NULL)
728 		return (DTRACE_CONSUME_NEXT);
729 
730 	buf = data->dtpda_data - rec->dtrd_offset;
731 
732 	switch (rec->dtrd_action) {
733 	case DTRACEACT_DIFEXPR:
734 		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
735 		if (!g_opt_s) {
736 			print_legend();
737 			print_bar();
738 		}
739 		return (DTRACE_CONSUME_NEXT);
740 
741 	case DTRACEACT_PRINTA:
742 		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
743 			const dtrace_recdesc_t *nrec = &rec[i];
744 
745 			if (nrec->dtrd_uarg != rec->dtrd_uarg)
746 				break;
747 
748 			/*LINTED - alignment*/
749 			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
750 			    nrec->dtrd_offset);
751 		}
752 
753 		if (nagv == (g_opt_s ? 1 : 2)) {
754 			uint_t nent = 0;
755 			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
756 			    process_aggregate, &nent) != 0)
757 				dfatal("failed to walk aggregate");
758 		}
759 
760 		return (DTRACE_CONSUME_NEXT);
761 	}
762 
763 	return (DTRACE_CONSUME_THIS);
764 }
765 
766 /*ARGSUSED*/
767 static void
768 intr(int signo)
769 {
770 	g_intr = 1;
771 }
772 
773 int
774 main(int argc, char **argv)
775 {
776 #ifdef illumos
777 	ucred_t *ucp;
778 #endif
779 	int err;
780 	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
781 	int c;
782 	char *p, *end;
783 	struct sigaction act;
784 	int done = 0;
785 
786 	g_pname = basename(argv[0]);
787 	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
788 #ifdef illumos
789 	/*
790 	 * Make sure we have the required dtrace_proc privilege.
791 	 */
792 	if ((ucp = ucred_get(getpid())) != NULL) {
793 		const priv_set_t *psp;
794 		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
795 		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
796 			fatal("dtrace_proc privilege required\n");
797 		}
798 
799 		ucred_free(ucp);
800 	}
801 #endif
802 
803 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
804 		switch (c) {
805 		case 'n':
806 			errno = 0;
807 			g_nent = strtoul(optarg, &end, 10);
808 			if (*end != '\0' || errno != 0) {
809 				(void) fprintf(stderr, "%s: invalid count "
810 				    "'%s'\n", g_pname, optarg);
811 				usage();
812 			}
813 			break;
814 
815 		case 'p':
816 			opt_p = 1;
817 			break;
818 
819 		case 'v':
820 			opt_v = 1;
821 			break;
822 
823 		case 'A':
824 			opt_C = opt_H = 1;
825 			break;
826 
827 		case 'C':
828 			opt_C = 1;
829 			break;
830 
831 		case 'H':
832 			opt_H = 1;
833 			break;
834 
835 		case 'V':
836 			g_opt_V = 1;
837 			break;
838 
839 		default:
840 			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
841 				usage();
842 		}
843 	}
844 
845 	/*
846 	 * We need a command or at least one pid.
847 	 */
848 	if (argc == optind)
849 		usage();
850 
851 	if (opt_C == 0 && opt_H == 0)
852 		opt_C = 1;
853 
854 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
855 		fatal("failed to initialize dtrace: %s\n",
856 		    dtrace_errmsg(NULL, err));
857 
858 	/*
859 	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
860 	 */
861 	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
862 		dfatal("failed to set 'strsize'");
863 
864 	/*
865 	 * 1k should be more than enough for all trace() and printa() actions.
866 	 */
867 	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
868 		dfatal("failed to set 'bufsize'");
869 
870 	/*
871 	 * The table we produce has the hottest locks at the top.
872 	 */
873 	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
874 		dfatal("failed to set 'aggsortrev'");
875 
876 	/*
877 	 * These are two reasonable defaults which should suffice.
878 	 */
879 	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
880 		dfatal("failed to set 'aggsize'");
881 	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
882 		dfatal("failed to set 'aggrate'");
883 
884 	/*
885 	 * Take a second pass through to look for options that set options now
886 	 * that we have an open dtrace handle.
887 	 */
888 	optind = 1;
889 	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
890 		switch (c) {
891 		case 's':
892 			g_opt_s = 1;
893 			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
894 				dfatal("failed to set 'ustackframes'");
895 			break;
896 
897 		case 'x':
898 			if ((p = strchr(optarg, '=')) != NULL)
899 				*p++ = '\0';
900 
901 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
902 				dfatal("failed to set -x %s", optarg);
903 			break;
904 
905 		case 'e':
906 			errno = 0;
907 			(void) strtoul(optarg, &end, 10);
908 			if (*optarg == '-' || *end != '\0' || errno != 0) {
909 				(void) fprintf(stderr, "%s: invalid timeout "
910 				    "'%s'\n", g_pname, optarg);
911 				usage();
912 			}
913 
914 			/*
915 			 * Construct a DTrace enabling that will exit after
916 			 * the specified number of seconds.
917 			 */
918 			dprog_add("BEGIN\n{\n\tend = timestamp + ");
919 			dprog_add(optarg);
920 			dprog_add(" * 1000000000;\n}\n");
921 			dprog_add("tick-10hz\n/timestamp >= end/\n");
922 			dprog_add("{\n\texit(0);\n}\n");
923 			break;
924 		}
925 	}
926 
927 	argc -= optind;
928 	argv += optind;
929 
930 	if (opt_H) {
931 		dprog_add(g_hold_init);
932 		if (!g_opt_s)
933 			dprog_add(g_hold_times);
934 		else
935 			dprog_add(g_hold_histogram);
936 	}
937 
938 	if (opt_C) {
939 		dprog_add(g_ctnd_init);
940 		if (!g_opt_s)
941 			dprog_add(g_ctnd_times);
942 		else
943 			dprog_add(g_ctnd_histogram);
944 	}
945 
946 	if (opt_p) {
947 		ulong_t pid;
948 
949 		if (argc > 1) {
950 			(void) fprintf(stderr, "%s: only one pid is allowed\n",
951 			    g_pname);
952 			usage();
953 		}
954 
955 		errno = 0;
956 		pid = strtoul(argv[0], &end, 10);
957 		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
958 			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
959 			    g_pname, argv[0]);
960 			usage();
961 		}
962 
963 		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
964 			dfatal(NULL);
965 	} else {
966 		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv, NULL, NULL)) == NULL)
967 			dfatal(NULL);
968 	}
969 
970 	dprog_compile();
971 
972 	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
973 		dfatal("failed to establish proc handler");
974 
975 	(void) sigemptyset(&act.sa_mask);
976 	act.sa_flags = 0;
977 	act.sa_handler = intr;
978 	(void) sigaction(SIGINT, &act, NULL);
979 	(void) sigaction(SIGTERM, &act, NULL);
980 
981 	if (dtrace_go(g_dtp) != 0)
982 		dfatal("dtrace_go()");
983 
984 	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
985 		dfatal("failed to get 'ustackframes'");
986 
987 	dtrace_proc_continue(g_dtp, g_pr);
988 
989 	if (opt_v)
990 		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
991 #ifdef illumos
992 		    (int)Pstatus(g_pr)->pr_pid);
993 #else
994 		    (int)proc_getpid(g_pr));
995 #endif
996 
997 	do {
998 		if (!g_intr && !done)
999 			dtrace_sleep(g_dtp);
1000 
1001 		if (done || g_intr || g_exited) {
1002 			done = 1;
1003 			if (dtrace_stop(g_dtp) == -1)
1004 				dfatal("couldn't stop tracing");
1005 		}
1006 
1007 		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
1008 		case DTRACE_WORKSTATUS_DONE:
1009 			done = 1;
1010 			break;
1011 		case DTRACE_WORKSTATUS_OKAY:
1012 			break;
1013 		default:
1014 			dfatal("processing aborted");
1015 		}
1016 
1017 	} while (!done);
1018 
1019 	dtrace_close(g_dtp);
1020 
1021 	return (0);
1022 }
1023