xref: /freebsd/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c (revision ab875b713d0afab5f0650f06d48a38777a7ae43e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <sys/types.h>
40 #include <sys/modctl.h>
41 #include <sys/stat.h>
42 #include <sys/wait.h>
43 #include <dtrace.h>
44 #include <sys/lockstat.h>
45 #include <alloca.h>
46 #include <signal.h>
47 #include <assert.h>
48 
49 #ifdef illumos
50 #define	GETOPT_EOF	EOF
51 #else
52 #include <sys/time.h>
53 #include <sys/resource.h>
54 
55 #define	mergesort(a, b, c, d)	lsmergesort(a, b, c, d)
56 #define	GETOPT_EOF		(-1)
57 
58 typedef	uintptr_t	pc_t;
59 #endif
60 
61 #define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
62 
63 #define	LS_MAX_STACK_DEPTH	50
64 #define	LS_MAX_EVENTS		64
65 
66 typedef struct lsrec {
67 	struct lsrec	*ls_next;	/* next in hash chain */
68 	uintptr_t	ls_lock;	/* lock address */
69 	uintptr_t	ls_caller;	/* caller address */
70 	uint32_t	ls_count;	/* cumulative event count */
71 	uint32_t	ls_event;	/* type of event */
72 	uintptr_t	ls_refcnt;	/* cumulative reference count */
73 	uint64_t	ls_time;	/* cumulative event duration */
74 	uint32_t	ls_hist[64];	/* log2(duration) histogram */
75 	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];
76 } lsrec_t;
77 
78 typedef struct lsdata {
79 	struct lsrec	*lsd_next;	/* next available */
80 	int		lsd_count;	/* number of records */
81 } lsdata_t;
82 
83 /*
84  * Definitions for the types of experiments which can be run.  They are
85  * listed in increasing order of memory cost and processing time cost.
86  * The numerical value of each type is the number of bytes needed per record.
87  */
88 #define	LS_BASIC	offsetof(lsrec_t, ls_time)
89 #define	LS_TIME		offsetof(lsrec_t, ls_hist[0])
90 #define	LS_HIST		offsetof(lsrec_t, ls_stack[0])
91 #define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])
92 
93 static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
94 static void report_trace(FILE *, lsrec_t **);
95 
96 extern int symtab_init(void);
97 extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
98 extern uintptr_t sym_to_addr(char *name);
99 extern size_t sym_size(char *name);
100 extern char *strtok_r(char *, const char *, char **);
101 
102 #define	DEFAULT_NRECS	10000
103 #define	DEFAULT_HZ	97
104 #define	MAX_HZ		1000
105 #define	MIN_AGGSIZE	(16 * 1024)
106 #define	MAX_AGGSIZE	(32 * 1024 * 1024)
107 
108 static int g_stkdepth;
109 static int g_topn = INT_MAX;
110 static hrtime_t g_elapsed;
111 static int g_rates = 0;
112 static int g_pflag = 0;
113 static int g_Pflag = 0;
114 static int g_wflag = 0;
115 static int g_Wflag = 0;
116 static int g_cflag = 0;
117 static int g_kflag = 0;
118 static int g_gflag = 0;
119 static int g_Vflag = 0;
120 static int g_tracing = 0;
121 static size_t g_recsize;
122 static size_t g_nrecs;
123 static int g_nrecs_used;
124 static uchar_t g_enabled[LS_MAX_EVENTS];
125 static hrtime_t g_min_duration[LS_MAX_EVENTS];
126 static dtrace_hdl_t *g_dtp;
127 static char *g_predicate;
128 static char *g_ipredicate;
129 static char *g_prog;
130 static int g_proglen;
131 static int g_dropped;
132 
133 typedef struct ls_event_info {
134 	char	ev_type;
135 	char	ev_lhdr[20];
136 	char	ev_desc[80];
137 	char	ev_units[10];
138 	char	ev_name[DTRACE_NAMELEN];
139 	char	*ev_predicate;
140 	char	*ev_acquire;
141 } ls_event_info_t;
142 
143 static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
144 	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
145 	    "lockstat:::adaptive-spin" },
146 	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
147 	    "lockstat:::adaptive-block" },
148 	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
149 	    "lockstat:::spin-spin" },
150 	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
151 	    "lockstat:::thread-spin" },
152 	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
153 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
154 	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
155 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
156 	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
157 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
158 	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
159 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
160 	{ 'C',	"Lock",	"R/W writer spin on writer",		"nsec",
161 	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
162 	{ 'C',	"Lock",	"R/W writer spin on readers",		"nsec",
163 	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
164 	{ 'C',	"Lock",	"R/W reader spin on writer",		"nsec",
165 	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" },
166 	{ 'C',	"Lock",	"R/W reader spin on write wanted",	"nsec",
167 	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" },
168 	{ 'C',	"Lock",	"SX exclusive block",			"nsec",
169 	    "lockstat:::sx-block", "arg2 == 0" },
170 	{ 'C',	"Lock",	"SX shared block",			"nsec",
171 	    "lockstat:::sx-block", "arg2 != 0" },
172 	{ 'C',	"Lock",	"SX exclusive spin",			"nsec",
173 	    "lockstat:::sx-spin", "arg2 == 0" },
174 	{ 'C',	"Lock",	"SX shared spin",			"nsec",
175 	    "lockstat:::sx-spin", "arg2 != 0" },
176 	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
177 	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
178 	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
179 	{ 'C',	"Lock",	"Unknown event (type 19)",		"units"	},
180 	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
181 	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
182 	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
183 	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
184 	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
185 	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
186 	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
187 	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
188 	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
189 	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
190 	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
191 	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
192 	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
193 	    "lockstat:::adaptive-release", NULL,
194 	    "lockstat:::adaptive-acquire" },
195 	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
196 	    "lockstat:::spin-release", NULL,
197 	    "lockstat:::spin-acquire" },
198 	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
199 	    "lockstat:::rw-release", "arg1 == 0",
200 	    "lockstat:::rw-acquire" },
201 	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
202 	    "lockstat:::rw-release", "arg1 == 1",
203 	    "lockstat:::rw-acquire" },
204 	{ 'H',	"Lock",	"SX shared hold",			"nsec",
205 	    "lockstat:::sx-release", "arg1 == 0",
206 	    "lockstat:::sx-acquire" },
207 	{ 'H',	"Lock",	"SX exclusive hold",			"nsec",
208 	    "lockstat:::sx-release", "arg1 == 1",
209 	    "lockstat:::sx-acquire" },
210 	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
211 	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
212 	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
213 	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
214 	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
215 	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
216 	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
217 	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
218 	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
219 	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
220 	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
221 	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
222 	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
223 	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
224 	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
225 	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
226 	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
227 	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
228 #ifdef illumos
229 	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
230 #else
231 	{ 'I',	"CPU+Pri_Class", "Profiling interrupt",		"nsec",
232 #endif
233 	    "profile:::profile-97", NULL },
234 	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
235 	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
236 	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
237 	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
238 	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
239 	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
240 	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
241 	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
242 };
243 
244 #ifndef illumos
245 static char *g_pri_class[] = {
246 	"",
247 	"Intr",
248 	"RealT",
249 	"TShar",
250 	"Idle"
251 };
252 #endif
253 
254 static void
255 fail(int do_perror, const char *message, ...)
256 {
257 	va_list args;
258 	int save_errno = errno;
259 
260 	va_start(args, message);
261 	(void) fprintf(stderr, "lockstat: ");
262 	(void) vfprintf(stderr, message, args);
263 	va_end(args);
264 	if (do_perror)
265 		(void) fprintf(stderr, ": %s", strerror(save_errno));
266 	(void) fprintf(stderr, "\n");
267 	exit(2);
268 }
269 
270 static void
271 dfail(const char *message, ...)
272 {
273 	va_list args;
274 
275 	va_start(args, message);
276 	(void) fprintf(stderr, "lockstat: ");
277 	(void) vfprintf(stderr, message, args);
278 	va_end(args);
279 	(void) fprintf(stderr, ": %s\n",
280 	    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
281 
282 	exit(2);
283 }
284 
285 static void
286 show_events(char event_type, char *desc)
287 {
288 	int i, first = -1, last;
289 
290 	for (i = 0; i < LS_MAX_EVENTS; i++) {
291 		ls_event_info_t *evp = &g_event_info[i];
292 		if (evp->ev_type != event_type ||
293 		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
294 			continue;
295 		if (first == -1)
296 			first = i;
297 		last = i;
298 	}
299 
300 	(void) fprintf(stderr,
301 	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
302 	    desc, event_type, first, last);
303 
304 	for (i = first; i <= last; i++)
305 		(void) fprintf(stderr,
306 		    "%4d = %s\n", i, g_event_info[i].ev_desc);
307 }
308 
309 static void
310 usage(void)
311 {
312 	(void) fprintf(stderr,
313 	    "Usage: lockstat [options] command [args]\n"
314 	    "\nGeneral options:\n\n"
315 	    "  -V              print the corresponding D program\n"
316 	    "\nEvent selection options:\n\n"
317 	    "  -C              watch contention events [on by default]\n"
318 	    "  -E              watch error events [off by default]\n"
319 	    "  -H              watch hold events [off by default]\n"
320 	    "  -I              watch interrupt events [off by default]\n"
321 	    "  -A              watch all lock events [equivalent to -CH]\n"
322 	    "  -e event_list   only watch the specified events (shown below);\n"
323 	    "                  <event_list> is a comma-separated list of\n"
324 	    "                  events or ranges of events, e.g. 1,4-7,35\n"
325 	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
326 	    "\nData gathering options:\n\n"
327 	    "  -b              basic statistics (lock, caller, event count)\n"
328 	    "  -t              timing for all events [default]\n"
329 	    "  -h              histograms for event times\n"
330 	    "  -s depth        stack traces <depth> deep\n"
331 	    "  -x opt[=val]    enable or modify DTrace options\n"
332 	    "\nData filtering options:\n\n"
333 	    "  -n nrecords     maximum number of data records [default: %d]\n"
334 	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
335 	    "                  symbolic name or hex address; <size> defaults\n"
336 	    "                  to the ELF symbol size if available, 1 if not\n"
337 	    "  -f func[,size]  only watch events generated by <func>\n"
338 	    "  -d duration     only watch events longer than <duration>\n"
339 	    "  -T              trace (rather than sample) events\n"
340 	    "\nData reporting options:\n\n"
341 	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
342 	    "  -k              coalesce PCs within functions\n"
343 	    "  -g              show total events generated by function\n"
344 	    "  -w              wherever: don't distinguish events by caller\n"
345 	    "  -W              whichever: don't distinguish events by lock\n"
346 	    "  -R              display rates rather than counts\n"
347 	    "  -p              parsable output format (awk(1)-friendly)\n"
348 	    "  -P              sort lock data by (count * avg_time) product\n"
349 	    "  -D n            only display top <n> events of each type\n"
350 	    "  -o filename     send output to <filename>\n",
351 	    DEFAULT_HZ, DEFAULT_NRECS);
352 
353 	show_events('C', "Contention");
354 	show_events('H', "Hold-time");
355 	show_events('I', "Interrupt");
356 	show_events('E', "Error");
357 	(void) fprintf(stderr, "\n");
358 
359 	exit(1);
360 }
361 
362 static int
363 lockcmp(lsrec_t *a, lsrec_t *b)
364 {
365 	int i;
366 
367 	if (a->ls_event < b->ls_event)
368 		return (-1);
369 	if (a->ls_event > b->ls_event)
370 		return (1);
371 
372 	for (i = g_stkdepth - 1; i >= 0; i--) {
373 		if (a->ls_stack[i] < b->ls_stack[i])
374 			return (-1);
375 		if (a->ls_stack[i] > b->ls_stack[i])
376 			return (1);
377 	}
378 
379 	if (a->ls_caller < b->ls_caller)
380 		return (-1);
381 	if (a->ls_caller > b->ls_caller)
382 		return (1);
383 
384 	if (a->ls_lock < b->ls_lock)
385 		return (-1);
386 	if (a->ls_lock > b->ls_lock)
387 		return (1);
388 
389 	return (0);
390 }
391 
392 static int
393 countcmp(lsrec_t *a, lsrec_t *b)
394 {
395 	if (a->ls_event < b->ls_event)
396 		return (-1);
397 	if (a->ls_event > b->ls_event)
398 		return (1);
399 
400 	return (b->ls_count - a->ls_count);
401 }
402 
403 static int
404 timecmp(lsrec_t *a, lsrec_t *b)
405 {
406 	if (a->ls_event < b->ls_event)
407 		return (-1);
408 	if (a->ls_event > b->ls_event)
409 		return (1);
410 
411 	if (a->ls_time < b->ls_time)
412 		return (1);
413 	if (a->ls_time > b->ls_time)
414 		return (-1);
415 
416 	return (0);
417 }
418 
419 static int
420 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
421 {
422 	if (a->ls_event < b->ls_event)
423 		return (-1);
424 	if (a->ls_event > b->ls_event)
425 		return (1);
426 
427 	if (a->ls_lock < b->ls_lock)
428 		return (-1);
429 	if (a->ls_lock > b->ls_lock)
430 		return (1);
431 
432 	return (0);
433 }
434 
435 static int
436 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
437 {
438 	if (a->ls_event < b->ls_event)
439 		return (-1);
440 	if (a->ls_event > b->ls_event)
441 		return (1);
442 
443 	if (a->ls_lock < b->ls_lock)
444 		return (-1);
445 	if (a->ls_lock > b->ls_lock)
446 		return (1);
447 
448 	return (b->ls_count - a->ls_count);
449 }
450 
451 static int
452 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
453 {
454 	int i;
455 
456 	if (a->ls_event < b->ls_event)
457 		return (-1);
458 	if (a->ls_event > b->ls_event)
459 		return (1);
460 
461 	for (i = g_stkdepth - 1; i >= 0; i--) {
462 		if (a->ls_stack[i] < b->ls_stack[i])
463 			return (-1);
464 		if (a->ls_stack[i] > b->ls_stack[i])
465 			return (1);
466 	}
467 
468 	if (a->ls_caller < b->ls_caller)
469 		return (-1);
470 	if (a->ls_caller > b->ls_caller)
471 		return (1);
472 
473 	return (0);
474 }
475 
476 static int
477 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
478 {
479 	int i;
480 
481 	if (a->ls_event < b->ls_event)
482 		return (-1);
483 	if (a->ls_event > b->ls_event)
484 		return (1);
485 
486 	for (i = g_stkdepth - 1; i >= 0; i--) {
487 		if (a->ls_stack[i] < b->ls_stack[i])
488 			return (-1);
489 		if (a->ls_stack[i] > b->ls_stack[i])
490 			return (1);
491 	}
492 
493 	if (a->ls_caller < b->ls_caller)
494 		return (-1);
495 	if (a->ls_caller > b->ls_caller)
496 		return (1);
497 
498 	return (b->ls_count - a->ls_count);
499 }
500 
501 static void
502 lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
503 {
504 	int m = n / 2;
505 	int i, j;
506 
507 	if (m > 1)
508 		lsmergesort(cmp, a, b, m);
509 	if (n - m > 1)
510 		lsmergesort(cmp, a + m, b + m, n - m);
511 	for (i = m; i > 0; i--)
512 		b[i - 1] = a[i - 1];
513 	for (j = m - 1; j < n - 1; j++)
514 		b[n + m - j - 2] = a[j + 1];
515 	while (i < j)
516 		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
517 	*a = b[i];
518 }
519 
520 static void
521 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
522 {
523 	int i, j;
524 	lsrec_t *target, *current;
525 
526 	target = lock[0];
527 
528 	for (i = 1; i < n; i++) {
529 		current = lock[i];
530 		if (cmp(current, target) != 0) {
531 			target = current;
532 			continue;
533 		}
534 		current->ls_event = LS_MAX_EVENTS;
535 		target->ls_count += current->ls_count;
536 		target->ls_refcnt += current->ls_refcnt;
537 		if (g_recsize < LS_TIME)
538 			continue;
539 		target->ls_time += current->ls_time;
540 		if (g_recsize < LS_HIST)
541 			continue;
542 		for (j = 0; j < 64; j++)
543 			target->ls_hist[j] += current->ls_hist[j];
544 	}
545 }
546 
547 static void
548 coalesce_symbol(uintptr_t *addrp)
549 {
550 	uintptr_t symoff;
551 	size_t symsize;
552 
553 	if (addr_to_sym(*addrp, &symoff, &symsize) != NULL && symoff < symsize)
554 		*addrp -= symoff;
555 }
556 
557 static void
558 predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
559 {
560 	char *new;
561 	int len, newlen;
562 
563 	if (what == NULL)
564 		return;
565 
566 	if (*pred == NULL) {
567 		*pred = malloc(1);
568 		*pred[0] = '\0';
569 	}
570 
571 	len = strlen(*pred);
572 	newlen = len + strlen(what) + 32 + strlen("( && )");
573 	new = malloc(newlen);
574 
575 	if (*pred[0] != '\0') {
576 		if (cmp != NULL) {
577 			(void) sprintf(new, "(%s) && (%s %s 0x%p)",
578 			    *pred, what, cmp, (void *)value);
579 		} else {
580 			(void) sprintf(new, "(%s) && (%s)", *pred, what);
581 		}
582 	} else {
583 		if (cmp != NULL) {
584 			(void) sprintf(new, "%s %s 0x%p",
585 			    what, cmp, (void *)value);
586 		} else {
587 			(void) sprintf(new, "%s", what);
588 		}
589 	}
590 
591 	free(*pred);
592 	*pred = new;
593 }
594 
595 static void
596 predicate_destroy(char **pred)
597 {
598 	free(*pred);
599 	*pred = NULL;
600 }
601 
602 static void
603 filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
604 {
605 	char buf[256], *c = buf, *new;
606 	int len, newlen;
607 
608 	if (*filt == NULL) {
609 		*filt = malloc(1);
610 		*filt[0] = '\0';
611 	}
612 
613 #ifdef illumos
614 	(void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ?
615 	    " || " : "", what, (void *)base, what, (void *)(base + size));
616 #else
617 	(void) sprintf(c, "%s(%s >= %p && %s < %p)", *filt[0] != '\0' ?
618 	    " || " : "", what, (void *)base, what, (void *)(base + size));
619 #endif
620 
621 	newlen = (len = strlen(*filt) + 1) + strlen(c);
622 	new = malloc(newlen);
623 	bcopy(*filt, new, len);
624 	(void) strcat(new, c);
625 	free(*filt);
626 	*filt = new;
627 }
628 
629 static void
630 filter_destroy(char **filt)
631 {
632 	free(*filt);
633 	*filt = NULL;
634 }
635 
636 static void
637 dprog_add(const char *fmt, ...)
638 {
639 	va_list args;
640 	int size, offs;
641 	char c;
642 
643 	va_start(args, fmt);
644 	size = vsnprintf(&c, 1, fmt, args) + 1;
645 	va_end(args);
646 
647 	if (g_proglen == 0) {
648 		offs = 0;
649 	} else {
650 		offs = g_proglen - 1;
651 	}
652 
653 	g_proglen = offs + size;
654 
655 	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
656 		fail(1, "failed to reallocate program text");
657 
658 	va_start(args, fmt);
659 	(void) vsnprintf(&g_prog[offs], size, fmt, args);
660 	va_end(args);
661 }
662 
663 /*
664  * This function may read like an open sewer, but keep in mind that programs
665  * that generate other programs are rarely pretty.  If one has the unenviable
666  * task of maintaining or -- worse -- extending this code, use the -V option
667  * to examine the D program as generated by this function.
668  */
669 static void
670 dprog_addevent(int event)
671 {
672 	ls_event_info_t *info = &g_event_info[event];
673 	char *pred = NULL;
674 	char stack[20];
675 	const char *arg0, *caller;
676 	char *arg1 = "arg1";
677 	char buf[80];
678 	hrtime_t dur;
679 	int depth;
680 
681 	if (info->ev_name[0] == '\0')
682 		return;
683 
684 	if (info->ev_type == 'I') {
685 		/*
686 		 * For interrupt events, arg0 (normally the lock pointer) is
687 		 * the CPU address plus the current pil, and arg1 (normally
688 		 * the number of nanoseconds) is the number of nanoseconds
689 		 * late -- and it's stored in arg2.
690 		 */
691 #ifdef illumos
692 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
693 		    "\t    curthread->t_cpu->cpu_profile_pil";
694 #else
695 		arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
696 		    "\t    0x01000000 + curthread->td_pri_class";
697 #endif
698 		caller = "(uintptr_t)arg0";
699 		arg1 = "arg2";
700 	} else {
701 		arg0 = "(uintptr_t)arg0";
702 		caller = "caller";
703 	}
704 
705 	if (g_recsize > LS_HIST) {
706 		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
707 			continue;
708 
709 		if (g_tracing) {
710 			(void) sprintf(stack, "\tstack(%d);\n", depth);
711 		} else {
712 			(void) sprintf(stack, ", stack(%d)", depth);
713 		}
714 	} else {
715 		(void) sprintf(stack, "");
716 	}
717 
718 	if (info->ev_acquire != NULL) {
719 		/*
720 		 * If this is a hold event, we need to generate an additional
721 		 * clause for the acquire; the clause for the release will be
722 		 * generated with the aggregating statement, below.
723 		 */
724 		dprog_add("%s\n", info->ev_acquire);
725 		predicate_add(&pred, info->ev_predicate, NULL, 0);
726 		predicate_add(&pred, g_predicate, NULL, 0);
727 		if (pred != NULL)
728 			dprog_add("/%s/\n", pred);
729 
730 		dprog_add("{\n");
731 		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
732 
733 		if (info->ev_type == 'H') {
734 			dprog_add("\t%s = timestamp;\n", buf);
735 		} else {
736 			/*
737 			 * If this isn't a hold event, it's the recursive
738 			 * error event.  For this, we simply bump the
739 			 * thread-local, per-lock count.
740 			 */
741 			dprog_add("\t%s++;\n", buf);
742 		}
743 
744 		dprog_add("}\n\n");
745 		predicate_destroy(&pred);
746 		pred = NULL;
747 
748 		if (info->ev_type == 'E') {
749 			/*
750 			 * If this is the recursive lock error event, we need
751 			 * to generate an additional clause to decrement the
752 			 * thread-local, per-lock count.  This assures that we
753 			 * only execute the aggregating clause if we have
754 			 * recursive entry.
755 			 */
756 			dprog_add("%s\n", info->ev_name);
757 			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
758 		}
759 
760 		predicate_add(&pred, buf, NULL, 0);
761 
762 		if (info->ev_type == 'H') {
763 			(void) sprintf(buf, "timestamp -\n\t    "
764 			    "self->ev%d[(uintptr_t)arg0]", event);
765 		}
766 
767 		arg1 = buf;
768 	} else {
769 		predicate_add(&pred, info->ev_predicate, NULL, 0);
770 		if (info->ev_type != 'I')
771 			predicate_add(&pred, g_predicate, NULL, 0);
772 		else
773 			predicate_add(&pred, g_ipredicate, NULL, 0);
774 	}
775 
776 	if ((dur = g_min_duration[event]) != 0)
777 		predicate_add(&pred, arg1, ">=", dur);
778 
779 	dprog_add("%s\n", info->ev_name);
780 
781 	if (pred != NULL)
782 		dprog_add("/%s/\n", pred);
783 	predicate_destroy(&pred);
784 
785 	dprog_add("{\n");
786 
787 	if (g_tracing) {
788 		dprog_add("\ttrace(%dULL);\n", event);
789 		dprog_add("\ttrace(%s);\n", arg0);
790 		dprog_add("\ttrace(%s);\n", caller);
791 		dprog_add(stack);
792 	} else {
793 		/*
794 		 * The ordering here is important:  when we process the
795 		 * aggregate, we count on the fact that @avg appears before
796 		 * @hist in program order to assure that @avg is assigned the
797 		 * first aggregation variable ID and @hist assigned the
798 		 * second; see the comment in process_aggregate() for details.
799 		 */
800 		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
801 		    event, arg0, caller, stack, arg1);
802 
803 		if (g_recsize >= LS_HIST) {
804 			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
805 			    "(%s);\n", event, arg0, caller, stack, arg1);
806 		}
807 	}
808 
809 	if (info->ev_acquire != NULL)
810 		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
811 
812 	dprog_add("}\n\n");
813 }
814 
815 static void
816 dprog_compile()
817 {
818 	dtrace_prog_t *prog;
819 	dtrace_proginfo_t info;
820 
821 	if (g_Vflag) {
822 		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
823 		(void) fputs(g_prog, stderr);
824 		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
825 	}
826 
827 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
828 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
829 		dfail("failed to compile program");
830 
831 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
832 		dfail("failed to enable probes");
833 
834 	if (dtrace_go(g_dtp) != 0)
835 		dfail("couldn't start tracing");
836 }
837 
838 static void
839 #ifdef illumos
840 status_fire(void)
841 #else
842 status_fire(int i)
843 #endif
844 {}
845 
846 static void
847 status_init(void)
848 {
849 	dtrace_optval_t val, status, agg;
850 	struct sigaction act;
851 	struct itimerspec ts;
852 	struct sigevent ev;
853 	timer_t tid;
854 
855 	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
856 		dfail("failed to get 'statusrate'");
857 
858 	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
859 		dfail("failed to get 'statusrate'");
860 
861 	/*
862 	 * We would want to awaken at a rate that is the GCD of the statusrate
863 	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
864 	 * simply awaken at a rate that is the more frequent of the two, which
865 	 * assures that we're never later than the interval implied by the
866 	 * more frequent rate.
867 	 */
868 	val = status < agg ? status : agg;
869 
870 	(void) sigemptyset(&act.sa_mask);
871 	act.sa_flags = 0;
872 	act.sa_handler = status_fire;
873 	(void) sigaction(SIGUSR1, &act, NULL);
874 
875 	ev.sigev_notify = SIGEV_SIGNAL;
876 	ev.sigev_signo = SIGUSR1;
877 
878 	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
879 		dfail("cannot create CLOCK_REALTIME timer");
880 
881 	ts.it_value.tv_sec = val / NANOSEC;
882 	ts.it_value.tv_nsec = val % NANOSEC;
883 	ts.it_interval = ts.it_value;
884 
885 	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
886 		dfail("cannot set time on CLOCK_REALTIME timer");
887 }
888 
889 static void
890 status_check(void)
891 {
892 	if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
893 		dfail("failed to snap aggregate");
894 
895 	if (dtrace_status(g_dtp) == -1)
896 		dfail("dtrace_status()");
897 }
898 
899 static void
900 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
901 {
902 	bzero(lsrec, g_recsize);
903 	lsrec->ls_count = 1;
904 
905 	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
906 		fail(0, "truncated DTrace record");
907 
908 	if (rec->dtrd_size != sizeof (uint64_t))
909 		fail(0, "bad event size in first record");
910 
911 	/* LINTED - alignment */
912 	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
913 	rec++;
914 
915 	if (rec->dtrd_size != sizeof (uintptr_t))
916 		fail(0, "bad lock address size in second record");
917 
918 	/* LINTED - alignment */
919 	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
920 	rec++;
921 
922 	if (rec->dtrd_size != sizeof (uintptr_t))
923 		fail(0, "bad caller size in third record");
924 
925 	/* LINTED - alignment */
926 	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
927 	rec++;
928 
929 	if (g_recsize > LS_HIST) {
930 		int frames, i;
931 		pc_t *stack;
932 
933 		frames = rec->dtrd_size / sizeof (pc_t);
934 		/* LINTED - alignment */
935 		stack = (pc_t *)(data + rec->dtrd_offset);
936 
937 		for (i = 1; i < frames; i++)
938 			lsrec->ls_stack[i - 1] = stack[i];
939 	}
940 }
941 
942 /*ARGSUSED*/
943 static int
944 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
945 {
946 	*((size_t *)arg) += 1;
947 
948 	return (DTRACE_AGGWALK_NEXT);
949 }
950 
951 static int
952 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
953 {
954 	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
955 	caddr_t data = agg->dtada_data;
956 	lsdata_t *lsdata = arg;
957 	lsrec_t *lsrec = lsdata->lsd_next;
958 	const dtrace_recdesc_t *rec;
959 	uint64_t *avg, *quantized;
960 	int i, j;
961 
962 	assert(lsdata->lsd_count < g_nrecs);
963 
964 	/*
965 	 * Aggregation variable IDs are guaranteed to be generated in program
966 	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
967 	 * plus one.  As "avg" appears before "hist" in program order, we know
968 	 * that "avg" will be allocated the first aggregation variable ID, and
969 	 * "hist" will be allocated the second aggregation variable ID -- and
970 	 * we therefore use the aggregation variable ID to differentiate the
971 	 * cases.
972 	 */
973 	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
974 		/*
975 		 * If this is the histogram entry.  We'll copy the quantized
976 		 * data into lc_hist, and jump over the rest.
977 		 */
978 		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
979 
980 		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
981 			fail(0, "bad variable ID in aggregation record");
982 
983 		if (rec->dtrd_size !=
984 		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
985 			fail(0, "bad quantize size in aggregation record");
986 
987 		/* LINTED - alignment */
988 		quantized = (uint64_t *)(data + rec->dtrd_offset);
989 
990 		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
991 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
992 			lsrec->ls_hist[j] = quantized[i];
993 
994 		goto out;
995 	}
996 
997 	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
998 	    aggdesc->dtagd_nrecs - 1, data);
999 
1000 	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1001 
1002 	if (rec->dtrd_size != 2 * sizeof (uint64_t))
1003 		fail(0, "bad avg size in aggregation record");
1004 
1005 	/* LINTED - alignment */
1006 	avg = (uint64_t *)(data + rec->dtrd_offset);
1007 	lsrec->ls_count = (uint32_t)avg[0];
1008 	lsrec->ls_time = (uintptr_t)avg[1];
1009 
1010 	if (g_recsize >= LS_HIST)
1011 		return (DTRACE_AGGWALK_NEXT);
1012 
1013 out:
1014 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1015 	lsdata->lsd_count++;
1016 
1017 	return (DTRACE_AGGWALK_NEXT);
1018 }
1019 
1020 static int
1021 process_trace(const dtrace_probedata_t *pdata, void *arg)
1022 {
1023 	lsdata_t *lsdata = arg;
1024 	lsrec_t *lsrec = lsdata->lsd_next;
1025 	dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
1026 	caddr_t data = pdata->dtpda_data;
1027 
1028 	if (lsdata->lsd_count >= g_nrecs)
1029 		return (DTRACE_CONSUME_NEXT);
1030 
1031 	lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
1032 
1033 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1034 	lsdata->lsd_count++;
1035 
1036 	return (DTRACE_CONSUME_NEXT);
1037 }
1038 
1039 static int
1040 process_data(FILE *out, char *data)
1041 {
1042 	lsdata_t lsdata;
1043 
1044 	/* LINTED - alignment */
1045 	lsdata.lsd_next = (lsrec_t *)data;
1046 	lsdata.lsd_count = 0;
1047 
1048 	if (g_tracing) {
1049 		if (dtrace_consume(g_dtp, out,
1050 		    process_trace, NULL, &lsdata) != 0)
1051 			dfail("failed to consume buffer");
1052 
1053 		return (lsdata.lsd_count);
1054 	}
1055 
1056 	if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
1057 	    process_aggregate, &lsdata) != 0)
1058 		dfail("failed to walk aggregate");
1059 
1060 	return (lsdata.lsd_count);
1061 }
1062 
1063 /*ARGSUSED*/
1064 static int
1065 drophandler(const dtrace_dropdata_t *data, void *arg)
1066 {
1067 	g_dropped++;
1068 	(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
1069 	return (DTRACE_HANDLE_OK);
1070 }
1071 
1072 int
1073 main(int argc, char **argv)
1074 {
1075 	char *data_buf;
1076 	lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
1077 	FILE *out = stdout;
1078 	int c;
1079 	pid_t child;
1080 	int status;
1081 	int i, j;
1082 	hrtime_t duration;
1083 	char *addrp, *offp, *sizep, *evp, *lastp, *p;
1084 	uintptr_t addr;
1085 	size_t size, off;
1086 	int events_specified = 0;
1087 	int exec_errno = 0;
1088 	uint32_t event;
1089 	char *filt = NULL, *ifilt = NULL;
1090 	static uint64_t ev_count[LS_MAX_EVENTS + 1];
1091 	static uint64_t ev_time[LS_MAX_EVENTS + 1];
1092 	dtrace_optval_t aggsize;
1093 	char aggstr[10];
1094 	long ncpus;
1095 	int dynvar = 0;
1096 	int err;
1097 
1098 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
1099 		fail(0, "cannot open dtrace library: %s",
1100 		    dtrace_errmsg(NULL, err));
1101 	}
1102 
1103 	if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
1104 		dfail("couldn't establish drop handler");
1105 
1106 	if (symtab_init() == -1)
1107 		fail(1, "can't load kernel symbols");
1108 
1109 	g_nrecs = DEFAULT_NRECS;
1110 
1111 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1112 		switch (c) {
1113 		case 'b':
1114 			g_recsize = LS_BASIC;
1115 			break;
1116 
1117 		case 't':
1118 			g_recsize = LS_TIME;
1119 			break;
1120 
1121 		case 'h':
1122 			g_recsize = LS_HIST;
1123 			break;
1124 
1125 		case 's':
1126 			if (!isdigit(optarg[0]))
1127 				usage();
1128 			g_stkdepth = atoi(optarg);
1129 			if (g_stkdepth > LS_MAX_STACK_DEPTH)
1130 				fail(0, "max stack depth is %d",
1131 				    LS_MAX_STACK_DEPTH);
1132 			g_recsize = LS_STACK(g_stkdepth);
1133 			break;
1134 
1135 		case 'n':
1136 			if (!isdigit(optarg[0]))
1137 				usage();
1138 			g_nrecs = atoi(optarg);
1139 			break;
1140 
1141 		case 'd':
1142 			if (!isdigit(optarg[0]))
1143 				usage();
1144 			duration = atoll(optarg);
1145 
1146 			/*
1147 			 * XXX -- durations really should be per event
1148 			 * since the units are different, but it's hard
1149 			 * to express this nicely in the interface.
1150 			 * Not clear yet what the cleanest solution is.
1151 			 */
1152 			for (i = 0; i < LS_MAX_EVENTS; i++)
1153 				if (g_event_info[i].ev_type != 'E')
1154 					g_min_duration[i] = duration;
1155 
1156 			break;
1157 
1158 		case 'i':
1159 			if (!isdigit(optarg[0]))
1160 				usage();
1161 			i = atoi(optarg);
1162 			if (i <= 0)
1163 				usage();
1164 			if (i > MAX_HZ)
1165 				fail(0, "max interrupt rate is %d Hz", MAX_HZ);
1166 
1167 			for (j = 0; j < LS_MAX_EVENTS; j++)
1168 				if (strcmp(g_event_info[j].ev_desc,
1169 				    "Profiling interrupt") == 0)
1170 					break;
1171 
1172 			(void) sprintf(g_event_info[j].ev_name,
1173 			    "profile:::profile-%d", i);
1174 			break;
1175 
1176 		case 'l':
1177 		case 'f':
1178 			addrp = strtok(optarg, ",");
1179 			sizep = strtok(NULL, ",");
1180 			addrp = strtok(optarg, ",+");
1181 			offp = strtok(NULL, ",");
1182 
1183 			size = sizep ? strtoul(sizep, NULL, 0) : 1;
1184 			off = offp ? strtoul(offp, NULL, 0) : 0;
1185 
1186 			if (addrp[0] == '0') {
1187 				addr = strtoul(addrp, NULL, 16) + off;
1188 			} else {
1189 				addr = sym_to_addr(addrp) + off;
1190 				if (sizep == NULL)
1191 					size = sym_size(addrp) - off;
1192 				if (addr - off == 0)
1193 					fail(0, "symbol '%s' not found", addrp);
1194 				if (size == 0)
1195 					size = 1;
1196 			}
1197 
1198 
1199 			if (c == 'l') {
1200 				filter_add(&filt, "arg0", addr, size);
1201 			} else {
1202 				filter_add(&filt, "caller", addr, size);
1203 				filter_add(&ifilt, "arg0", addr, size);
1204 			}
1205 			break;
1206 
1207 		case 'e':
1208 			evp = strtok_r(optarg, ",", &lastp);
1209 			while (evp) {
1210 				int ev1, ev2;
1211 				char *evp2;
1212 
1213 				(void) strtok(evp, "-");
1214 				evp2 = strtok(NULL, "-");
1215 				ev1 = atoi(evp);
1216 				ev2 = evp2 ? atoi(evp2) : ev1;
1217 				if ((uint_t)ev1 >= LS_MAX_EVENTS ||
1218 				    (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
1219 					fail(0, "-e events out of range");
1220 				for (i = ev1; i <= ev2; i++)
1221 					g_enabled[i] = 1;
1222 				evp = strtok_r(NULL, ",", &lastp);
1223 			}
1224 			events_specified = 1;
1225 			break;
1226 
1227 		case 'c':
1228 			g_cflag = 1;
1229 			break;
1230 
1231 		case 'k':
1232 			g_kflag = 1;
1233 			break;
1234 
1235 		case 'w':
1236 			g_wflag = 1;
1237 			break;
1238 
1239 		case 'W':
1240 			g_Wflag = 1;
1241 			break;
1242 
1243 		case 'g':
1244 			g_gflag = 1;
1245 			break;
1246 
1247 		case 'C':
1248 		case 'E':
1249 		case 'H':
1250 		case 'I':
1251 			for (i = 0; i < LS_MAX_EVENTS; i++)
1252 				if (g_event_info[i].ev_type == c)
1253 					g_enabled[i] = 1;
1254 			events_specified = 1;
1255 			break;
1256 
1257 		case 'A':
1258 			for (i = 0; i < LS_MAX_EVENTS; i++)
1259 				if (strchr("CH", g_event_info[i].ev_type))
1260 					g_enabled[i] = 1;
1261 			events_specified = 1;
1262 			break;
1263 
1264 		case 'T':
1265 			g_tracing = 1;
1266 			break;
1267 
1268 		case 'D':
1269 			if (!isdigit(optarg[0]))
1270 				usage();
1271 			g_topn = atoi(optarg);
1272 			break;
1273 
1274 		case 'R':
1275 			g_rates = 1;
1276 			break;
1277 
1278 		case 'p':
1279 			g_pflag = 1;
1280 			break;
1281 
1282 		case 'P':
1283 			g_Pflag = 1;
1284 			break;
1285 
1286 		case 'o':
1287 			if ((out = fopen(optarg, "w")) == NULL)
1288 				fail(1, "error opening file");
1289 			break;
1290 
1291 		case 'V':
1292 			g_Vflag = 1;
1293 			break;
1294 
1295 		default:
1296 			if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
1297 				usage();
1298 		}
1299 	}
1300 
1301 	if (filt != NULL) {
1302 		predicate_add(&g_predicate, filt, NULL, 0);
1303 		filter_destroy(&filt);
1304 	}
1305 
1306 	if (ifilt != NULL) {
1307 		predicate_add(&g_ipredicate, ifilt, NULL, 0);
1308 		filter_destroy(&ifilt);
1309 	}
1310 
1311 	if (g_recsize == 0) {
1312 		if (g_gflag) {
1313 			g_stkdepth = LS_MAX_STACK_DEPTH;
1314 			g_recsize = LS_STACK(g_stkdepth);
1315 		} else {
1316 			g_recsize = LS_TIME;
1317 		}
1318 	}
1319 
1320 	if (g_gflag && g_recsize <= LS_STACK(0))
1321 		fail(0, "'-g' requires at least '-s 1' data gathering");
1322 
1323 	/*
1324 	 * Make sure the alignment is reasonable
1325 	 */
1326 	g_recsize = -(-g_recsize & -sizeof (uint64_t));
1327 
1328 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1329 		/*
1330 		 * If no events were specified, enable -C.
1331 		 */
1332 		if (!events_specified && g_event_info[i].ev_type == 'C')
1333 			g_enabled[i] = 1;
1334 	}
1335 
1336 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1337 		if (!g_enabled[i])
1338 			continue;
1339 
1340 		if (g_event_info[i].ev_acquire != NULL) {
1341 			/*
1342 			 * If we've enabled a hold event, we must explicitly
1343 			 * allocate dynamic variable space.
1344 			 */
1345 			dynvar = 1;
1346 		}
1347 
1348 		dprog_addevent(i);
1349 	}
1350 
1351 	/*
1352 	 * Make sure there are remaining arguments to specify a child command
1353 	 * to execute.
1354 	 */
1355 	if (argc <= optind)
1356 		usage();
1357 
1358 	if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
1359 		dfail("couldn't determine number of online CPUs");
1360 
1361 	/*
1362 	 * By default, we set our data buffer size to be the number of records
1363 	 * multiplied by the size of the record, doubled to account for some
1364 	 * DTrace slop and divided by the number of CPUs.  We silently clamp
1365 	 * the aggregation size at both a minimum and a maximum to prevent
1366 	 * absurdly low or high values.
1367 	 */
1368 	if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
1369 		aggsize = MIN_AGGSIZE;
1370 
1371 	if (aggsize > MAX_AGGSIZE)
1372 		aggsize = MAX_AGGSIZE;
1373 
1374 	(void) sprintf(aggstr, "%lld", (long long)aggsize);
1375 
1376 	if (!g_tracing) {
1377 		if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
1378 			dfail("failed to set 'bufsize'");
1379 
1380 		if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
1381 			dfail("failed to set 'aggsize'");
1382 
1383 		if (dynvar) {
1384 			/*
1385 			 * If we're using dynamic variables, we set our
1386 			 * dynamic variable size to be one megabyte per CPU,
1387 			 * with a hard-limit of 32 megabytes.  This may still
1388 			 * be too small in some cases, but it can be tuned
1389 			 * manually via -x if need be.
1390 			 */
1391 			(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
1392 
1393 			if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
1394 				dfail("failed to set 'dynvarsize'");
1395 		}
1396 	} else {
1397 		if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
1398 			dfail("failed to set 'bufsize'");
1399 	}
1400 
1401 	if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
1402 		dfail("failed to set 'statusrate'");
1403 
1404 	optind = 1;
1405 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1406 		switch (c) {
1407 		case 'x':
1408 			if ((p = strchr(optarg, '=')) != NULL)
1409 				*p++ = '\0';
1410 
1411 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
1412 				dfail("failed to set -x %s", optarg);
1413 			break;
1414 		}
1415 	}
1416 
1417 	argc -= optind;
1418 	argv += optind;
1419 
1420 	dprog_compile();
1421 	status_init();
1422 
1423 	g_elapsed = -gethrtime();
1424 
1425 	/*
1426 	 * Spawn the specified command and wait for it to complete.
1427 	 */
1428 	child = fork();
1429 	if (child == -1)
1430 		fail(1, "cannot fork");
1431 	if (child == 0) {
1432 		(void) dtrace_close(g_dtp);
1433 		(void) execvp(argv[0], &argv[0]);
1434 		exec_errno = errno;
1435 		exit(127);
1436 	}
1437 
1438 #ifdef illumos
1439 	while (waitpid(child, &status, WEXITED) != child)
1440 #else
1441 	while (waitpid(child, &status, 0) != child)
1442 #endif
1443 		status_check();
1444 
1445 	g_elapsed += gethrtime();
1446 
1447 	if (WIFEXITED(status)) {
1448 		if (WEXITSTATUS(status) != 0) {
1449 			if (exec_errno != 0) {
1450 				errno = exec_errno;
1451 				fail(1, "could not execute %s", argv[0]);
1452 			}
1453 			(void) fprintf(stderr,
1454 			    "lockstat: warning: %s exited with code %d\n",
1455 			    argv[0], WEXITSTATUS(status));
1456 		}
1457 	} else {
1458 		(void) fprintf(stderr,
1459 		    "lockstat: warning: %s died on signal %d\n",
1460 		    argv[0], WTERMSIG(status));
1461 	}
1462 
1463 	if (dtrace_stop(g_dtp) == -1)
1464 		dfail("failed to stop dtrace");
1465 
1466 	/*
1467 	 * Before we read out the results, we need to allocate our buffer.
1468 	 * If we're tracing, then we'll just use the precalculated size.  If
1469 	 * we're not, then we'll take a snapshot of the aggregate, and walk
1470 	 * it to count the number of records.
1471 	 */
1472 	if (!g_tracing) {
1473 		if (dtrace_aggregate_snap(g_dtp) != 0)
1474 			dfail("failed to snap aggregate");
1475 
1476 		g_nrecs = 0;
1477 
1478 		if (dtrace_aggregate_walk(g_dtp,
1479 		    count_aggregate, &g_nrecs) != 0)
1480 			dfail("failed to walk aggregate");
1481 	}
1482 
1483 #ifdef illumos
1484 	if ((data_buf = memalign(sizeof (uint64_t),
1485 	    (g_nrecs + 1) * g_recsize)) == NULL)
1486 #else
1487 	if (posix_memalign((void **)&data_buf, sizeof (uint64_t),
1488 	    (g_nrecs + 1) * g_recsize) )
1489 #endif
1490 		fail(1, "Memory allocation failed");
1491 
1492 	/*
1493 	 * Read out the DTrace data.
1494 	 */
1495 	g_nrecs_used = process_data(out, data_buf);
1496 
1497 	if (g_nrecs_used > g_nrecs || g_dropped)
1498 		(void) fprintf(stderr, "lockstat: warning: "
1499 		    "ran out of data records (use -n for more)\n");
1500 
1501 	/* LINTED - alignment */
1502 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1503 	    /* LINTED - alignment */
1504 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1505 		ev_count[lsp->ls_event] += lsp->ls_count;
1506 		ev_time[lsp->ls_event] += lsp->ls_time;
1507 	}
1508 
1509 	/*
1510 	 * If -g was specified, convert stacks into individual records.
1511 	 */
1512 	if (g_gflag) {
1513 		lsrec_t *newlsp, *oldlsp;
1514 
1515 #ifdef illumos
1516 		newlsp = memalign(sizeof (uint64_t),
1517 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1518 #else
1519 		posix_memalign((void **)&newlsp, sizeof (uint64_t),
1520 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1521 #endif
1522 		if (newlsp == NULL)
1523 			fail(1, "Cannot allocate space for -g processing");
1524 		lsp = newlsp;
1525 		/* LINTED - alignment */
1526 		for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1527 		    /* LINTED - alignment */
1528 		    oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
1529 			int fr;
1530 			int caller_in_stack = 0;
1531 
1532 			if (oldlsp->ls_count == 0)
1533 				continue;
1534 
1535 			for (fr = 0; fr < g_stkdepth; fr++) {
1536 				if (oldlsp->ls_stack[fr] == 0)
1537 					break;
1538 				if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
1539 					caller_in_stack = 1;
1540 				bcopy(oldlsp, lsp, LS_TIME);
1541 				lsp->ls_caller = oldlsp->ls_stack[fr];
1542 				/* LINTED - alignment */
1543 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1544 			}
1545 			if (!caller_in_stack) {
1546 				bcopy(oldlsp, lsp, LS_TIME);
1547 				/* LINTED - alignment */
1548 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1549 			}
1550 		}
1551 		g_nrecs = g_nrecs_used =
1552 		    ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
1553 		g_recsize = LS_TIME;
1554 		g_stkdepth = 0;
1555 		free(data_buf);
1556 		data_buf = (char *)newlsp;
1557 	}
1558 
1559 	if ((sort_buf = calloc(2 * (g_nrecs + 1),
1560 	    sizeof (void *))) == NULL)
1561 		fail(1, "Sort buffer allocation failed");
1562 	merge_buf = sort_buf + (g_nrecs + 1);
1563 
1564 	/*
1565 	 * Build the sort buffer, discarding zero-count records along the way.
1566 	 */
1567 	/* LINTED - alignment */
1568 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1569 	    /* LINTED - alignment */
1570 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1571 		if (lsp->ls_count == 0)
1572 			lsp->ls_event = LS_MAX_EVENTS;
1573 		sort_buf[i] = lsp;
1574 	}
1575 
1576 	if (g_nrecs_used == 0)
1577 		exit(0);
1578 
1579 	/*
1580 	 * Add a sentinel after the last record
1581 	 */
1582 	sort_buf[i] = lsp;
1583 	lsp->ls_event = LS_MAX_EVENTS;
1584 
1585 	if (g_tracing) {
1586 		report_trace(out, sort_buf);
1587 		return (0);
1588 	}
1589 
1590 	/*
1591 	 * Application of -g may have resulted in multiple records
1592 	 * with the same signature; coalesce them.
1593 	 */
1594 	if (g_gflag) {
1595 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1596 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1597 	}
1598 
1599 	/*
1600 	 * Coalesce locks within the same symbol if -c option specified.
1601 	 * Coalesce PCs within the same function if -k option specified.
1602 	 */
1603 	if (g_cflag || g_kflag) {
1604 		for (i = 0; i < g_nrecs_used; i++) {
1605 			int fr;
1606 			lsp = sort_buf[i];
1607 			if (g_cflag)
1608 				coalesce_symbol(&lsp->ls_lock);
1609 			if (g_kflag) {
1610 				for (fr = 0; fr < g_stkdepth; fr++)
1611 					coalesce_symbol(&lsp->ls_stack[fr]);
1612 				coalesce_symbol(&lsp->ls_caller);
1613 			}
1614 		}
1615 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1616 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1617 	}
1618 
1619 	/*
1620 	 * Coalesce callers if -w option specified
1621 	 */
1622 	if (g_wflag) {
1623 		mergesort(lock_and_count_cmp_anywhere,
1624 		    sort_buf, merge_buf, g_nrecs_used);
1625 		coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
1626 	}
1627 
1628 	/*
1629 	 * Coalesce locks if -W option specified
1630 	 */
1631 	if (g_Wflag) {
1632 		mergesort(site_and_count_cmp_anylock,
1633 		    sort_buf, merge_buf, g_nrecs_used);
1634 		coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
1635 	}
1636 
1637 	/*
1638 	 * Sort data by contention count (ls_count) or total time (ls_time),
1639 	 * depending on g_Pflag.  Override g_Pflag if time wasn't measured.
1640 	 */
1641 	if (g_recsize < LS_TIME)
1642 		g_Pflag = 0;
1643 
1644 	if (g_Pflag)
1645 		mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
1646 	else
1647 		mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
1648 
1649 	/*
1650 	 * Display data by event type
1651 	 */
1652 	first = &sort_buf[0];
1653 	while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
1654 		current = first;
1655 		while ((lsp = *current)->ls_event == event)
1656 			current++;
1657 		report_stats(out, first, current - first, ev_count[event],
1658 		    ev_time[event]);
1659 		first = current;
1660 	}
1661 
1662 	return (0);
1663 }
1664 
1665 static char *
1666 format_symbol(char *buf, uintptr_t addr, int show_size)
1667 {
1668 	uintptr_t symoff;
1669 	char *symname;
1670 	size_t symsize;
1671 
1672 	symname = addr_to_sym(addr, &symoff, &symsize);
1673 
1674 	if (show_size && symoff == 0)
1675 		(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
1676 	else if (symoff == 0)
1677 		(void) sprintf(buf, "%s", symname);
1678 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
1679 #ifdef illumos
1680 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
1681 #else
1682 		(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
1683 #endif
1684 	else if (symoff <= symsize || (symoff < 256 && addr != symoff))
1685 		(void) sprintf(buf, "%s+0x%llx", symname,
1686 		    (unsigned long long)symoff);
1687 	else
1688 		(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
1689 	return (buf);
1690 }
1691 
1692 static void
1693 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
1694 	uint64_t total_time)
1695 {
1696 	uint32_t event = sort_buf[0]->ls_event;
1697 	lsrec_t *lsp;
1698 	double ptotal = 0.0;
1699 	double percent;
1700 	int i, j, fr;
1701 	int displayed;
1702 	int first_bin, last_bin, max_bin_count, total_bin_count;
1703 	int rectype;
1704 	char buf[256];
1705 	char lhdr[80], chdr[80];
1706 
1707 	rectype = g_recsize;
1708 
1709 	if (g_topn == 0) {
1710 		(void) fprintf(out, "%20llu %s\n",
1711 		    g_rates == 0 ? total_count :
1712 		    ((unsigned long long)total_count * NANOSEC) / g_elapsed,
1713 		    g_event_info[event].ev_desc);
1714 		return;
1715 	}
1716 
1717 	(void) sprintf(lhdr, "%s%s",
1718 	    g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
1719 	(void) sprintf(chdr, "%s%s",
1720 	    g_wflag ? "Hottest " : "", "Caller");
1721 
1722 	if (!g_pflag)
1723 		(void) fprintf(out,
1724 		    "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
1725 		    g_event_info[event].ev_desc, (double)total_count,
1726 		    (double)g_elapsed / NANOSEC,
1727 		    (double)total_count * NANOSEC / g_elapsed);
1728 
1729 	if (!g_pflag && rectype < LS_HIST) {
1730 		(void) sprintf(buf, "%s", g_event_info[event].ev_units);
1731 		(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1732 		    g_rates ? "ops/s" : "Count",
1733 		    g_gflag ? "genr" : "indv",
1734 		    "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
1735 		(void) fprintf(out, "---------------------------------"
1736 		    "----------------------------------------------\n");
1737 	}
1738 
1739 	displayed = 0;
1740 	for (i = 0; i < nrecs; i++) {
1741 		lsp = sort_buf[i];
1742 
1743 		if (displayed++ >= g_topn)
1744 			break;
1745 
1746 		if (g_pflag) {
1747 			int j;
1748 
1749 			(void) fprintf(out, "%u %u",
1750 			    lsp->ls_event, lsp->ls_count);
1751 			(void) fprintf(out, " %s",
1752 			    format_symbol(buf, lsp->ls_lock, g_cflag));
1753 			(void) fprintf(out, " %s",
1754 			    format_symbol(buf, lsp->ls_caller, 0));
1755 			(void) fprintf(out, " %f",
1756 			    (double)lsp->ls_refcnt / lsp->ls_count);
1757 			if (rectype >= LS_TIME)
1758 				(void) fprintf(out, " %llu",
1759 				    (unsigned long long)lsp->ls_time);
1760 			if (rectype >= LS_HIST) {
1761 				for (j = 0; j < 64; j++)
1762 					(void) fprintf(out, " %u",
1763 					    lsp->ls_hist[j]);
1764 			}
1765 			for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
1766 				if (rectype <= LS_STACK(j) ||
1767 				    lsp->ls_stack[j] == 0)
1768 					break;
1769 				(void) fprintf(out, " %s",
1770 				    format_symbol(buf, lsp->ls_stack[j], 0));
1771 			}
1772 			(void) fprintf(out, "\n");
1773 			continue;
1774 		}
1775 
1776 		if (rectype >= LS_HIST) {
1777 			(void) fprintf(out, "---------------------------------"
1778 			    "----------------------------------------------\n");
1779 			(void) sprintf(buf, "%s",
1780 			    g_event_info[event].ev_units);
1781 			(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1782 			    g_rates ? "ops/s" : "Count",
1783 			    g_gflag ? "genr" : "indv",
1784 			    "cuml", "rcnt", buf, lhdr, chdr);
1785 		}
1786 
1787 		if (g_Pflag && total_time != 0)
1788 			percent = (lsp->ls_time * 100.00) / total_time;
1789 		else
1790 			percent = (lsp->ls_count * 100.00) / total_count;
1791 
1792 		ptotal += percent;
1793 
1794 		if (rectype >= LS_TIME)
1795 			(void) sprintf(buf, "%llu",
1796 			    (unsigned long long)(lsp->ls_time / lsp->ls_count));
1797 		else
1798 			buf[0] = '\0';
1799 
1800 		(void) fprintf(out, "%5llu ",
1801 		    g_rates == 0 ? lsp->ls_count :
1802 		    ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
1803 
1804 		(void) fprintf(out, "%3.0f%% ", percent);
1805 
1806 		if (g_gflag)
1807 			(void) fprintf(out, "---- ");
1808 		else
1809 			(void) fprintf(out, "%3.0f%% ", ptotal);
1810 
1811 		(void) fprintf(out, "%4.2f %8s ",
1812 		    (double)lsp->ls_refcnt / lsp->ls_count, buf);
1813 
1814 		(void) fprintf(out, "%-22s ",
1815 		    format_symbol(buf, lsp->ls_lock, g_cflag));
1816 
1817 		(void) fprintf(out, "%-24s\n",
1818 		    format_symbol(buf, lsp->ls_caller, 0));
1819 
1820 		if (rectype < LS_HIST)
1821 			continue;
1822 
1823 		(void) fprintf(out, "\n");
1824 		(void) fprintf(out, "%10s %31s %-9s %-24s\n",
1825 		    g_event_info[event].ev_units,
1826 		    "------ Time Distribution ------",
1827 		    g_rates ? "ops/s" : "count",
1828 		    rectype > LS_STACK(0) ? "Stack" : "");
1829 
1830 		first_bin = 0;
1831 		while (lsp->ls_hist[first_bin] == 0)
1832 			first_bin++;
1833 
1834 		last_bin = 63;
1835 		while (lsp->ls_hist[last_bin] == 0)
1836 			last_bin--;
1837 
1838 		max_bin_count = 0;
1839 		total_bin_count = 0;
1840 		for (j = first_bin; j <= last_bin; j++) {
1841 			total_bin_count += lsp->ls_hist[j];
1842 			if (lsp->ls_hist[j] > max_bin_count)
1843 				max_bin_count = lsp->ls_hist[j];
1844 		}
1845 
1846 		/*
1847 		 * If we went a few frames below the caller, ignore them
1848 		 */
1849 		for (fr = 3; fr > 0; fr--)
1850 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1851 				break;
1852 
1853 		for (j = first_bin; j <= last_bin; j++) {
1854 			uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
1855 			(void) fprintf(out, "%10llu |%s%s %-9u ",
1856 			    1ULL << j,
1857 			    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
1858 			    "                              " + depth,
1859 			    g_rates == 0 ? lsp->ls_hist[j] :
1860 			    (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
1861 			    g_elapsed));
1862 			if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
1863 				(void) fprintf(out, "\n");
1864 				continue;
1865 			}
1866 			(void) fprintf(out, "%-24s\n",
1867 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1868 			fr++;
1869 		}
1870 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1871 			(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
1872 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1873 			fr++;
1874 		}
1875 	}
1876 
1877 	if (!g_pflag)
1878 		(void) fprintf(out, "---------------------------------"
1879 		    "----------------------------------------------\n");
1880 
1881 	(void) fflush(out);
1882 }
1883 
1884 static void
1885 report_trace(FILE *out, lsrec_t **sort_buf)
1886 {
1887 	lsrec_t *lsp;
1888 	int i, fr;
1889 	int rectype;
1890 	char buf[256], buf2[256];
1891 
1892 	rectype = g_recsize;
1893 
1894 	if (!g_pflag) {
1895 		(void) fprintf(out, "%5s  %7s  %11s  %-24s  %-24s\n",
1896 		    "Event", "Time", "Owner", "Lock", "Caller");
1897 		(void) fprintf(out, "---------------------------------"
1898 		    "----------------------------------------------\n");
1899 	}
1900 
1901 	for (i = 0; i < g_nrecs_used; i++) {
1902 
1903 		lsp = sort_buf[i];
1904 
1905 		if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
1906 			continue;
1907 
1908 		(void) fprintf(out, "%2d  %10llu  %11p  %-24s  %-24s\n",
1909 		    lsp->ls_event, (unsigned long long)lsp->ls_time,
1910 		    (void *)lsp->ls_next,
1911 		    format_symbol(buf, lsp->ls_lock, 0),
1912 		    format_symbol(buf2, lsp->ls_caller, 0));
1913 
1914 		if (rectype <= LS_STACK(0))
1915 			continue;
1916 
1917 		/*
1918 		 * If we went a few frames below the caller, ignore them
1919 		 */
1920 		for (fr = 3; fr > 0; fr--)
1921 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1922 				break;
1923 
1924 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1925 			(void) fprintf(out, "%53s  %-24s\n", "",
1926 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1927 			fr++;
1928 		}
1929 		(void) fprintf(out, "\n");
1930 	}
1931 
1932 	(void) fflush(out);
1933 }
1934