xref: /freebsd/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <sys/types.h>
40 #include <sys/modctl.h>
41 #include <sys/stat.h>
42 #include <sys/wait.h>
43 #include <dtrace.h>
44 #include <sys/lockstat.h>
45 #include <alloca.h>
46 #include <signal.h>
47 #include <assert.h>
48 
/*
 * Platform glue.  GETOPT_EOF is the value main() compares getopt()'s return
 * against to detect the end of the options: EOF when built for "sun", -1
 * otherwise.
 */
#if defined(sun)
#define	GETOPT_EOF	EOF
#else
/* FreeBSD */
#include <sys/time.h>
#include <sys/resource.h>

/*
 * Route mergesort() calls to this file's lsmergesort().
 * NOTE(review): presumably this avoids a clash with a libc mergesort() --
 * confirm.
 */
#define	mergesort(a, b, c, d)	lsmergesort(a, b, c, d)
#define	GETOPT_EOF		(-1)

/* Program-counter type; lsrec_fill() uses it to read stack frames. */
typedef	uintptr_t	pc_t;
#endif /* defined(sun) */
61 
/* Option string handed to getopt() in main(). */
#define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"

/* Capacity of lsrec_t.ls_stack[] and upper bound checked for -s. */
#define	LS_MAX_STACK_DEPTH	50
/* Number of event slots; sizes g_event_info[], g_enabled[], etc. */
#define	LS_MAX_EVENTS		64
66 
/*
 * One lock-statistics record.  Records are laid out back to back at a stride
 * of g_recsize bytes (see the LS_* size macros), so only the leading
 * g_recsize bytes of any given record are valid; members that start at or
 * beyond that offset must not be touched.  The member order is therefore
 * significant.
 */
typedef struct lsrec {
	struct lsrec	*ls_next;	/* next in hash chain */
	uintptr_t	ls_lock;	/* lock address */
	uintptr_t	ls_caller;	/* caller address */
	uint32_t	ls_count;	/* cumulative event count */
	uint32_t	ls_event;	/* type of event */
	uintptr_t	ls_refcnt;	/* cumulative reference count */
	uint64_t	ls_time;	/* cumulative event duration */
	uint32_t	ls_hist[64];	/* log2(duration) histogram */
	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];	/* caller stack PCs */
} lsrec_t;
78 
/*
 * Cursor handed to the DTrace walk/consume callbacks (process_aggregate(),
 * process_trace()): it tracks where the next record is to be written and
 * how many records have been written so far.
 */
typedef struct lsdata {
	struct lsrec	*lsd_next;	/* next available */
	int		lsd_count;	/* number of records */
} lsdata_t;
83 
/*
 * Definitions for the types of experiments which can be run.  They are
 * listed in increasing order of memory cost and processing time cost.
 * The numerical value of each type is the number of bytes needed per record.
 *
 * Each value is the offset of the first lsrec_t member that the experiment
 * does NOT record; LS_STACK(depth) is the size of a record that carries
 * `depth' stack frames.
 */
#define	LS_BASIC	offsetof(lsrec_t, ls_time)
#define	LS_TIME		offsetof(lsrec_t, ls_hist[0])
#define	LS_HIST		offsetof(lsrec_t, ls_stack[0])
#define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])
93 
/* Report routines; their definitions are not in this part of the file. */
static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
static void report_trace(FILE *, lsrec_t **);

/*
 * Symbol-table helpers defined outside this file.  As used here:
 * symtab_init() returns -1 on failure, and addr_to_sym() returns NULL when
 * no symbol is found, otherwise filling in the offset into the symbol and
 * the symbol's size.
 */
extern int symtab_init(void);
extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
extern uintptr_t sym_to_addr(char *name);
extern size_t sym_size(char *name);
extern char *strtok_r(char *, const char *, char **);
102 
#define	DEFAULT_NRECS	10000	/* initial g_nrecs; shown by usage() for -n */
#define	DEFAULT_HZ	97	/* shown by usage() as the default -i rate */
#define	MAX_HZ		1000
#define	MIN_AGGSIZE	(16 * 1024)
#define	MAX_AGGSIZE	(32 * 1024 * 1024)
108 
static int g_stkdepth;		/* -s depth; frames compared by the *cmp()s */
static int g_topn = INT_MAX;
static hrtime_t g_elapsed;
static int g_rates = 0;
static int g_pflag = 0;
static int g_Pflag = 0;
static int g_wflag = 0;
static int g_Wflag = 0;
static int g_cflag = 0;
static int g_kflag = 0;
static int g_gflag = 0;
static int g_Vflag = 0;		/* dump the generated D program to stderr */
static int g_tracing = 0;	/* trace() each event instead of aggregating */
static size_t g_recsize;	/* bytes used per lsrec_t (an LS_* value) */
static size_t g_nrecs;		/* record limit; starts at DEFAULT_NRECS */
static int g_nrecs_used;
static uchar_t g_enabled[LS_MAX_EVENTS];
static hrtime_t g_min_duration[LS_MAX_EVENTS];	/* per-event ">=" floor */
static dtrace_hdl_t *g_dtp;	/* libdtrace handle from dtrace_open() */
static char *g_predicate;	/* extra D predicate for non-'I' events */
static char *g_ipredicate;	/* extra D predicate for 'I' events */
static char *g_prog;		/* generated D program text (dprog_add()) */
static int g_proglen;		/* bytes in g_prog, including the NUL */
static int g_dropped;		/* drop notifications seen by drophandler() */
133 
/*
 * Static description of one event slot; see g_event_info[].
 */
typedef struct ls_event_info {
	char	ev_type;	/* class letter: 'C', 'H', 'I' or 'E' */
	char	ev_lhdr[20];	/* NOTE(review): presumably a report column */
				/* header -- not used in the visible code */
	char	ev_desc[80];	/* description listed by show_events() */
	char	ev_units[10];	/* NOTE(review): presumably the units of */
				/* the measurement -- confirm in reports */
	char	ev_name[DTRACE_NAMELEN]; /* probe to enable; "" means none */
	char	*ev_predicate;	/* optional D predicate, or NULL */
	char	*ev_acquire;	/* optional acquire-side probe, or NULL; */
				/* when set, dprog_addevent() emits an */
				/* extra clause for it */
} ls_event_info_t;
143 
/*
 * Event table, indexed by event number.  Initializers are positional, in
 * member order: ev_type, ev_lhdr, ev_desc, ev_units, ev_name, ev_predicate,
 * ev_acquire.  Omitted trailing members are zero-initialized, i.e. an empty
 * ev_name and NULL predicate/acquire pointers.
 *
 * Slots 0-31 are 'C' events, 32-55 are 'H', 56-59 are 'I' and 60-63 are 'E'.
 * "Unknown event" slots are placeholders: show_events() does not count them
 * and dprog_addevent() generates nothing for them (empty ev_name).
 */
static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
	    "lockstat:::adaptive-spin" },
	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
	    "lockstat:::adaptive-block" },
	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
	    "lockstat:::spin-spin" },
	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
	    "lockstat:::thread-spin" },
	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
	{ 'C',	"Lock",	"Unknown event (type 8)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 9)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 10)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 11)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 12)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 13)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 14)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 15)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 19)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
	    "lockstat:::adaptive-release", NULL,
	    "lockstat:::adaptive-acquire" },
	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
	    "lockstat:::spin-release", NULL,
	    "lockstat:::spin-acquire" },
	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
	    "lockstat:::rw-release", "arg1 == 0",
	    "lockstat:::rw-acquire" },
	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
	    "lockstat:::rw-release", "arg1 != 0",
	    "lockstat:::rw-acquire" },
	{ 'H',	"Lock",	"Unknown event (type 36)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 37)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
#if defined(sun)
	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
#else
	/* FreeBSD */
	{ 'I',	"CPU+Pri_Class", "Profiling interrupt",		"nsec",
#endif
	    "profile:::profile-97", NULL },
	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
};
233 
234 #if !defined(sun)
/*
 * Short names for priority classes (non-Solaris builds only).
 * NOTE(review): presumably indexed by the td_pri_class value that the 'I'
 * event folds into its key in dprog_addevent() -- the consumer is not in the
 * visible part of the file; confirm.
 */
static char *g_pri_class[] = {
	"",
	"Intr",
	"RealT",
	"TShar",
	"Idle"
};
242 #endif
243 
/*
 * Report a fatal error and exit with status 2.
 *
 * Writes "lockstat: " followed by the printf-style message to stderr.  When
 * do_perror is non-zero, ": " and the strerror() text for the errno value
 * that was current on entry are appended.  Never returns.
 */
static void
fail(int do_perror, const char *message, ...)
{
	int entry_errno = errno;	/* snapshot before stdio can alter it */
	va_list ap;

	va_start(ap, message);
	(void) fprintf(stderr, "lockstat: ");
	(void) vfprintf(stderr, message, ap);
	va_end(ap);

	if (do_perror != 0)
		(void) fprintf(stderr, ": %s", strerror(entry_errno));

	(void) fprintf(stderr, "\n");
	exit(2);
}
259 
260 static void
261 dfail(const char *message, ...)
262 {
263 	va_list args;
264 
265 	va_start(args, message);
266 	(void) fprintf(stderr, "lockstat: ");
267 	(void) vfprintf(stderr, message, args);
268 	va_end(args);
269 	(void) fprintf(stderr, ": %s\n",
270 	    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
271 
272 	exit(2);
273 }
274 
275 static void
276 show_events(char event_type, char *desc)
277 {
278 	int i, first = -1, last;
279 
280 	for (i = 0; i < LS_MAX_EVENTS; i++) {
281 		ls_event_info_t *evp = &g_event_info[i];
282 		if (evp->ev_type != event_type ||
283 		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
284 			continue;
285 		if (first == -1)
286 			first = i;
287 		last = i;
288 	}
289 
290 	(void) fprintf(stderr,
291 	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
292 	    desc, event_type, first, last);
293 
294 	for (i = first; i <= last; i++)
295 		(void) fprintf(stderr,
296 		    "%4d = %s\n", i, g_event_info[i].ev_desc);
297 }
298 
/*
 * Print the full usage message to stderr -- the option summary (with the
 * default interrupt rate and record limit substituted in) followed by the
 * per-class event lists from show_events() -- then exit with status 1.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: lockstat [options] command [args]\n"
	    "\nEvent selection options:\n\n"
	    "  -C              watch contention events [on by default]\n"
	    "  -E              watch error events [off by default]\n"
	    "  -H              watch hold events [off by default]\n"
	    "  -I              watch interrupt events [off by default]\n"
	    "  -A              watch all lock events [equivalent to -CH]\n"
	    "  -e event_list   only watch the specified events (shown below);\n"
	    "                  <event_list> is a comma-separated list of\n"
	    "                  events or ranges of events, e.g. 1,4-7,35\n"
	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
	    "\nData gathering options:\n\n"
	    "  -b              basic statistics (lock, caller, event count)\n"
	    "  -t              timing for all events [default]\n"
	    "  -h              histograms for event times\n"
	    "  -s depth        stack traces <depth> deep\n"
	    "  -x opt[=val]    enable or modify DTrace options\n"
	    "\nData filtering options:\n\n"
	    "  -n nrecords     maximum number of data records [default: %d]\n"
	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
	    "                  symbolic name or hex address; <size> defaults\n"
	    "                  to the ELF symbol size if available, 1 if not\n"
	    "  -f func[,size]  only watch events generated by <func>\n"
	    "  -d duration     only watch events longer than <duration>\n"
	    "  -T              trace (rather than sample) events\n"
	    "\nData reporting options:\n\n"
	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
	    "  -k              coalesce PCs within functions\n"
	    "  -g              show total events generated by function\n"
	    "  -w              wherever: don't distinguish events by caller\n"
	    "  -W              whichever: don't distinguish events by lock\n"
	    "  -R              display rates rather than counts\n"
	    "  -p              parsable output format (awk(1)-friendly)\n"
	    "  -P              sort lock data by (count * avg_time) product\n"
	    "  -D n            only display top <n> events of each type\n"
	    "  -o filename     send output to <filename>\n",
	    DEFAULT_HZ, DEFAULT_NRECS);

	/* One list per event class, in the order C, H, I, E. */
	show_events('C', "Contention");
	show_events('H', "Hold-time");
	show_events('I', "Interrupt");
	show_events('E', "Error");
	(void) fprintf(stderr, "\n");

	exit(1);
}
349 
350 static int
351 lockcmp(lsrec_t *a, lsrec_t *b)
352 {
353 	int i;
354 
355 	if (a->ls_event < b->ls_event)
356 		return (-1);
357 	if (a->ls_event > b->ls_event)
358 		return (1);
359 
360 	for (i = g_stkdepth - 1; i >= 0; i--) {
361 		if (a->ls_stack[i] < b->ls_stack[i])
362 			return (-1);
363 		if (a->ls_stack[i] > b->ls_stack[i])
364 			return (1);
365 	}
366 
367 	if (a->ls_caller < b->ls_caller)
368 		return (-1);
369 	if (a->ls_caller > b->ls_caller)
370 		return (1);
371 
372 	if (a->ls_lock < b->ls_lock)
373 		return (-1);
374 	if (a->ls_lock > b->ls_lock)
375 		return (1);
376 
377 	return (0);
378 }
379 
380 static int
381 countcmp(lsrec_t *a, lsrec_t *b)
382 {
383 	if (a->ls_event < b->ls_event)
384 		return (-1);
385 	if (a->ls_event > b->ls_event)
386 		return (1);
387 
388 	return (b->ls_count - a->ls_count);
389 }
390 
391 static int
392 timecmp(lsrec_t *a, lsrec_t *b)
393 {
394 	if (a->ls_event < b->ls_event)
395 		return (-1);
396 	if (a->ls_event > b->ls_event)
397 		return (1);
398 
399 	if (a->ls_time < b->ls_time)
400 		return (1);
401 	if (a->ls_time > b->ls_time)
402 		return (-1);
403 
404 	return (0);
405 }
406 
407 static int
408 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
409 {
410 	if (a->ls_event < b->ls_event)
411 		return (-1);
412 	if (a->ls_event > b->ls_event)
413 		return (1);
414 
415 	if (a->ls_lock < b->ls_lock)
416 		return (-1);
417 	if (a->ls_lock > b->ls_lock)
418 		return (1);
419 
420 	return (0);
421 }
422 
423 static int
424 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
425 {
426 	if (a->ls_event < b->ls_event)
427 		return (-1);
428 	if (a->ls_event > b->ls_event)
429 		return (1);
430 
431 	if (a->ls_lock < b->ls_lock)
432 		return (-1);
433 	if (a->ls_lock > b->ls_lock)
434 		return (1);
435 
436 	return (b->ls_count - a->ls_count);
437 }
438 
439 static int
440 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
441 {
442 	int i;
443 
444 	if (a->ls_event < b->ls_event)
445 		return (-1);
446 	if (a->ls_event > b->ls_event)
447 		return (1);
448 
449 	for (i = g_stkdepth - 1; i >= 0; i--) {
450 		if (a->ls_stack[i] < b->ls_stack[i])
451 			return (-1);
452 		if (a->ls_stack[i] > b->ls_stack[i])
453 			return (1);
454 	}
455 
456 	if (a->ls_caller < b->ls_caller)
457 		return (-1);
458 	if (a->ls_caller > b->ls_caller)
459 		return (1);
460 
461 	return (0);
462 }
463 
464 static int
465 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
466 {
467 	int i;
468 
469 	if (a->ls_event < b->ls_event)
470 		return (-1);
471 	if (a->ls_event > b->ls_event)
472 		return (1);
473 
474 	for (i = g_stkdepth - 1; i >= 0; i--) {
475 		if (a->ls_stack[i] < b->ls_stack[i])
476 			return (-1);
477 		if (a->ls_stack[i] > b->ls_stack[i])
478 			return (1);
479 	}
480 
481 	if (a->ls_caller < b->ls_caller)
482 		return (-1);
483 	if (a->ls_caller > b->ls_caller)
484 		return (1);
485 
486 	return (b->ls_count - a->ls_count);
487 }
488 
/*
 * Sort the n record pointers in a[] into the order defined by cmp, using
 * b[] (also n entries) as scratch space.
 *
 * Each half of a[] is sorted recursively; the halves are then copied into
 * b[] -- the first half in order, the second half reversed -- and merged
 * back into a[] by consuming b[] from both ends at once.  On a tie
 * (cmp() == 0) the element from the high end is taken.
 *
 * NOTE(review): the final store runs even for n == 0, touching a[0] and
 * b[0]; callers presumably always pass n >= 1 -- confirm.
 */
static void
lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
{
	int m = n / 2;
	int i, j;

	/* Halves of one element are already sorted. */
	if (m > 1)
		lsmergesort(cmp, a, b, m);
	if (n - m > 1)
		lsmergesort(cmp, a + m, b + m, n - m);
	/* b[0 .. m-1] = first half, in order; leaves i == 0. */
	for (i = m; i > 0; i--)
		b[i - 1] = a[i - 1];
	/* b[m .. n-1] = second half, reversed; leaves j == n - 1. */
	for (j = m - 1; j < n - 1; j++)
		b[n + m - j - 2] = a[j + 1];
	/* Merge inward from both ends until the cursors meet. */
	while (i < j)
		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
	/* The one remaining element goes last. */
	*a = b[i];
}
507 
508 static void
509 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
510 {
511 	int i, j;
512 	lsrec_t *target, *current;
513 
514 	target = lock[0];
515 
516 	for (i = 1; i < n; i++) {
517 		current = lock[i];
518 		if (cmp(current, target) != 0) {
519 			target = current;
520 			continue;
521 		}
522 		current->ls_event = LS_MAX_EVENTS;
523 		target->ls_count += current->ls_count;
524 		target->ls_refcnt += current->ls_refcnt;
525 		if (g_recsize < LS_TIME)
526 			continue;
527 		target->ls_time += current->ls_time;
528 		if (g_recsize < LS_HIST)
529 			continue;
530 		for (j = 0; j < 64; j++)
531 			target->ls_hist[j] += current->ls_hist[j];
532 	}
533 }
534 
/*
 * Round *addrp down to the start of the symbol that contains it.  The
 * address is left untouched when addr_to_sym() finds no symbol or when the
 * reported offset is not smaller than the symbol's size.
 */
static void
coalesce_symbol(uintptr_t *addrp)
{
	uintptr_t offset;
	size_t length;

	if (addr_to_sym(*addrp, &offset, &length) == NULL)
		return;

	if (offset < length)
		*addrp -= offset;
}
544 
/*
 * AND another term onto the D predicate string *pred.
 *
 * pred   in/out: heap-allocated predicate, or NULL for "none yet"; replaced
 *        by a newly allocated string (the old one is freed).
 * what   left-hand expression of the term; if NULL the call is a no-op.
 * cmp    comparison operator, or NULL to use `what' alone as the term.
 * value  right-hand operand, emitted as a 0x-prefixed hex literal (only
 *        used when cmp is non-NULL).
 *
 * The result is "term" when there was no previous predicate and
 * "(old) && (term)" otherwise.  The value is formatted with 0x%llx rather
 * than "0x%p": %p output is implementation-defined and already carries a
 * "0x" prefix on some platforms, which produced "0x0x..." in the program.
 * Exits with status 2 if memory cannot be allocated.
 */
static void
predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
{
	const char *old;
	char *new;
	size_t newlen;

	if (what == NULL)
		return;

	old = (*pred != NULL) ? *pred : "";

	/*
	 * Upper bound: both strings, the operator, every literal character
	 * of the longest format (plus its NUL), and the widest hex value.
	 */
	newlen = strlen(old) + strlen(what) +
	    (cmp != NULL ? strlen(cmp) : 0) +
	    sizeof ("() && (  0x)") + 2 * sizeof (unsigned long long);

	if ((new = malloc(newlen)) == NULL) {
		(void) fprintf(stderr,
		    "lockstat: failed to allocate predicate: %s\n",
		    strerror(errno));
		exit(2);
	}

	if (old[0] != '\0') {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "(%s) && (%s %s 0x%llx)",
			    old, what, cmp, (unsigned long long)value);
		} else {
			(void) snprintf(new, newlen, "(%s) && (%s)",
			    old, what);
		}
	} else {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "%s %s 0x%llx",
			    what, cmp, (unsigned long long)value);
		} else {
			(void) snprintf(new, newlen, "%s", what);
		}
	}

	free(*pred);
	*pred = new;
}
582 
/*
 * Release the predicate string held in *pred and reset *pred to NULL.
 * A NULL *pred is accepted.
 */
static void
predicate_destroy(char **pred)
{
	char *doomed = *pred;

	*pred = NULL;
	free(doomed);
}
589 
/*
 * OR another address-range test onto the D filter string *filt.
 *
 * filt  in/out: heap-allocated filter, or NULL for "none yet"; replaced by
 *       a newly allocated string (the old one is freed).
 * what  D expression to test.
 * base  first address of the range.
 * size  length of the range; the test is base <= what < base + size.
 *
 * The appended text is "(what >= 0xBASE && what < 0xEND)", preceded by
 * " || " when the filter was not empty.  The result is sized from the
 * formatted length instead of being staged in a fixed 256-byte stack
 * buffer, so a long `what' can no longer overflow it.  Addresses are
 * written with 0x%llx on every platform, replacing the per-platform
 * "0x%p" / "%p" variants whose output is implementation-defined.
 * Exits with status 2 if memory cannot be allocated.
 */
static void
filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
{
	static const char fmt[] = "%s%s(%s >= 0x%llx && %s < 0x%llx)";
	const char *old = (*filt != NULL) ? *filt : "";
	const char *sep = (old[0] != '\0') ? " || " : "";
	unsigned long long lo = (unsigned long long)base;
	unsigned long long hi = (unsigned long long)(base + size);
	char scratch, *new = NULL;
	int need;

	/* Measure first, as dprog_add() does, then format for real. */
	need = snprintf(&scratch, 1, fmt, old, sep, what, lo, what, hi);

	if (need >= 0)
		new = malloc((size_t)need + 1);

	if (new == NULL) {
		(void) fprintf(stderr,
		    "lockstat: failed to allocate filter: %s\n",
		    strerror(errno));
		exit(2);
	}

	(void) snprintf(new, (size_t)need + 1, fmt,
	    old, sep, what, lo, what, hi);

	free(*filt);
	*filt = new;
}
616 
/*
 * Release the filter string held in *filt and reset *filt to NULL.
 * A NULL *filt is accepted.
 */
static void
filter_destroy(char **filt)
{
	char *doomed = *filt;

	*filt = NULL;
	free(doomed);
}
623 
624 static void
625 dprog_add(const char *fmt, ...)
626 {
627 	va_list args;
628 	int size, offs;
629 	char c;
630 
631 	va_start(args, fmt);
632 	size = vsnprintf(&c, 1, fmt, args) + 1;
633 	va_end(args);
634 
635 	if (g_proglen == 0) {
636 		offs = 0;
637 	} else {
638 		offs = g_proglen - 1;
639 	}
640 
641 	g_proglen = offs + size;
642 
643 	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
644 		fail(1, "failed to reallocate program text");
645 
646 	va_start(args, fmt);
647 	(void) vsnprintf(&g_prog[offs], size, fmt, args);
648 	va_end(args);
649 }
650 
651 /*
652  * This function may read like an open sewer, but keep in mind that programs
653  * that generate other programs are rarely pretty.  If one has the unenviable
654  * task of maintaining or -- worse -- extending this code, use the -V option
655  * to examine the D program as generated by this function.
656  */
657 static void
658 dprog_addevent(int event)
659 {
660 	ls_event_info_t *info = &g_event_info[event];
661 	char *pred = NULL;
662 	char stack[20];
663 	const char *arg0, *caller;
664 	char *arg1 = "arg1";
665 	char buf[80];
666 	hrtime_t dur;
667 	int depth;
668 
669 	if (info->ev_name[0] == '\0')
670 		return;
671 
672 	if (info->ev_type == 'I') {
673 		/*
674 		 * For interrupt events, arg0 (normally the lock pointer) is
675 		 * the CPU address plus the current pil, and arg1 (normally
676 		 * the number of nanoseconds) is the number of nanoseconds
677 		 * late -- and it's stored in arg2.
678 		 */
679 #if defined(sun)
680 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
681 		    "\t    curthread->t_cpu->cpu_profile_pil";
682 #else
683 		arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
684 		    "\t    0x01000000 + curthread->td_pri_class";
685 #endif
686 		caller = "(uintptr_t)arg0";
687 		arg1 = "arg2";
688 	} else {
689 		arg0 = "(uintptr_t)arg0";
690 		caller = "caller";
691 	}
692 
693 	if (g_recsize > LS_HIST) {
694 		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
695 			continue;
696 
697 		if (g_tracing) {
698 			(void) sprintf(stack, "\tstack(%d);\n", depth);
699 		} else {
700 			(void) sprintf(stack, ", stack(%d)", depth);
701 		}
702 	} else {
703 		(void) sprintf(stack, "");
704 	}
705 
706 	if (info->ev_acquire != NULL) {
707 		/*
708 		 * If this is a hold event, we need to generate an additional
709 		 * clause for the acquire; the clause for the release will be
710 		 * generated with the aggregating statement, below.
711 		 */
712 		dprog_add("%s\n", info->ev_acquire);
713 		predicate_add(&pred, info->ev_predicate, NULL, 0);
714 		predicate_add(&pred, g_predicate, NULL, 0);
715 		if (pred != NULL)
716 			dprog_add("/%s/\n", pred);
717 
718 		dprog_add("{\n");
719 		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
720 
721 		if (info->ev_type == 'H') {
722 			dprog_add("\t%s = timestamp;\n", buf);
723 		} else {
724 			/*
725 			 * If this isn't a hold event, it's the recursive
726 			 * error event.  For this, we simply bump the
727 			 * thread-local, per-lock count.
728 			 */
729 			dprog_add("\t%s++;\n", buf);
730 		}
731 
732 		dprog_add("}\n\n");
733 		predicate_destroy(&pred);
734 		pred = NULL;
735 
736 		if (info->ev_type == 'E') {
737 			/*
738 			 * If this is the recursive lock error event, we need
739 			 * to generate an additional clause to decrement the
740 			 * thread-local, per-lock count.  This assures that we
741 			 * only execute the aggregating clause if we have
742 			 * recursive entry.
743 			 */
744 			dprog_add("%s\n", info->ev_name);
745 			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
746 		}
747 
748 		predicate_add(&pred, buf, NULL, 0);
749 
750 		if (info->ev_type == 'H') {
751 			(void) sprintf(buf, "timestamp -\n\t    "
752 			    "self->ev%d[(uintptr_t)arg0]", event);
753 		}
754 
755 		arg1 = buf;
756 	} else {
757 		predicate_add(&pred, info->ev_predicate, NULL, 0);
758 		if (info->ev_type != 'I')
759 			predicate_add(&pred, g_predicate, NULL, 0);
760 		else
761 			predicate_add(&pred, g_ipredicate, NULL, 0);
762 	}
763 
764 	if ((dur = g_min_duration[event]) != 0)
765 		predicate_add(&pred, arg1, ">=", dur);
766 
767 	dprog_add("%s\n", info->ev_name);
768 
769 	if (pred != NULL)
770 		dprog_add("/%s/\n", pred);
771 	predicate_destroy(&pred);
772 
773 	dprog_add("{\n");
774 
775 	if (g_tracing) {
776 		dprog_add("\ttrace(%dULL);\n", event);
777 		dprog_add("\ttrace(%s);\n", arg0);
778 		dprog_add("\ttrace(%s);\n", caller);
779 		dprog_add(stack);
780 	} else {
781 		/*
782 		 * The ordering here is important:  when we process the
783 		 * aggregate, we count on the fact that @avg appears before
784 		 * @hist in program order to assure that @avg is assigned the
785 		 * first aggregation variable ID and @hist assigned the
786 		 * second; see the comment in process_aggregate() for details.
787 		 */
788 		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
789 		    event, arg0, caller, stack, arg1);
790 
791 		if (g_recsize >= LS_HIST) {
792 			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
793 			    "(%s);\n", event, arg0, caller, stack, arg1);
794 		}
795 	}
796 
797 	if (info->ev_acquire != NULL)
798 		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
799 
800 	dprog_add("}\n\n");
801 }
802 
803 static void
804 dprog_compile()
805 {
806 	dtrace_prog_t *prog;
807 	dtrace_proginfo_t info;
808 
809 	if (g_Vflag) {
810 		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
811 		(void) fputs(g_prog, stderr);
812 		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
813 	}
814 
815 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
816 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
817 		dfail("failed to compile program");
818 
819 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
820 		dfail("failed to enable probes");
821 
822 	if (dtrace_go(g_dtp) != 0)
823 		dfail("couldn't start tracing");
824 }
825 
/*
 * SIGUSR1 handler installed by status_init(); it deliberately does nothing.
 * NOTE(review): presumably the periodic signal exists only to interrupt a
 * blocking call in the main loop so that status_check() runs -- that loop is
 * not in the visible part of the file; confirm.
 */
static void
#if defined(sun)
status_fire(void)
#else
status_fire(int i)
#endif
{}
833 
834 static void
835 status_init(void)
836 {
837 	dtrace_optval_t val, status, agg;
838 	struct sigaction act;
839 	struct itimerspec ts;
840 	struct sigevent ev;
841 	timer_t tid;
842 
843 	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
844 		dfail("failed to get 'statusrate'");
845 
846 	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
847 		dfail("failed to get 'statusrate'");
848 
849 	/*
850 	 * We would want to awaken at a rate that is the GCD of the statusrate
851 	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
852 	 * simply awaken at a rate that is the more frequent of the two, which
853 	 * assures that we're never later than the interval implied by the
854 	 * more frequent rate.
855 	 */
856 	val = status < agg ? status : agg;
857 
858 	(void) sigemptyset(&act.sa_mask);
859 	act.sa_flags = 0;
860 	act.sa_handler = status_fire;
861 	(void) sigaction(SIGUSR1, &act, NULL);
862 
863 	ev.sigev_notify = SIGEV_SIGNAL;
864 	ev.sigev_signo = SIGUSR1;
865 
866 	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
867 		dfail("cannot create CLOCK_REALTIME timer");
868 
869 	ts.it_value.tv_sec = val / NANOSEC;
870 	ts.it_value.tv_nsec = val % NANOSEC;
871 	ts.it_interval = ts.it_value;
872 
873 	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
874 		dfail("cannot set time on CLOCK_REALTIME timer");
875 }
876 
877 static void
878 status_check(void)
879 {
880 	if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
881 		dfail("failed to snap aggregate");
882 
883 	if (dtrace_status(g_dtp) == -1)
884 		dfail("dtrace_status()");
885 }
886 
887 static void
888 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
889 {
890 	bzero(lsrec, g_recsize);
891 	lsrec->ls_count = 1;
892 
893 	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
894 		fail(0, "truncated DTrace record");
895 
896 	if (rec->dtrd_size != sizeof (uint64_t))
897 		fail(0, "bad event size in first record");
898 
899 	/* LINTED - alignment */
900 	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
901 	rec++;
902 
903 	if (rec->dtrd_size != sizeof (uintptr_t))
904 		fail(0, "bad lock address size in second record");
905 
906 	/* LINTED - alignment */
907 	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
908 	rec++;
909 
910 	if (rec->dtrd_size != sizeof (uintptr_t))
911 		fail(0, "bad caller size in third record");
912 
913 	/* LINTED - alignment */
914 	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
915 	rec++;
916 
917 	if (g_recsize > LS_HIST) {
918 		int frames, i;
919 		pc_t *stack;
920 
921 		frames = rec->dtrd_size / sizeof (pc_t);
922 		/* LINTED - alignment */
923 		stack = (pc_t *)(data + rec->dtrd_offset);
924 
925 		for (i = 1; i < frames; i++)
926 			lsrec->ls_stack[i - 1] = stack[i];
927 	}
928 }
929 
930 /*ARGSUSED*/
931 static int
932 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
933 {
934 	*((size_t *)arg) += 1;
935 
936 	return (DTRACE_AGGWALK_NEXT);
937 }
938 
939 static int
940 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
941 {
942 	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
943 	caddr_t data = agg->dtada_data;
944 	lsdata_t *lsdata = arg;
945 	lsrec_t *lsrec = lsdata->lsd_next;
946 	const dtrace_recdesc_t *rec;
947 	uint64_t *avg, *quantized;
948 	int i, j;
949 
950 	assert(lsdata->lsd_count < g_nrecs);
951 
952 	/*
953 	 * Aggregation variable IDs are guaranteed to be generated in program
954 	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
955 	 * plus one.  As "avg" appears before "hist" in program order, we know
956 	 * that "avg" will be allocated the first aggregation variable ID, and
957 	 * "hist" will be allocated the second aggregation variable ID -- and
958 	 * we therefore use the aggregation variable ID to differentiate the
959 	 * cases.
960 	 */
961 	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
962 		/*
963 		 * If this is the histogram entry.  We'll copy the quantized
964 		 * data into lc_hist, and jump over the rest.
965 		 */
966 		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
967 
968 		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
969 			fail(0, "bad variable ID in aggregation record");
970 
971 		if (rec->dtrd_size !=
972 		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
973 			fail(0, "bad quantize size in aggregation record");
974 
975 		/* LINTED - alignment */
976 		quantized = (uint64_t *)(data + rec->dtrd_offset);
977 
978 		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
979 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
980 			lsrec->ls_hist[j] = quantized[i];
981 
982 		goto out;
983 	}
984 
985 	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
986 	    aggdesc->dtagd_nrecs - 1, data);
987 
988 	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
989 
990 	if (rec->dtrd_size != 2 * sizeof (uint64_t))
991 		fail(0, "bad avg size in aggregation record");
992 
993 	/* LINTED - alignment */
994 	avg = (uint64_t *)(data + rec->dtrd_offset);
995 	lsrec->ls_count = (uint32_t)avg[0];
996 	lsrec->ls_time = (uintptr_t)avg[1];
997 
998 	if (g_recsize >= LS_HIST)
999 		return (DTRACE_AGGWALK_NEXT);
1000 
1001 out:
1002 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1003 	lsdata->lsd_count++;
1004 
1005 	return (DTRACE_AGGWALK_NEXT);
1006 }
1007 
1008 static int
1009 process_trace(const dtrace_probedata_t *pdata, void *arg)
1010 {
1011 	lsdata_t *lsdata = arg;
1012 	lsrec_t *lsrec = lsdata->lsd_next;
1013 	dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
1014 	caddr_t data = pdata->dtpda_data;
1015 
1016 	if (lsdata->lsd_count >= g_nrecs)
1017 		return (DTRACE_CONSUME_NEXT);
1018 
1019 	lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
1020 
1021 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1022 	lsdata->lsd_count++;
1023 
1024 	return (DTRACE_CONSUME_NEXT);
1025 }
1026 
1027 static int
1028 process_data(FILE *out, char *data)
1029 {
1030 	lsdata_t lsdata;
1031 
1032 	/* LINTED - alignment */
1033 	lsdata.lsd_next = (lsrec_t *)data;
1034 	lsdata.lsd_count = 0;
1035 
1036 	if (g_tracing) {
1037 		if (dtrace_consume(g_dtp, out,
1038 		    process_trace, NULL, &lsdata) != 0)
1039 			dfail("failed to consume buffer");
1040 
1041 		return (lsdata.lsd_count);
1042 	}
1043 
1044 	if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
1045 	    process_aggregate, &lsdata) != 0)
1046 		dfail("failed to walk aggregate");
1047 
1048 	return (lsdata.lsd_count);
1049 }
1050 
1051 /*ARGSUSED*/
1052 static int
1053 drophandler(const dtrace_dropdata_t *data, void *arg)
1054 {
1055 	g_dropped++;
1056 	(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
1057 	return (DTRACE_HANDLE_OK);
1058 }
1059 
1060 int
1061 main(int argc, char **argv)
1062 {
1063 	char *data_buf;
1064 	lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
1065 	FILE *out = stdout;
1066 	int c;
1067 	pid_t child;
1068 	int status;
1069 	int i, j;
1070 	hrtime_t duration;
1071 	char *addrp, *offp, *sizep, *evp, *lastp, *p;
1072 	uintptr_t addr;
1073 	size_t size, off;
1074 	int events_specified = 0;
1075 	int exec_errno = 0;
1076 	uint32_t event;
1077 	char *filt = NULL, *ifilt = NULL;
1078 	static uint64_t ev_count[LS_MAX_EVENTS + 1];
1079 	static uint64_t ev_time[LS_MAX_EVENTS + 1];
1080 	dtrace_optval_t aggsize;
1081 	char aggstr[10];
1082 	long ncpus;
1083 	int dynvar = 0;
1084 	int err;
1085 
1086 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
1087 		fail(0, "cannot open dtrace library: %s",
1088 		    dtrace_errmsg(NULL, err));
1089 	}
1090 
1091 	if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
1092 		dfail("couldn't establish drop handler");
1093 
1094 	if (symtab_init() == -1)
1095 		fail(1, "can't load kernel symbols");
1096 
1097 	g_nrecs = DEFAULT_NRECS;
1098 
1099 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1100 		switch (c) {
1101 		case 'b':
1102 			g_recsize = LS_BASIC;
1103 			break;
1104 
1105 		case 't':
1106 			g_recsize = LS_TIME;
1107 			break;
1108 
1109 		case 'h':
1110 			g_recsize = LS_HIST;
1111 			break;
1112 
1113 		case 's':
1114 			if (!isdigit(optarg[0]))
1115 				usage();
1116 			g_stkdepth = atoi(optarg);
1117 			if (g_stkdepth > LS_MAX_STACK_DEPTH)
1118 				fail(0, "max stack depth is %d",
1119 				    LS_MAX_STACK_DEPTH);
1120 			g_recsize = LS_STACK(g_stkdepth);
1121 			break;
1122 
1123 		case 'n':
1124 			if (!isdigit(optarg[0]))
1125 				usage();
1126 			g_nrecs = atoi(optarg);
1127 			break;
1128 
1129 		case 'd':
1130 			if (!isdigit(optarg[0]))
1131 				usage();
1132 			duration = atoll(optarg);
1133 
1134 			/*
1135 			 * XXX -- durations really should be per event
1136 			 * since the units are different, but it's hard
1137 			 * to express this nicely in the interface.
1138 			 * Not clear yet what the cleanest solution is.
1139 			 */
1140 			for (i = 0; i < LS_MAX_EVENTS; i++)
1141 				if (g_event_info[i].ev_type != 'E')
1142 					g_min_duration[i] = duration;
1143 
1144 			break;
1145 
1146 		case 'i':
1147 			if (!isdigit(optarg[0]))
1148 				usage();
1149 			i = atoi(optarg);
1150 			if (i <= 0)
1151 				usage();
1152 			if (i > MAX_HZ)
1153 				fail(0, "max interrupt rate is %d Hz", MAX_HZ);
1154 
1155 			for (j = 0; j < LS_MAX_EVENTS; j++)
1156 				if (strcmp(g_event_info[j].ev_desc,
1157 				    "Profiling interrupt") == 0)
1158 					break;
1159 
1160 			(void) sprintf(g_event_info[j].ev_name,
1161 			    "profile:::profile-%d", i);
1162 			break;
1163 
1164 		case 'l':
1165 		case 'f':
1166 			addrp = strtok(optarg, ",");
1167 			sizep = strtok(NULL, ",");
1168 			addrp = strtok(optarg, ",+");
1169 			offp = strtok(NULL, ",");
1170 
1171 			size = sizep ? strtoul(sizep, NULL, 0) : 1;
1172 			off = offp ? strtoul(offp, NULL, 0) : 0;
1173 
1174 			if (addrp[0] == '0') {
1175 				addr = strtoul(addrp, NULL, 16) + off;
1176 			} else {
1177 				addr = sym_to_addr(addrp) + off;
1178 				if (sizep == NULL)
1179 					size = sym_size(addrp) - off;
1180 				if (addr - off == 0)
1181 					fail(0, "symbol '%s' not found", addrp);
1182 				if (size == 0)
1183 					size = 1;
1184 			}
1185 
1186 
1187 			if (c == 'l') {
1188 				filter_add(&filt, "arg0", addr, size);
1189 			} else {
1190 				filter_add(&filt, "caller", addr, size);
1191 				filter_add(&ifilt, "arg0", addr, size);
1192 			}
1193 			break;
1194 
1195 		case 'e':
1196 			evp = strtok_r(optarg, ",", &lastp);
1197 			while (evp) {
1198 				int ev1, ev2;
1199 				char *evp2;
1200 
1201 				(void) strtok(evp, "-");
1202 				evp2 = strtok(NULL, "-");
1203 				ev1 = atoi(evp);
1204 				ev2 = evp2 ? atoi(evp2) : ev1;
1205 				if ((uint_t)ev1 >= LS_MAX_EVENTS ||
1206 				    (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
1207 					fail(0, "-e events out of range");
1208 				for (i = ev1; i <= ev2; i++)
1209 					g_enabled[i] = 1;
1210 				evp = strtok_r(NULL, ",", &lastp);
1211 			}
1212 			events_specified = 1;
1213 			break;
1214 
1215 		case 'c':
1216 			g_cflag = 1;
1217 			break;
1218 
1219 		case 'k':
1220 			g_kflag = 1;
1221 			break;
1222 
1223 		case 'w':
1224 			g_wflag = 1;
1225 			break;
1226 
1227 		case 'W':
1228 			g_Wflag = 1;
1229 			break;
1230 
1231 		case 'g':
1232 			g_gflag = 1;
1233 			break;
1234 
1235 		case 'C':
1236 		case 'E':
1237 		case 'H':
1238 		case 'I':
1239 			for (i = 0; i < LS_MAX_EVENTS; i++)
1240 				if (g_event_info[i].ev_type == c)
1241 					g_enabled[i] = 1;
1242 			events_specified = 1;
1243 			break;
1244 
1245 		case 'A':
1246 			for (i = 0; i < LS_MAX_EVENTS; i++)
1247 				if (strchr("CH", g_event_info[i].ev_type))
1248 					g_enabled[i] = 1;
1249 			events_specified = 1;
1250 			break;
1251 
1252 		case 'T':
1253 			g_tracing = 1;
1254 			break;
1255 
1256 		case 'D':
1257 			if (!isdigit(optarg[0]))
1258 				usage();
1259 			g_topn = atoi(optarg);
1260 			break;
1261 
1262 		case 'R':
1263 			g_rates = 1;
1264 			break;
1265 
1266 		case 'p':
1267 			g_pflag = 1;
1268 			break;
1269 
1270 		case 'P':
1271 			g_Pflag = 1;
1272 			break;
1273 
1274 		case 'o':
1275 			if ((out = fopen(optarg, "w")) == NULL)
1276 				fail(1, "error opening file");
1277 			break;
1278 
1279 		case 'V':
1280 			g_Vflag = 1;
1281 			break;
1282 
1283 		default:
1284 			if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
1285 				usage();
1286 		}
1287 	}
1288 
1289 	if (filt != NULL) {
1290 		predicate_add(&g_predicate, filt, NULL, 0);
1291 		filter_destroy(&filt);
1292 	}
1293 
1294 	if (ifilt != NULL) {
1295 		predicate_add(&g_ipredicate, ifilt, NULL, 0);
1296 		filter_destroy(&ifilt);
1297 	}
1298 
1299 	if (g_recsize == 0) {
1300 		if (g_gflag) {
1301 			g_stkdepth = LS_MAX_STACK_DEPTH;
1302 			g_recsize = LS_STACK(g_stkdepth);
1303 		} else {
1304 			g_recsize = LS_TIME;
1305 		}
1306 	}
1307 
1308 	if (g_gflag && g_recsize <= LS_STACK(0))
1309 		fail(0, "'-g' requires at least '-s 1' data gathering");
1310 
1311 	/*
1312 	 * Make sure the alignment is reasonable
1313 	 */
1314 	g_recsize = -(-g_recsize & -sizeof (uint64_t));
1315 
1316 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1317 		/*
1318 		 * If no events were specified, enable -C.
1319 		 */
1320 		if (!events_specified && g_event_info[i].ev_type == 'C')
1321 			g_enabled[i] = 1;
1322 	}
1323 
1324 	for (i = 0; i < LS_MAX_EVENTS; i++) {
1325 		if (!g_enabled[i])
1326 			continue;
1327 
1328 		if (g_event_info[i].ev_acquire != NULL) {
1329 			/*
1330 			 * If we've enabled a hold event, we must explicitly
1331 			 * allocate dynamic variable space.
1332 			 */
1333 			dynvar = 1;
1334 		}
1335 
1336 		dprog_addevent(i);
1337 	}
1338 
1339 	/*
1340 	 * Make sure there are remaining arguments to specify a child command
1341 	 * to execute.
1342 	 */
1343 	if (argc <= optind)
1344 		usage();
1345 
1346 	if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
1347 		dfail("couldn't determine number of online CPUs");
1348 
1349 	/*
1350 	 * By default, we set our data buffer size to be the number of records
1351 	 * multiplied by the size of the record, doubled to account for some
1352 	 * DTrace slop and divided by the number of CPUs.  We silently clamp
1353 	 * the aggregation size at both a minimum and a maximum to prevent
1354 	 * absurdly low or high values.
1355 	 */
1356 	if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
1357 		aggsize = MIN_AGGSIZE;
1358 
1359 	if (aggsize > MAX_AGGSIZE)
1360 		aggsize = MAX_AGGSIZE;
1361 
1362 	(void) sprintf(aggstr, "%lld", (long long)aggsize);
1363 
1364 	if (!g_tracing) {
1365 		if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
1366 			dfail("failed to set 'bufsize'");
1367 
1368 		if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
1369 			dfail("failed to set 'aggsize'");
1370 
1371 		if (dynvar) {
1372 			/*
1373 			 * If we're using dynamic variables, we set our
1374 			 * dynamic variable size to be one megabyte per CPU,
1375 			 * with a hard-limit of 32 megabytes.  This may still
1376 			 * be too small in some cases, but it can be tuned
1377 			 * manually via -x if need be.
1378 			 */
1379 			(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
1380 
1381 			if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
1382 				dfail("failed to set 'dynvarsize'");
1383 		}
1384 	} else {
1385 		if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
1386 			dfail("failed to set 'bufsize'");
1387 	}
1388 
1389 	if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
1390 		dfail("failed to set 'statusrate'");
1391 
1392 	optind = 1;
1393 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1394 		switch (c) {
1395 		case 'x':
1396 			if ((p = strchr(optarg, '=')) != NULL)
1397 				*p++ = '\0';
1398 
1399 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
1400 				dfail("failed to set -x %s", optarg);
1401 			break;
1402 		}
1403 	}
1404 
1405 	argc -= optind;
1406 	argv += optind;
1407 
1408 	dprog_compile();
1409 	status_init();
1410 
1411 	g_elapsed = -gethrtime();
1412 
1413 	/*
1414 	 * Spawn the specified command and wait for it to complete.
1415 	 */
1416 	child = fork();
1417 	if (child == -1)
1418 		fail(1, "cannot fork");
1419 	if (child == 0) {
1420 		(void) dtrace_close(g_dtp);
1421 		(void) execvp(argv[0], &argv[0]);
1422 		exec_errno = errno;
1423 		exit(127);
1424 	}
1425 
1426 #if defined(sun)
1427 	while (waitpid(child, &status, WEXITED) != child)
1428 #else
1429 	while (waitpid(child, &status, 0) != child)
1430 #endif
1431 		status_check();
1432 
1433 	g_elapsed += gethrtime();
1434 
1435 	if (WIFEXITED(status)) {
1436 		if (WEXITSTATUS(status) != 0) {
1437 			if (exec_errno != 0) {
1438 				errno = exec_errno;
1439 				fail(1, "could not execute %s", argv[0]);
1440 			}
1441 			(void) fprintf(stderr,
1442 			    "lockstat: warning: %s exited with code %d\n",
1443 			    argv[0], WEXITSTATUS(status));
1444 		}
1445 	} else {
1446 		(void) fprintf(stderr,
1447 		    "lockstat: warning: %s died on signal %d\n",
1448 		    argv[0], WTERMSIG(status));
1449 	}
1450 
1451 	if (dtrace_stop(g_dtp) == -1)
1452 		dfail("failed to stop dtrace");
1453 
1454 	/*
1455 	 * Before we read out the results, we need to allocate our buffer.
1456 	 * If we're tracing, then we'll just use the precalculated size.  If
1457 	 * we're not, then we'll take a snapshot of the aggregate, and walk
1458 	 * it to count the number of records.
1459 	 */
1460 	if (!g_tracing) {
1461 		if (dtrace_aggregate_snap(g_dtp) != 0)
1462 			dfail("failed to snap aggregate");
1463 
1464 		g_nrecs = 0;
1465 
1466 		if (dtrace_aggregate_walk(g_dtp,
1467 		    count_aggregate, &g_nrecs) != 0)
1468 			dfail("failed to walk aggregate");
1469 	}
1470 
1471 #if defined(sun)
1472 	if ((data_buf = memalign(sizeof (uint64_t),
1473 	    (g_nrecs + 1) * g_recsize)) == NULL)
1474 #else
1475 	if (posix_memalign((void **)&data_buf, sizeof (uint64_t),
1476 	    (g_nrecs + 1) * g_recsize) )
1477 #endif
1478 		fail(1, "Memory allocation failed");
1479 
1480 	/*
1481 	 * Read out the DTrace data.
1482 	 */
1483 	g_nrecs_used = process_data(out, data_buf);
1484 
1485 	if (g_nrecs_used > g_nrecs || g_dropped)
1486 		(void) fprintf(stderr, "lockstat: warning: "
1487 		    "ran out of data records (use -n for more)\n");
1488 
1489 	/* LINTED - alignment */
1490 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1491 	    /* LINTED - alignment */
1492 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1493 		ev_count[lsp->ls_event] += lsp->ls_count;
1494 		ev_time[lsp->ls_event] += lsp->ls_time;
1495 	}
1496 
1497 	/*
1498 	 * If -g was specified, convert stacks into individual records.
1499 	 */
1500 	if (g_gflag) {
1501 		lsrec_t *newlsp, *oldlsp;
1502 
1503 #if defined(sun)
1504 		newlsp = memalign(sizeof (uint64_t),
1505 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1506 #else
1507 		posix_memalign((void **)&newlsp, sizeof (uint64_t),
1508 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1509 #endif
1510 		if (newlsp == NULL)
1511 			fail(1, "Cannot allocate space for -g processing");
1512 		lsp = newlsp;
1513 		/* LINTED - alignment */
1514 		for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1515 		    /* LINTED - alignment */
1516 		    oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
1517 			int fr;
1518 			int caller_in_stack = 0;
1519 
1520 			if (oldlsp->ls_count == 0)
1521 				continue;
1522 
1523 			for (fr = 0; fr < g_stkdepth; fr++) {
1524 				if (oldlsp->ls_stack[fr] == 0)
1525 					break;
1526 				if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
1527 					caller_in_stack = 1;
1528 				bcopy(oldlsp, lsp, LS_TIME);
1529 				lsp->ls_caller = oldlsp->ls_stack[fr];
1530 				/* LINTED - alignment */
1531 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1532 			}
1533 			if (!caller_in_stack) {
1534 				bcopy(oldlsp, lsp, LS_TIME);
1535 				/* LINTED - alignment */
1536 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1537 			}
1538 		}
1539 		g_nrecs = g_nrecs_used =
1540 		    ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
1541 		g_recsize = LS_TIME;
1542 		g_stkdepth = 0;
1543 		free(data_buf);
1544 		data_buf = (char *)newlsp;
1545 	}
1546 
1547 	if ((sort_buf = calloc(2 * (g_nrecs + 1),
1548 	    sizeof (void *))) == NULL)
1549 		fail(1, "Sort buffer allocation failed");
1550 	merge_buf = sort_buf + (g_nrecs + 1);
1551 
1552 	/*
1553 	 * Build the sort buffer, discarding zero-count records along the way.
1554 	 */
1555 	/* LINTED - alignment */
1556 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1557 	    /* LINTED - alignment */
1558 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1559 		if (lsp->ls_count == 0)
1560 			lsp->ls_event = LS_MAX_EVENTS;
1561 		sort_buf[i] = lsp;
1562 	}
1563 
1564 	if (g_nrecs_used == 0)
1565 		exit(0);
1566 
1567 	/*
1568 	 * Add a sentinel after the last record
1569 	 */
1570 	sort_buf[i] = lsp;
1571 	lsp->ls_event = LS_MAX_EVENTS;
1572 
1573 	if (g_tracing) {
1574 		report_trace(out, sort_buf);
1575 		return (0);
1576 	}
1577 
1578 	/*
1579 	 * Application of -g may have resulted in multiple records
1580 	 * with the same signature; coalesce them.
1581 	 */
1582 	if (g_gflag) {
1583 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1584 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1585 	}
1586 
1587 	/*
1588 	 * Coalesce locks within the same symbol if -c option specified.
1589 	 * Coalesce PCs within the same function if -k option specified.
1590 	 */
1591 	if (g_cflag || g_kflag) {
1592 		for (i = 0; i < g_nrecs_used; i++) {
1593 			int fr;
1594 			lsp = sort_buf[i];
1595 			if (g_cflag)
1596 				coalesce_symbol(&lsp->ls_lock);
1597 			if (g_kflag) {
1598 				for (fr = 0; fr < g_stkdepth; fr++)
1599 					coalesce_symbol(&lsp->ls_stack[fr]);
1600 				coalesce_symbol(&lsp->ls_caller);
1601 			}
1602 		}
1603 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1604 		coalesce(lockcmp, sort_buf, g_nrecs_used);
1605 	}
1606 
1607 	/*
1608 	 * Coalesce callers if -w option specified
1609 	 */
1610 	if (g_wflag) {
1611 		mergesort(lock_and_count_cmp_anywhere,
1612 		    sort_buf, merge_buf, g_nrecs_used);
1613 		coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
1614 	}
1615 
1616 	/*
1617 	 * Coalesce locks if -W option specified
1618 	 */
1619 	if (g_Wflag) {
1620 		mergesort(site_and_count_cmp_anylock,
1621 		    sort_buf, merge_buf, g_nrecs_used);
1622 		coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
1623 	}
1624 
1625 	/*
1626 	 * Sort data by contention count (ls_count) or total time (ls_time),
1627 	 * depending on g_Pflag.  Override g_Pflag if time wasn't measured.
1628 	 */
1629 	if (g_recsize < LS_TIME)
1630 		g_Pflag = 0;
1631 
1632 	if (g_Pflag)
1633 		mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
1634 	else
1635 		mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
1636 
1637 	/*
1638 	 * Display data by event type
1639 	 */
1640 	first = &sort_buf[0];
1641 	while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
1642 		current = first;
1643 		while ((lsp = *current)->ls_event == event)
1644 			current++;
1645 		report_stats(out, first, current - first, ev_count[event],
1646 		    ev_time[event]);
1647 		first = current;
1648 	}
1649 
1650 	return (0);
1651 }
1652 
1653 static char *
1654 format_symbol(char *buf, uintptr_t addr, int show_size)
1655 {
1656 	uintptr_t symoff;
1657 	char *symname;
1658 	size_t symsize;
1659 
1660 	symname = addr_to_sym(addr, &symoff, &symsize);
1661 
1662 	if (show_size && symoff == 0)
1663 		(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
1664 	else if (symoff == 0)
1665 		(void) sprintf(buf, "%s", symname);
1666 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
1667 #if defined(sun)
1668 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
1669 #else
1670 		(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
1671 #endif
1672 	else if (symoff <= symsize || (symoff < 256 && addr != symoff))
1673 		(void) sprintf(buf, "%s+0x%llx", symname,
1674 		    (unsigned long long)symoff);
1675 	else
1676 		(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
1677 	return (buf);
1678 }
1679 
1680 static void
1681 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
1682 	uint64_t total_time)
1683 {
1684 	uint32_t event = sort_buf[0]->ls_event;
1685 	lsrec_t *lsp;
1686 	double ptotal = 0.0;
1687 	double percent;
1688 	int i, j, fr;
1689 	int displayed;
1690 	int first_bin, last_bin, max_bin_count, total_bin_count;
1691 	int rectype;
1692 	char buf[256];
1693 	char lhdr[80], chdr[80];
1694 
1695 	rectype = g_recsize;
1696 
1697 	if (g_topn == 0) {
1698 		(void) fprintf(out, "%20llu %s\n",
1699 		    g_rates == 0 ? total_count :
1700 		    ((unsigned long long)total_count * NANOSEC) / g_elapsed,
1701 		    g_event_info[event].ev_desc);
1702 		return;
1703 	}
1704 
1705 	(void) sprintf(lhdr, "%s%s",
1706 	    g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
1707 	(void) sprintf(chdr, "%s%s",
1708 	    g_wflag ? "Hottest " : "", "Caller");
1709 
1710 	if (!g_pflag)
1711 		(void) fprintf(out,
1712 		    "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
1713 		    g_event_info[event].ev_desc, (double)total_count,
1714 		    (double)g_elapsed / NANOSEC,
1715 		    (double)total_count * NANOSEC / g_elapsed);
1716 
1717 	if (!g_pflag && rectype < LS_HIST) {
1718 		(void) sprintf(buf, "%s", g_event_info[event].ev_units);
1719 		(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1720 		    g_rates ? "ops/s" : "Count",
1721 		    g_gflag ? "genr" : "indv",
1722 		    "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
1723 		(void) fprintf(out, "---------------------------------"
1724 		    "----------------------------------------------\n");
1725 	}
1726 
1727 	displayed = 0;
1728 	for (i = 0; i < nrecs; i++) {
1729 		lsp = sort_buf[i];
1730 
1731 		if (displayed++ >= g_topn)
1732 			break;
1733 
1734 		if (g_pflag) {
1735 			int j;
1736 
1737 			(void) fprintf(out, "%u %u",
1738 			    lsp->ls_event, lsp->ls_count);
1739 			(void) fprintf(out, " %s",
1740 			    format_symbol(buf, lsp->ls_lock, g_cflag));
1741 			(void) fprintf(out, " %s",
1742 			    format_symbol(buf, lsp->ls_caller, 0));
1743 			(void) fprintf(out, " %f",
1744 			    (double)lsp->ls_refcnt / lsp->ls_count);
1745 			if (rectype >= LS_TIME)
1746 				(void) fprintf(out, " %llu",
1747 				    (unsigned long long)lsp->ls_time);
1748 			if (rectype >= LS_HIST) {
1749 				for (j = 0; j < 64; j++)
1750 					(void) fprintf(out, " %u",
1751 					    lsp->ls_hist[j]);
1752 			}
1753 			for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
1754 				if (rectype <= LS_STACK(j) ||
1755 				    lsp->ls_stack[j] == 0)
1756 					break;
1757 				(void) fprintf(out, " %s",
1758 				    format_symbol(buf, lsp->ls_stack[j], 0));
1759 			}
1760 			(void) fprintf(out, "\n");
1761 			continue;
1762 		}
1763 
1764 		if (rectype >= LS_HIST) {
1765 			(void) fprintf(out, "---------------------------------"
1766 			    "----------------------------------------------\n");
1767 			(void) sprintf(buf, "%s",
1768 			    g_event_info[event].ev_units);
1769 			(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1770 			    g_rates ? "ops/s" : "Count",
1771 			    g_gflag ? "genr" : "indv",
1772 			    "cuml", "rcnt", buf, lhdr, chdr);
1773 		}
1774 
1775 		if (g_Pflag && total_time != 0)
1776 			percent = (lsp->ls_time * 100.00) / total_time;
1777 		else
1778 			percent = (lsp->ls_count * 100.00) / total_count;
1779 
1780 		ptotal += percent;
1781 
1782 		if (rectype >= LS_TIME)
1783 			(void) sprintf(buf, "%llu",
1784 			    (unsigned long long)(lsp->ls_time / lsp->ls_count));
1785 		else
1786 			buf[0] = '\0';
1787 
1788 		(void) fprintf(out, "%5llu ",
1789 		    g_rates == 0 ? lsp->ls_count :
1790 		    ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
1791 
1792 		(void) fprintf(out, "%3.0f%% ", percent);
1793 
1794 		if (g_gflag)
1795 			(void) fprintf(out, "---- ");
1796 		else
1797 			(void) fprintf(out, "%3.0f%% ", ptotal);
1798 
1799 		(void) fprintf(out, "%4.2f %8s ",
1800 		    (double)lsp->ls_refcnt / lsp->ls_count, buf);
1801 
1802 		(void) fprintf(out, "%-22s ",
1803 		    format_symbol(buf, lsp->ls_lock, g_cflag));
1804 
1805 		(void) fprintf(out, "%-24s\n",
1806 		    format_symbol(buf, lsp->ls_caller, 0));
1807 
1808 		if (rectype < LS_HIST)
1809 			continue;
1810 
1811 		(void) fprintf(out, "\n");
1812 		(void) fprintf(out, "%10s %31s %-9s %-24s\n",
1813 		    g_event_info[event].ev_units,
1814 		    "------ Time Distribution ------",
1815 		    g_rates ? "ops/s" : "count",
1816 		    rectype > LS_STACK(0) ? "Stack" : "");
1817 
1818 		first_bin = 0;
1819 		while (lsp->ls_hist[first_bin] == 0)
1820 			first_bin++;
1821 
1822 		last_bin = 63;
1823 		while (lsp->ls_hist[last_bin] == 0)
1824 			last_bin--;
1825 
1826 		max_bin_count = 0;
1827 		total_bin_count = 0;
1828 		for (j = first_bin; j <= last_bin; j++) {
1829 			total_bin_count += lsp->ls_hist[j];
1830 			if (lsp->ls_hist[j] > max_bin_count)
1831 				max_bin_count = lsp->ls_hist[j];
1832 		}
1833 
1834 		/*
1835 		 * If we went a few frames below the caller, ignore them
1836 		 */
1837 		for (fr = 3; fr > 0; fr--)
1838 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1839 				break;
1840 
1841 		for (j = first_bin; j <= last_bin; j++) {
1842 			uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
1843 			(void) fprintf(out, "%10llu |%s%s %-9u ",
1844 			    1ULL << j,
1845 			    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
1846 			    "                              " + depth,
1847 			    g_rates == 0 ? lsp->ls_hist[j] :
1848 			    (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
1849 			    g_elapsed));
1850 			if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
1851 				(void) fprintf(out, "\n");
1852 				continue;
1853 			}
1854 			(void) fprintf(out, "%-24s\n",
1855 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1856 			fr++;
1857 		}
1858 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1859 			(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
1860 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1861 			fr++;
1862 		}
1863 	}
1864 
1865 	if (!g_pflag)
1866 		(void) fprintf(out, "---------------------------------"
1867 		    "----------------------------------------------\n");
1868 
1869 	(void) fflush(out);
1870 }
1871 
1872 static void
1873 report_trace(FILE *out, lsrec_t **sort_buf)
1874 {
1875 	lsrec_t *lsp;
1876 	int i, fr;
1877 	int rectype;
1878 	char buf[256], buf2[256];
1879 
1880 	rectype = g_recsize;
1881 
1882 	if (!g_pflag) {
1883 		(void) fprintf(out, "%5s  %7s  %11s  %-24s  %-24s\n",
1884 		    "Event", "Time", "Owner", "Lock", "Caller");
1885 		(void) fprintf(out, "---------------------------------"
1886 		    "----------------------------------------------\n");
1887 	}
1888 
1889 	for (i = 0; i < g_nrecs_used; i++) {
1890 
1891 		lsp = sort_buf[i];
1892 
1893 		if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
1894 			continue;
1895 
1896 		(void) fprintf(out, "%2d  %10llu  %11p  %-24s  %-24s\n",
1897 		    lsp->ls_event, (unsigned long long)lsp->ls_time,
1898 		    (void *)lsp->ls_next,
1899 		    format_symbol(buf, lsp->ls_lock, 0),
1900 		    format_symbol(buf2, lsp->ls_caller, 0));
1901 
1902 		if (rectype <= LS_STACK(0))
1903 			continue;
1904 
1905 		/*
1906 		 * If we went a few frames below the caller, ignore them
1907 		 */
1908 		for (fr = 3; fr > 0; fr--)
1909 			if (lsp->ls_stack[fr] == lsp->ls_caller)
1910 				break;
1911 
1912 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1913 			(void) fprintf(out, "%53s  %-24s\n", "",
1914 			    format_symbol(buf, lsp->ls_stack[fr], 0));
1915 			fr++;
1916 		}
1917 		(void) fprintf(out, "\n");
1918 	}
1919 
1920 	(void) fflush(out);
1921 }
1922