xref: /titanic_51/usr/src/cmd/arcwatch/arcwatch.c (revision 1e194cd1a618eb48f311652742895fc33026c470)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <unistd.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <sys/wait.h>
7 #include <fcntl.h>
8 #include <sys/fs/zfs.h>
9 #include <sys/zfs_ioctl.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <dtrace.h>
13 #include <assert.h>
14 #include <sys/avl.h>
15 #include <sys/arc.h>
16 #include <stddef.h>
17 #include <pthread.h>
18 
/*
 * Header placed at the start of a recorded read trace.  run_dtrace() writes
 * it before any records and read_awr() validates it before parsing.
 */
#define	ARCWATCH_READ_MAGIC	"awrd"
#define	ARCWATCH_READ_VERSION	1

struct arc_read_hdr {
	char		arh_magic[4];	/* ARCWATCH_READ_MAGIC, no NUL */
	uint32_t	arh_version;	/* ARCWATCH_READ_VERSION */
};
typedef struct arc_read_hdr arc_read_hdr_t;
26 
/*
 * One traced buffer read.  process_trace() fills a record from the ten
 * values emitted by the D script and the same layout is what gets written
 * to, and read back from, a trace file.
 */
struct arc_read {
	uint64_t	ar_objset;	/* ds_object of the dataset, or 0 */
	uint64_t	ar_object;	/* db.db_object */
	uint64_t	ar_level;	/* db_level */
	uint64_t	ar_blkid;	/* db_blkid */
	uint64_t	ar_size;	/* db.db_size */
	uint64_t	ar_type;	/* dn_type of the dnode being held */
	uint64_t	ar_dva0;	/* b_dva.dva_word[0], or 0 */
	uint64_t	ar_dva1;	/* b_dva.dva_word[1], or 0 */
	uint64_t	ar_birth;	/* b_birth, or 0 */
	uint64_t	ar_spa;		/* b_spa, or 0 */
};
typedef struct arc_read arc_read_t;
39 
40 #define ARNS_IN_L1_CACHE	1
41 #define ARNS_IN_L2_CACHE	2
42 typedef struct arc_read_node {
43 	arc_read_t	arn_ar;
44 	avl_node_t	arn_node;
45 	uint64_t	arn_color;
46 	uint64_t	arn_state;
47 	uint64_t	arn_flags;
48 } arc_read_node_t;
49 
/*
 * Header placed at the start of an ARC content dump.  get_arc() writes it
 * ahead of the arc_info_t records and read_arc() validates magic and
 * version before parsing.
 */
#define	ARCWATCH_CONTENT_MAGIC		"awct"
#define	ARCWATCH_CONTENT_VERSION	1

struct arc_content_hdr {
	char		ach_magic[4];	/* ARCWATCH_CONTENT_MAGIC, no NUL */
	uint32_t	ach_version;	/* ARCWATCH_CONTENT_VERSION */
	uint64_t	ach_buckets;	/* aih_buckets as reported by the ioctl */
	uint64_t	ach_buf_locks;	/* aih_buf_locks as reported by the ioctl */
};
typedef struct arc_content_hdr arc_content_hdr_t;
59 
60 static const char *
61 state2str(arc_info_state_t state)
62 {
63 	switch (state) {
64 	case AIS_ANON:		return "anon";
65 	case AIS_MRU:		return "mru";
66 	case AIS_MRU_GHOST:	return "mru_ghost";
67 	case AIS_MFU:		return "mfu";
68 	case AIS_MFU_GHOST:	return "mfu_ghost";
69 	case AIS_L2C_ONLY:	return "l2c_only";
70 	case AIS_NO_L1HDR:	return "no_l1hdr";
71 	default:
72 	case AIS_UNKNOWN:	return "unknown";
73 	}
74 }
75 
/* Verbosity level; starts at zero and is bumped once for every -v given. */
static int g_verbose;
77 
/*
 * D program compiled by run_dtrace().
 *
 *  - BEGIN initialises the bookkeeping variables.
 *  - The fork entry/return clauses mark threads of processes forked by the
 *    target (self->child) so that their reads are traced as well.
 *  - dbuf_hold_impl:entry remembers the result pointer (args[6]) and the
 *    dnode type of args[0].
 *  - dbuf_hold_impl:return fires when the held dbuf has db_state == 4
 *    (presumably DB_CACHED -- confirm against the kernel's dbuf state enum)
 *    and emits ten trace() values describing the buffer.
 *
 * process_trace() asserts that each firing carries 15 records and reads the
 * values from records 3..12, so the number and order of the statements in
 * the final clause must not change without updating that function.
 *
 * The string is never modified, hence the const qualifier.
 */
static const char *d_prog =
	"dtrace:::BEGIN\n"
	"{\n"
	"        trackedpid[pid] = 0;\n"
	"        self->child = 0;\n"
	"        OPT_follow = 1;\n"
	"}\n"
	"syscall::fork*:entry\n"
	"/OPT_follow && (pid == $target || self->child)/\n"
	"{\n"
	"        trackedpid[pid] = 1;\n"
	"}\n"
	"syscall::fork*:return\n"
	"/OPT_follow && trackedpid[ppid]/\n"
	"{\n"
	"        self->child = 1;\n"
	"}\n"
	"fbt::dbuf_hold_impl:entry\n"
	"/pid == $target || self->child/\n"
	"{\n"
	"        self->dbp = args[6];\n"
	"        self->type = args[0]->dn_type;\n"
	"}\n"
	"fbt::dbuf_hold_impl:return\n"
	"/self->dbp && (*self->dbp)->db_state == 4/\n"
	"{\n"
	"        this->db = *self->dbp;\n"
	"        this->os = this->db->db_objset;\n"
	"        this->hdr = this->db->db_buf ? this->db->db_buf->b_hdr : 0;\n"
	"        trace(this->os->os_dsl_dataset ?\n"
	"              this->os->os_dsl_dataset->ds_object : 0);\n"
	"        trace(this->db->db.db_object);\n"
	"        trace(this->db->db_level);\n"
	"        trace(this->db->db_blkid);\n"
	"        trace(this->db->db.db_size);\n"
	"        trace(self->type);\n"
	"        trace(this->hdr ? this->hdr->b_dva.dva_word[0] : 0);\n"
	"        trace(this->hdr ? this->hdr->b_dva.dva_word[1] : 0);\n"
	"        trace(this->hdr ? this->hdr->b_birth : 0);\n"
	"        trace(this->hdr ? this->hdr->b_spa : 0);\n"
	"        self->dbp = 0;\n"
	"        self->type = 0;\n"
	"}\n";
121 
122 static int
123 awr_cmp(const void *x, const void *y)
124 {
125 	const arc_read_node_t *a = x;
126 	const arc_read_node_t *b = y;
127 
128 	if (a->arn_ar.ar_spa < b->arn_ar.ar_spa)
129 		return -1;
130 	if (a->arn_ar.ar_spa > b->arn_ar.ar_spa)
131 		return 1;
132 	if (a->arn_ar.ar_dva0 < b->arn_ar.ar_dva0)
133 		return -1;
134 	if (a->arn_ar.ar_dva0 > b->arn_ar.ar_dva0)
135 		return 1;
136 	if (a->arn_ar.ar_dva1 < b->arn_ar.ar_dva1)
137 		return -1;
138 	if (a->arn_ar.ar_dva1 > b->arn_ar.ar_dva1)
139 		return 1;
140 	if (a->arn_ar.ar_birth < b->arn_ar.ar_birth)
141 		return -1;
142 	if (a->arn_ar.ar_birth > b->arn_ar.ar_birth)
143 		return 1;
144 	return 0;
145 }
146 
147 static int
148 drophandler(const dtrace_dropdata_t *data, void *arg)
149 {
150 	fprintf(stderr, "type %d drops %lld\n", data->dtdda_kind, data->dtdda_drops);
151 	fprintf(stderr, "dtrace drops encountered. Try increasing buffers.\n");
152 	exit(1);
153 }
154 
/*
 * libdtrace process handler.  'arg' points at an int flag which is set to 1
 * to tell the consumer loop that the process handler has fired.  The process
 * handle and the message are not used.
 */
static void
prochandler(struct ps_prochandle *P, const char *msg, void *arg)
{
	(void) P;
	(void) msg;

	*(int *)arg = 1;
}
162 
163 static uint64_t
164 get_val(caddr_t base, dtrace_recdesc_t *rec)
165 {
166 	uint64_t val = 0;
167 
168 	assert(rec->dtrd_action == DTRACEACT_DIFEXPR);
169 	assert(rec->dtrd_size > 0);
170 	assert(rec->dtrd_size <= 8);
171 
172 	memcpy(&val, base + rec->dtrd_offset, rec->dtrd_size);
173 
174 	return val;
175 }
176 
177 typedef struct trace_args {
178 	int		ofd;
179 	avl_tree_t	*awr;
180 	pthread_mutex_t	mtx;
181 	int		ptr;
182 	char		buf[16384];
183 } trace_args_t;
184 
185 static int
186 process_trace(const dtrace_probedata_t *data, void *arg)
187 {
188 	dtrace_eprobedesc_t *edesc = data->dtpda_edesc;
189 	caddr_t base = data->dtpda_data;
190 	dtrace_recdesc_t *rec = edesc->dtepd_rec;
191 	trace_args_t *ta = arg;
192 	arc_read_t ar;
193 	int ret;
194 
195 	assert(edesc->dtepd_nrecs == 15);
196 
197 	ar.ar_objset = get_val(base, rec + 3);
198 	ar.ar_object = get_val(base, rec + 4);
199 	ar.ar_level = get_val(base, rec + 5);
200 	ar.ar_blkid = get_val(base, rec + 6);
201 	ar.ar_size = get_val(base, rec + 7);
202 	ar.ar_type = get_val(base, rec + 8);
203 	ar.ar_dva0 = get_val(base, rec + 9);
204 	ar.ar_dva1 = get_val(base, rec + 10);
205 	ar.ar_birth = get_val(base, rec + 11);
206 	ar.ar_spa = get_val(base, rec + 12);
207 
208 	if (ta->ofd != -1) {
209 		pthread_mutex_lock(&ta->mtx);
210 		if (ta->ptr + sizeof(ar) > sizeof(ta->buf)) {
211 			ret = write(ta->ofd, ta->buf, ta->ptr);
212 			if (ret == -1) {
213 				fprintf(stderr,
214 					"cannot write to output file: %s\n",
215 					strerror(errno));
216 				exit(1);
217 			}
218 			ta->ptr = 0;
219 		}
220 		memcpy(ta->buf + ta->ptr, &ar, sizeof(ar));
221 		ta->ptr += sizeof(ar);
222 		pthread_mutex_unlock(&ta->mtx);
223 	}
224 
225 	if (ta->awr) {
226 		arc_read_node_t *arn;
227 
228 		arn = calloc(sizeof(*arn), 1);
229 		assert(arn);
230 		arn->arn_ar = ar;
231 		pthread_mutex_lock(&ta->mtx);
232 		if (avl_find(ta->awr, arn, NULL) == NULL)
233 			avl_add(ta->awr, arn);
234 		pthread_mutex_unlock(&ta->mtx);
235 	}
236 
237 	if (g_verbose) {
238 		printf("spa %llx objset %lld object %lld level %lld blkid "
239 		    "%lld size %lld type %lld dva %16x:%16x birth %lld\n",
240 		    ar.ar_spa, ar.ar_objset, ar.ar_object, ar.ar_level,
241 		    ar.ar_blkid, ar.ar_size, ar.ar_type,
242 		    ar.ar_dva0, ar.ar_dva1, ar.ar_birth);
243 	}
244 
245 	return (DTRACE_CONSUME_NEXT);
246 }
247 
248 static void
249 d_fatal(dtrace_hdl_t *dtp, char *msg)
250 {
251 	fprintf(stderr, "%s: %s\n", msg, dtrace_errmsg(dtp, dtrace_errno(dtp)));
252 	exit(1);
253 }
254 
255 static int
256 run_dtrace(char *bufsize, char *out_fn, avl_tree_t *awr, int argc, char **argv)
257 {
258 	dtrace_prog_t *dp;
259 	dtrace_hdl_t *dtp;
260 	dtrace_proginfo_t info;
261 	struct ps_prochandle *p;
262 	int err;
263 	int proc_done = 0;
264 	int done = 0;
265 	int ofd = -1;
266 	arc_read_hdr_t arh = { 0 };
267 	trace_args_t ta = { 0 };
268 
269 	ta.ofd = -1;
270 	ta.awr = awr;
271 	pthread_mutex_init(&ta.mtx, NULL);
272 
273 	if (out_fn) {
274 		ofd = open(out_fn, O_CREAT | O_TRUNC | O_WRONLY, 0644);
275 		if (ofd == -1) {
276 			printf("cannot open output file %s: %s\n",
277 			    out_fn, strerror(errno));
278 			exit(1);
279 		}
280 		memcpy(arh.arh_magic, ARCWATCH_READ_MAGIC,
281 		    sizeof(arh.arh_magic));
282 		arh.arh_version = ARCWATCH_READ_VERSION;
283 		err = write(ofd, &arh, sizeof(arh));
284 		if (err == -1) {
285 			printf("cannot write to output file: %s\n",
286 			    strerror(errno));
287 			exit(1);
288 		}
289 		ta.ofd = ofd;
290 	}
291 
292 	dtp = dtrace_open(DTRACE_VERSION, 0, &err);
293 	if (dtp == NULL) {
294 		printf("cannot open dtrace library: %s\n",
295 		    dtrace_errmsg(NULL, err));
296 		exit(1);
297 	}
298 
299 	if (dtrace_handle_drop(dtp, &drophandler, NULL) == -1)
300 		d_fatal(dtp, "couldn't establish drop handler");
301 
302 	if (dtrace_handle_proc(dtp, &prochandler, &proc_done) == -1)
303 		d_fatal(dtp, "failed to establish proc handler");
304 
305 	if (dtrace_setopt(dtp, "bufsize", bufsize) == -1)
306 		d_fatal(dtp, "failed to set bufsize");
307 
308 	/* XXX TODO understand dynvar drops */
309 	if (dtrace_setopt(dtp, "dynvarsize", "4m") == -1)
310 		d_fatal(dtp, "failed to set dynvarsize");
311 
312 	if (dtrace_setopt(dtp, "temporal", "no") == -1)
313 		d_fatal(dtp, "failed to set temporal");
314 
315 	if (dtrace_setopt(dtp, "switchrate", "100hz") == -1)
316 		d_fatal(dtp, "failed to set switchrate");
317 
318 	if (dtrace_setopt(dtp, "cleanrate", "100hz") == -1)
319 		d_fatal(dtp, "failed to set cleanrate");
320 
321 	p = dtrace_proc_create(dtp, argv[0], &argv[0]);
322 	if (p == NULL)
323 		d_fatal(dtp, "creating process failed");
324 
325 	dp = dtrace_program_strcompile(dtp, d_prog, DTRACE_PROBESPEC_NAME, 0,
326 	    0, NULL);
327 	if (dp == NULL)
328 		d_fatal(dtp, "failed to compile program");
329 
330 	if (dtrace_program_exec(dtp, dp, &info) == -1)
331 		d_fatal(dtp, "failed to enable probes");
332 
333 	if (dtrace_go(dtp))
334 		d_fatal(dtp, "couldn't start tracing");
335 
336 	(void) dtrace_proc_continue(dtp, p);
337 
338 	do {
339 		dtrace_sleep(dtp);
340 
341 		if (proc_done) {
342 			done = 1;
343 			(void) dtrace_stop(dtp);
344 		}
345 
346 		err = dtrace_work(dtp, stdout, process_trace, NULL, &ta);
347 		if (err == DTRACE_WORKSTATUS_DONE)
348 			done = 1;
349 	} while (!done);
350 
351 	if (ta.ptr > 0) {
352 		err = write(ta.ofd, ta.buf, ta.ptr);
353 		if (err == -1) {
354 			fprintf(stderr,
355 				"cannot write to output file: %s\n",
356 				strerror(errno));
357 			exit(1);
358 		}
359 	}
360 	(void) dtrace_close(dtp);
361 	if (ofd != -1)
362 		close(ofd);
363 
364 	return (0);
365 }
366 
367 static void
368 read_awr(avl_tree_t *awr, char *in_fn)
369 {
370 	int fd;
371 	int ret;
372 	arc_read_hdr_t arh;
373 	char buf[1000 * sizeof(arc_read_t)];
374 	int blen = 0;
375 	int ptr = 0;
376 
377 	fd = open(in_fn, O_RDONLY);
378 	if (fd == -1) {
379 		fprintf(stderr, "failed to open input: %s\n",
380 			strerror(errno));
381 		exit(1);
382 	}
383 	ret = read(fd, &arh, sizeof(arh));
384 	if (ret == -1) {
385 		fprintf(stderr, "failed to read input: %s\n",
386 			strerror(errno));
387 		exit(1);
388 	}
389 	if (ret != sizeof(arh)) {
390 		fprintf(stderr, "failed to read input: truncated file\n");
391 		exit(1);
392 	}
393 	if (memcmp(arh.arh_magic, ARCWATCH_READ_MAGIC, 4) != 0) {
394 		fprintf(stderr, "failed to read input: bad file magic\n");
395 		exit(1);
396 	}
397 	if (arh.arh_version != ARCWATCH_READ_VERSION) {
398 		fprintf(stderr, "failed to read input: bad file version\n");
399 		exit(1);
400 	}
401 	while (1) {
402 		arc_read_node_t *arn = calloc(sizeof(*arn), 1);
403 
404 		assert(arn);
405 		if (blen == ptr) {
406 			ret = read(fd, buf, sizeof(buf));
407 			if (ret == 0)
408 				break;
409 			if (ret == -1) {
410 				fprintf(stderr, "failed to read input: %s\n",
411 					strerror(errno));
412 				exit(1);
413 			}
414 			blen = ret;
415 			ptr = 0;
416 		}
417 		if ((blen - ptr) < sizeof(arn->arn_ar)) {
418 			fprintf(stderr,
419 				"failed to read input: truncated file\n");
420 			exit(1);
421 		}
422 		memcpy(&arn->arn_ar, buf + ptr, sizeof(arn->arn_ar));
423 		ptr += sizeof(arn->arn_ar);
424 
425 		if (g_verbose >= 2) {
426 			arc_read_t *ar = &arn->arn_ar;
427 
428 			printf("spa %llx objset % 8lld object % 8lld "
429 			    "level %lld blkid % 8lld size % 6lld type % 3lld "
430 			    "dva %016x:%016x birth % 8lld\n",
431 			    ar->ar_spa, ar->ar_objset, ar->ar_object,
432 			    ar->ar_level, ar->ar_blkid, ar->ar_size,
433 			    ar->ar_type, ar->ar_dva0, ar->ar_dva1,
434 			    ar->ar_birth);
435 		}
436 
437 		if (avl_find(awr, arn, NULL) == NULL)
438 			avl_add(awr, arn);
439 	}
440 	close(fd);
441 }
442 
443 static void
444 read_arc(avl_tree_t *awr, char *in_fn, uint64_t color)
445 {
446 	int fd;
447 	int ret;
448 	arc_content_hdr_t ach;
449 	char buf[1000 * sizeof(arc_info_t)];
450 	int ptr = 0;
451 	int blen = 0;
452 
453 	fd = open(in_fn, O_RDONLY);
454 	if (fd == -1) {
455 		fprintf(stderr, "failed to open input: %s\n",
456 			strerror(errno));
457 		exit(1);
458 	}
459 	ret = read(fd, &ach, sizeof(ach));
460 	if (ret == -1) {
461 		fprintf(stderr, "failed to read input: %s\n",
462 			strerror(errno));
463 		exit(1);
464 	}
465 	if (ret != sizeof(ach)) {
466 		fprintf(stderr, "failed to read input: truncated file\n");
467 		exit(1);
468 	}
469 	if (memcmp(ach.ach_magic, ARCWATCH_CONTENT_MAGIC, 4) != 0) {
470 		fprintf(stderr, "failed to read input: bad file magic\n");
471 		exit(1);
472 	}
473 	if (ach.ach_version != ARCWATCH_CONTENT_VERSION) {
474 		fprintf(stderr, "failed to read input: bad file version\n");
475 		exit(1);
476 	}
477 	while (1) {
478 		arc_info_t ai;
479 		arc_read_node_t search;
480 		arc_read_node_t *arn;
481 
482 		if (blen == ptr) {
483 			ret = read(fd, buf, sizeof(buf));
484 			if (ret == 0)
485 				break;
486 			if (ret == -1) {
487 				fprintf(stderr, "failed to read input: %s\n",
488 					strerror(errno));
489 				exit(1);
490 			}
491 			blen = ret;
492 			ptr = 0;
493 		}
494 		if ((blen - ptr) < sizeof(ai)) {
495 			fprintf(stderr,
496 				"failed to read input: truncated file\n");
497 			exit(1);
498 		}
499 		memcpy(&ai, buf + ptr, sizeof(ai));
500 		ptr += sizeof(ai);
501 
502 		search.arn_ar.ar_spa = ai.ai_spa;
503 		search.arn_ar.ar_dva0 = ai.ai_dva.dva_word[0];
504 		search.arn_ar.ar_dva1 = ai.ai_dva.dva_word[1];
505 		search.arn_ar.ar_birth = ai.ai_birth;
506 
507 		arn = avl_find(awr, &search, NULL);
508 		if (arn) {
509 			arn->arn_color = color;
510 			arn->arn_flags = ai.ai_flags;
511 			arn->arn_state = ai.ai_state;
512 		}
513 	}
514 	close(fd);
515 }
516 
517 #define BUFSZ 1048576	/* 1MB */
518 static void
519 get_arc(avl_tree_t *awr, uint64_t color, char *out_fn)
520 {
521 	int ret;
522 	int fd;
523 	void *buf = malloc(BUFSZ);
524 	zfs_cmd_t cmd = {0};
525 	arc_info_t *ai;
526 	arc_info_hdr_t *aih;
527 	int ofd = -1;
528 	int hdr_written = 0;
529 	char wbuf[16384];
530 	int wptr = 0;
531 
532 	fd = open("/dev/zfs", O_RDWR);
533 	if (fd == -1) {
534 		fprintf(stderr, "failed to open /dev/zfs: %s\n",
535 		    strerror(errno));
536 		exit(1);
537 	}
538 	assert(buf);
539 	cmd.zc_obj = 0;
540 	cmd.zc_nvlist_dst = (uint64_t)buf;
541 	cmd.zc_nvlist_dst_size = BUFSZ;
542 
543 	if (out_fn != NULL) {
544 		ofd = open(out_fn, O_CREAT | O_TRUNC | O_WRONLY, 0644);
545 		if (ofd == -1) {
546 			printf("cannot open output file %s: %s\n",
547 			    out_fn, strerror(errno));
548 			exit(1);
549 		}
550 	}
551 	do {
552 		int i;
553 
554 		ret = ioctl(fd, ZFS_IOC_ARC_INFO, &cmd);
555 		if (ret == -1) {
556 			printf("ioctl failed with %d=%s\n", errno,
557 			    strerror(errno));
558 			exit(1);
559 		}
560 		aih = buf;
561 		ai = buf + sizeof(aih);
562 		if (ofd != -1 && !hdr_written) {
563 			arc_content_hdr_t ach;
564 
565 			memcpy(ach.ach_magic, ARCWATCH_CONTENT_MAGIC,
566 			    sizeof(ach.ach_magic));
567 			ach.ach_version = ARCWATCH_CONTENT_VERSION;
568 			ach.ach_buckets = aih->aih_buckets;
569 			ach.ach_buf_locks = aih->aih_buf_locks;
570 			ret = write(ofd, &ach, sizeof(ach));
571 			if (ret == -1) {
572 				printf("cannot write to output file: %s\n",
573 				    strerror(errno));
574 				exit(1);
575 			}
576 			hdr_written = 1;
577 		}
578 		for (i = 0; i < aih->aih_entries; ++i) {
579 			ai = ((arc_info_t *)(aih + 1)) + i;
580 			if (g_verbose) {
581 				printf("dva %016llx:%016llx birth %7d "
582 					"spa %016llx "
583 					"size % 8d flags %016x state %s\n",
584 					ai->ai_dva.dva_word[0],
585 					ai->ai_dva.dva_word[1],
586 					ai->ai_birth,
587 					ai->ai_spa,
588 					ai->ai_size,
589 					ai->ai_flags,
590 					state2str(ai->ai_state));
591 			}
592 			if (awr) {
593 				arc_read_node_t search;
594 				arc_read_node_t *arn;
595 
596 				search.arn_ar.ar_spa = ai->ai_spa;
597 				search.arn_ar.ar_dva0 = ai->ai_dva.dva_word[0];
598 				search.arn_ar.ar_dva1 = ai->ai_dva.dva_word[1];
599 				search.arn_ar.ar_birth = ai->ai_birth;
600 
601 				arn = avl_find(awr, &search, NULL);
602 				if (arn) {
603 					arn->arn_color = color;
604 					arn->arn_flags = ai->ai_flags;
605 					arn->arn_state = ai->ai_state;
606 				}
607 			}
608 			if (ofd != -1) {
609 				if (wptr + sizeof(*ai) > sizeof(wbuf)) {
610 					ret = write(ofd, wbuf, wptr);
611 					if (ret == -1) {
612 						printf("cannot write to output "
613 							"file: %s\n",
614 						    strerror(errno));
615 						exit(1);
616 					}
617 					wptr = 0;
618 				}
619 				memcpy(wbuf + wptr, ai, sizeof(*ai));
620 				wptr += sizeof(*ai);
621 			}
622 		}
623 		cmd.zc_obj = aih->aih_next;
624 	} while (cmd.zc_obj != 0);
625 
626 	if (wptr > 0) {
627 		ret = write(ofd, wbuf, wptr);
628 		if (ret == -1) {
629 			printf("cannot write to output "
630 				"file: %s\n",
631 			    strerror(errno));
632 		}
633 		exit(1);
634 	}
635 	close(fd);
636 	close(ofd);
637 	free(buf);
638 }
639 
640 static void
641 awr_stat(avl_tree_t *awr, uint64_t color)
642 {
643 	arc_read_node_t *arn = avl_first(awr);
644 	uint64_t bufs_total = 0;
645 	uint64_t bufs_in_l1 = 0;
646 	uint64_t bufs_in_l1_ghost = 0;
647 	uint64_t bufs_in_l2 = 0;
648 	uint64_t bytes_total = 0;
649 	uint64_t bytes_in_l1 = 0;
650 	uint64_t bytes_in_l1_ghost = 0;
651 	uint64_t bytes_in_l2 = 0;
652 
653 	while (arn) {
654 		arc_read_t *ar = &arn->arn_ar;
655 		if (g_verbose) {
656 			printf("dva %016llx:%016llx birth % 8d "
657 			       "spa %016llx size % 8d ",
658 				ar->ar_dva0,
659 				ar->ar_dva1,
660 				ar->ar_birth,
661 				ar->ar_spa,
662 				ar->ar_size,
663 				arn->arn_color);
664 			if (arn->arn_color == color)
665 				printf("flags %016x state %s\n",
666 				    arn->arn_flags,
667 				    state2str(arn->arn_state));
668 			else
669 				printf("not in ARC\n");
670 		}
671 		if (arn->arn_color == color) {
672 			if (arn->arn_state == AIS_MRU ||
673 			    arn->arn_state == AIS_MFU) {
674 				++bufs_in_l1;
675 				bytes_in_l1 += ar->ar_size;
676 			} else if (arn->arn_state == AIS_MRU_GHOST ||
677 			           arn->arn_state == AIS_MFU_GHOST) {
678 				++bufs_in_l1_ghost;
679 				bytes_in_l1_ghost =+ ar->ar_size;
680 			}
681 			if (arn->arn_flags & ARC_FLAG_HAS_L2HDR) {
682 				++bufs_in_l2;
683 				bytes_in_l2 += ar->ar_size;
684 			}
685 		}
686 		++bufs_total;
687 		bytes_total += ar->ar_size;
688 		arn = AVL_NEXT(awr, arn);
689 	}
690 	if (g_verbose) {
691 		printf("\n");
692 	}
693 	printf("         |       bufs |            bytes\n");
694 	printf("---------+------------+-----------------\n");
695 	printf("   in l1 | % 10lld | %16lld\n", bufs_in_l1, bytes_in_l1);
696 	printf("l1 ghost | % 10lld | %16lld\n", bufs_in_l1_ghost,
697 	    bytes_in_l1_ghost);
698 	printf("   in l2 | % 10lld | %16lld\n", bufs_in_l2, bytes_in_l2);
699 	printf("   total | % 10lld | %16lld\n", bufs_total, bytes_total);
700 	printf("\n");
701 }
702 
/*
 * Print the command synopsis and option summary on stderr, using 'basename'
 * as the program name, then exit with status 1.  Does not return.
 */
static void
usage(const char *basename)
{
	static const char options_help[] =
	    "\tOptions:\n"
	    "\t  -c           run command and record read blocks\n"
	    "\t  -i filename  read previously recorded output from -o instead\n"
	    "\t               of running a command\n"
	    "\t  -b bufsize   change tracing bufsize\n"
	    "\t  -a           dump arc\n"
	    "\t  -v           verbose\n"
	    "\t  -w           watch decay of buffers in arc\n"
	    "\t  -d seconds   watch interval\n"
	    "\t  -o filename  write output to file\n";

	(void) fprintf(stderr, "Usage: %s -d [options]\n", basename);
	(void) fprintf(stderr,
	    "       %s {-c | -i} [options] [command [args]]\n\n", basename);
	(void) fputs(options_help, stderr);
	exit(1);
}
722 
723 /*
724  * TODO: compare 2 traces
725  * TODO: compare 2 arc infos
726  * TODO: persistent spa numbering
727  */
728 int
729 main(int argc, char **argv)
730 {
731         extern char *optarg;
732         extern int optind;
733         int c;
734 	char *bufsize = "4m";
735 	int run_cmd = 0;
736 	int watch = 0;
737 	char *basename;
738 	char *out_fn = NULL;
739 	char *in_fn = NULL;
740 	avl_tree_t awr;
741 	uint64_t color = 0;
742 	int interval = 10;
743 	int dump_arc = 0;
744 	char *arc_fn = NULL;
745 
746 	avl_create(&awr, awr_cmp, sizeof(arc_read_node_t),
747 	    offsetof(arc_read_node_t, arn_node));
748 	basename = strrchr(argv[0], '/');
749 	if (basename == NULL)
750 		basename = argv[0];
751 
752 	while ((c = getopt(argc, argv, "b:o:i:cvwhd:aI:")) != EOF) {
753 		switch(c) {
754 		case 'b':
755 			bufsize = optarg;
756 			break;
757 		case 'c':
758 			run_cmd = 1;
759 			break;
760 		case 'w':
761 			watch = 1;
762 			break;
763 		case 'v':
764 			++g_verbose;
765 			break;
766 		case 'o':
767 			out_fn = optarg;
768 			break;
769 		case 'i':
770 			in_fn = optarg;
771 			break;
772 		case 'I':
773 			arc_fn = optarg;
774 			break;
775 		case 'a':
776 			dump_arc = 1;
777 			break;
778 		case 'd':
779 			interval = atoi(optarg);
780 			break;
781 		case 'h':
782 		default:
783 			usage(basename);
784 		}
785 	}
786 
787 	if (optind != argc && !run_cmd) {
788 		fprintf(stderr, "command given without -c switch\n");
789 		exit(1);
790 	}
791 	if (dump_arc) {
792 		get_arc(NULL, 0, out_fn);
793 		exit(0);
794 	}
795 	if (arc_fn != NULL && !run_cmd && in_fn == NULL) {
796 		fprintf(stderr, "-I given without -c and -i\n");
797 		exit(1);
798 	}
799 	if (arc_fn != NULL && watch) {
800 		fprintf(stderr, "-I given with -w\n");
801 		exit(1);
802 	}
803 	if (run_cmd && (in_fn != NULL)) {
804 		fprintf(stderr, "-i and -c are mutually exclusive\n");
805 		exit(1);
806 	}
807 	if (run_cmd) {
808 		if (optind == argc) {
809 			fprintf(stderr, "no command given\n");
810 			exit(1);
811 		}
812 		run_dtrace(bufsize, out_fn, &awr, argc - optind, argv + optind);
813 	}
814 	if (in_fn)
815 		read_awr(&awr, in_fn);
816 	if (watch) {
817 		while (1) {
818 			get_arc(&awr, ++color, NULL);
819 			awr_stat(&awr, color);
820 			sleep(10);
821 		}
822 	}
823 	if (arc_fn) {
824 		read_arc(&awr, arc_fn, 1);
825 		awr_stat(&awr, 1);
826 	}
827 
828 	exit(0);
829 }
830