xref: /titanic_50/usr/src/cmd/arcwatch/arcwatch.c (revision 87aafc05e247a75cc8434e694c3b98c74a1287f0)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <unistd.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <sys/wait.h>
7 #include <fcntl.h>
8 #include <sys/fs/zfs.h>
9 #include <sys/zfs_ioctl.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <dtrace.h>
13 #include <assert.h>
14 #include <sys/avl.h>
15 #include <sys/arc.h>
16 #include <stddef.h>
17 #include <pthread.h>
18 
/* Identification of a recorded read-trace file: magic bytes and version. */
#define	ARCWATCH_READ_VERSION	1
#define	ARCWATCH_READ_MAGIC	"awrd"

/*
 * Header placed at the very start of a read-trace file.  run_dtrace()
 * writes it and read_awr() validates it; a stream of arc_read_t records
 * follows.
 */
typedef struct arc_read_hdr arc_read_hdr_t;

struct arc_read_hdr {
	char		arh_magic[4];	/* ARCWATCH_READ_MAGIC, no NUL */
	uint32_t	arh_version;	/* ARCWATCH_READ_VERSION */
};
26 
/*
 * One traced buffer read.  The fields are filled, in this order, from the
 * ten trace() records emitted by the dbuf_hold_impl:return clause of the
 * D program, and the struct is written verbatim to the read-trace file.
 */
typedef struct arc_read arc_read_t;

struct arc_read {
	uint64_t	ar_objset;	/* ds_object of the dataset, or 0 */
	uint64_t	ar_object;	/* db.db_object */
	uint64_t	ar_level;	/* db_level */
	uint64_t	ar_blkid;	/* db_blkid */
	uint64_t	ar_size;	/* db.db_size */
	uint64_t	ar_type;	/* dn_type of the held dnode */
	uint64_t	ar_dva0;	/* b_dva.dva_word[0], or 0 */
	uint64_t	ar_dva1;	/* b_dva.dva_word[1], or 0 */
	uint64_t	ar_birth;	/* b_birth, or 0 */
	uint64_t	ar_spa;		/* b_spa, or 0 */
};
39 
40 #define ARNS_IN_L1_CACHE	1
41 #define ARNS_IN_L2_CACHE	2
42 typedef struct arc_read_node {
43 	arc_read_t	arn_ar;
44 	avl_node_t	arn_node;
45 	uint64_t	arn_color;
46 	uint64_t	arn_state;
47 	uint64_t	arn_flags;
48 } arc_read_node_t;
49 
/* Identification of an ARC content dump file: magic bytes and version. */
#define	ARCWATCH_CONTENT_VERSION	1
#define	ARCWATCH_CONTENT_MAGIC		"awct"

/*
 * Header placed at the start of an ARC content dump.  get_arc() writes it
 * and read_arc() validates it; arc_info_t records follow.
 */
typedef struct arc_content_hdr arc_content_hdr_t;

struct arc_content_hdr {
	char		ach_magic[4];	/* ARCWATCH_CONTENT_MAGIC, no NUL */
	uint32_t	ach_version;	/* ARCWATCH_CONTENT_VERSION */
	uint64_t	ach_buckets;	/* aih_buckets from the first ioctl */
	uint64_t	ach_buf_locks;	/* aih_buf_locks from the first ioctl */
};
59 
60 static const char *
61 state2str(arc_info_state_t state)
62 {
63 	switch (state) {
64 	case AIS_ANON:		return "anon";
65 	case AIS_MRU:		return "mru";
66 	case AIS_MRU_GHOST:	return "mru_ghost";
67 	case AIS_MFU:		return "mfu";
68 	case AIS_MFU_GHOST:	return "mfu_ghost";
69 	case AIS_L2C_ONLY:	return "l2c_only";
70 	case AIS_NO_L1HDR:	return "no_l1hdr";
71 	default:
72 	case AIS_UNKNOWN:	return "unknown";
73 	}
74 }
75 
/* Verbosity level; bumped once for every -v on the command line. */
static int g_verbose = 0;

/*
 * D program compiled by run_dtrace().
 *
 * The BEGIN and syscall::fork* clauses mark the target process and threads
 * of processes it forks, so that reads done by children are traced too.
 * The dbuf_hold_impl:entry clause remembers the dbuf out-pointer (args[6])
 * and the dnode type.  The dbuf_hold_impl:return clause fires only when the
 * returned dbuf has db_state == 4 (presumably DB_CACHED -- confirm against
 * sys/dbuf.h) and emits exactly ten trace() records, which process_trace()
 * decodes positionally.  Do not add, remove or reorder statements in that
 * clause without updating process_trace().
 *
 * The text is a string literal and therefore read-only, hence const.
 */
static const char *d_prog =
	"dtrace:::BEGIN\n"
	"{\n"
	"        trackedpid[pid] = 0;\n"
	"        self->child = 0;\n"
	"        OPT_follow = 1;\n"
	"}\n"
	"syscall::fork*:entry\n"
	"/OPT_follow && (pid == $target || self->child)/\n"
	"{\n"
	"        trackedpid[pid] = 1;\n"
	"}\n"
	"syscall::fork*:return\n"
	"/OPT_follow && trackedpid[ppid]/\n"
	"{\n"
	"        self->child = 1;\n"
	"}\n"
	"fbt::dbuf_hold_impl:entry\n"
	"/pid == $target || self->child/\n"
	"{\n"
	"        self->dbp = args[6];\n"
	"        self->type = args[0]->dn_type;\n"
	"}\n"
	"fbt::dbuf_hold_impl:return\n"
	"/self->dbp && (*self->dbp)->db_state == 4/\n"
	"{\n"
	"        this->db = *self->dbp;\n"
	"        this->os = this->db->db_objset;\n"
	"        this->hdr = this->db->db_buf ? this->db->db_buf->b_hdr : 0;\n"
	"        trace(this->os->os_dsl_dataset ?\n"
	"              this->os->os_dsl_dataset->ds_object : 0);\n"
	"        trace(this->db->db.db_object);\n"
	"        trace(this->db->db_level);\n"
	"        trace(this->db->db_blkid);\n"
	"        trace(this->db->db.db_size);\n"
	"        trace(self->type);\n"
	"        trace(this->hdr ? this->hdr->b_dva.dva_word[0] : 0);\n"
	"        trace(this->hdr ? this->hdr->b_dva.dva_word[1] : 0);\n"
	"        trace(this->hdr ? this->hdr->b_birth : 0);\n"
	"        trace(this->hdr ? this->hdr->b_spa : 0);\n"
	"        self->dbp = 0;\n"
	"        self->type = 0;\n"
	"}\n";
121 
122 static int
123 awr_cmp(const void *x, const void *y)
124 {
125 	const arc_read_node_t *a = x;
126 	const arc_read_node_t *b = y;
127 
128 	if (a->arn_ar.ar_spa < b->arn_ar.ar_spa)
129 		return -1;
130 	if (a->arn_ar.ar_spa > b->arn_ar.ar_spa)
131 		return 1;
132 	if (a->arn_ar.ar_dva0 < b->arn_ar.ar_dva0)
133 		return -1;
134 	if (a->arn_ar.ar_dva0 > b->arn_ar.ar_dva0)
135 		return 1;
136 	if (a->arn_ar.ar_dva1 < b->arn_ar.ar_dva1)
137 		return -1;
138 	if (a->arn_ar.ar_dva1 > b->arn_ar.ar_dva1)
139 		return 1;
140 	if (a->arn_ar.ar_birth < b->arn_ar.ar_birth)
141 		return -1;
142 	if (a->arn_ar.ar_birth > b->arn_ar.ar_birth)
143 		return 1;
144 	return 0;
145 }
146 
147 static int
148 drophandler(const dtrace_dropdata_t *data, void *arg)
149 {
150 	fprintf(stderr, "type %d drops %lld\n", data->dtdda_kind, data->dtdda_drops);
151 	fprintf(stderr, "dtrace drops encountered. Try increasing buffers.\n");
152 	exit(1);
153 }
154 
/*
 * libdtrace process handler.  Raises the caller's "process done" flag:
 * arg must point to an int, which is set to 1.  P and msg are not used.
 */
static void
prochandler(struct ps_prochandle *P, const char *msg, void *arg)
{
	*(int *)arg = 1;
}
162 
163 static uint64_t
164 get_val(caddr_t base, dtrace_recdesc_t *rec)
165 {
166 	uint64_t val = 0;
167 
168 	assert(rec->dtrd_action == DTRACEACT_DIFEXPR);
169 	assert(rec->dtrd_size > 0);
170 	assert(rec->dtrd_size <= 8);
171 
172 	memcpy(&val, base + rec->dtrd_offset, rec->dtrd_size);
173 
174 	return val;
175 }
176 
177 typedef struct trace_args {
178 	int		ofd;
179 	avl_tree_t	*awr;
180 	pthread_mutex_t	mtx;
181 	int		ptr;
182 	char		buf[16384];
183 } trace_args_t;
184 
185 static int
186 process_trace(const dtrace_probedata_t *data, void *arg)
187 {
188 	dtrace_eprobedesc_t *edesc = data->dtpda_edesc;
189 	caddr_t base = data->dtpda_data;
190 	dtrace_recdesc_t *rec = edesc->dtepd_rec;
191 	trace_args_t *ta = arg;
192 	arc_read_t ar;
193 	int ret;
194 
195 	assert(edesc->dtepd_nrecs == 15);
196 
197 	ar.ar_objset = get_val(base, rec + 3);
198 	ar.ar_object = get_val(base, rec + 4);
199 	ar.ar_level = get_val(base, rec + 5);
200 	ar.ar_blkid = get_val(base, rec + 6);
201 	ar.ar_size = get_val(base, rec + 7);
202 	ar.ar_type = get_val(base, rec + 8);
203 	ar.ar_dva0 = get_val(base, rec + 9);
204 	ar.ar_dva1 = get_val(base, rec + 10);
205 	ar.ar_birth = get_val(base, rec + 11);
206 	ar.ar_spa = get_val(base, rec + 12);
207 
208 	if (ta->ofd != -1) {
209 		pthread_mutex_lock(&ta->mtx);
210 		if (ta->ptr + sizeof(ar) > sizeof(ta->buf)) {
211 			ret = write(ta->ofd, ta->buf, ta->ptr);
212 			if (ret == -1) {
213 				fprintf(stderr,
214 					"cannot write to output file: %s\n",
215 					strerror(errno));
216 				exit(1);
217 			}
218 			ta->ptr = 0;
219 		}
220 		memcpy(ta->buf + ta->ptr, &ar, sizeof(ar));
221 		ta->ptr += sizeof(ar);
222 		pthread_mutex_unlock(&ta->mtx);
223 	}
224 
225 	if (ta->awr) {
226 		arc_read_node_t *arn;
227 
228 		arn = calloc(sizeof(*arn), 1);
229 		assert(arn);
230 		arn->arn_ar = ar;
231 		pthread_mutex_lock(&ta->mtx);
232 		if (avl_find(ta->awr, arn, NULL) == NULL)
233 			avl_add(ta->awr, arn);
234 		pthread_mutex_unlock(&ta->mtx);
235 	}
236 
237 	if (g_verbose) {
238 		printf("spa %llx objset %lld object %lld level %lld blkid "
239 		    "%lld size %lld type %lld dva %16x:%16x birth %lld\n",
240 		    ar.ar_spa, ar.ar_objset, ar.ar_object, ar.ar_level,
241 		    ar.ar_blkid, ar.ar_size, ar.ar_type,
242 		    ar.ar_dva0, ar.ar_dva1, ar.ar_birth);
243 	}
244 
245 	return (DTRACE_CONSUME_NEXT);
246 }
247 
248 static void
249 d_fatal(dtrace_hdl_t *dtp, char *msg)
250 {
251 	fprintf(stderr, "%s: %s\n", msg, dtrace_errmsg(dtp, dtrace_errno(dtp)));
252 	exit(1);
253 }
254 
255 static int
256 run_dtrace(char *bufsize, char *out_fn, avl_tree_t *awr, int argc, char **argv)
257 {
258 	dtrace_prog_t *dp;
259 	dtrace_hdl_t *dtp;
260 	dtrace_proginfo_t info;
261 	struct ps_prochandle *p;
262 	int err;
263 	int proc_done = 0;
264 	int done = 0;
265 	int ofd = -1;
266 	arc_read_hdr_t arh = { 0 };
267 	trace_args_t ta = { 0 };
268 
269 	ta.ofd = -1;
270 	ta.awr = awr;
271 	pthread_mutex_init(&ta.mtx, NULL);
272 
273 	if (out_fn) {
274 		ofd = open(out_fn, O_CREAT | O_TRUNC | O_WRONLY, 0644);
275 		if (ofd == -1) {
276 			printf("cannot open output file %s: %s\n",
277 			    out_fn, strerror(errno));
278 			exit(1);
279 		}
280 		memcpy(arh.arh_magic, ARCWATCH_READ_MAGIC,
281 		    sizeof(arh.arh_magic));
282 		arh.arh_version = ARCWATCH_READ_VERSION;
283 		err = write(ofd, &arh, sizeof(arh));
284 		if (err == -1) {
285 			printf("cannot write to output file: %s\n",
286 			    strerror(errno));
287 			exit(1);
288 		}
289 		ta.ofd = ofd;
290 	}
291 
292 	dtp = dtrace_open(DTRACE_VERSION, 0, &err);
293 	if (dtp == NULL) {
294 		printf("cannot open dtrace library: %s\n",
295 		    dtrace_errmsg(NULL, err));
296 		exit(1);
297 	}
298 
299 	if (dtrace_handle_drop(dtp, &drophandler, NULL) == -1)
300 		d_fatal(dtp, "couldn't establish drop handler");
301 
302 	if (dtrace_handle_proc(dtp, &prochandler, &proc_done) == -1)
303 		d_fatal(dtp, "failed to establish proc handler");
304 
305 	if (dtrace_setopt(dtp, "bufsize", bufsize) == -1)
306 		d_fatal(dtp, "failed to set bufsize");
307 
308 	/* XXX TODO understand dynvar drops */
309 	if (dtrace_setopt(dtp, "dynvarsize", "4m") == -1)
310 		d_fatal(dtp, "failed to set dynvarsize");
311 
312 	if (dtrace_setopt(dtp, "temporal", "no") == -1)
313 		d_fatal(dtp, "failed to set temporal");
314 
315 	if (dtrace_setopt(dtp, "switchrate", "100hz") == -1)
316 		d_fatal(dtp, "failed to set switchrate");
317 
318 	if (dtrace_setopt(dtp, "cleanrate", "100hz") == -1)
319 		d_fatal(dtp, "failed to set cleanrate");
320 
321 	p = dtrace_proc_create(dtp, argv[0], &argv[0]);
322 	if (p == NULL)
323 		d_fatal(dtp, "creating process failed");
324 
325 	dp = dtrace_program_strcompile(dtp, d_prog, DTRACE_PROBESPEC_NAME, 0,
326 	    0, NULL);
327 	if (dp == NULL)
328 		d_fatal(dtp, "failed to compile program");
329 
330 	if (dtrace_program_exec(dtp, dp, &info) == -1)
331 		d_fatal(dtp, "failed to enable probes");
332 
333 	if (dtrace_go(dtp))
334 		d_fatal(dtp, "couldn't start tracing");
335 
336 	(void) dtrace_proc_continue(dtp, p);
337 
338 	do {
339 		dtrace_sleep(dtp);
340 
341 		if (proc_done) {
342 			done = 1;
343 			(void) dtrace_stop(dtp);
344 		}
345 
346 		err = dtrace_work(dtp, stdout, process_trace, NULL, &ta);
347 		if (err == DTRACE_WORKSTATUS_DONE)
348 			done = 1;
349 	} while (!done);
350 
351 	if (ta.ptr > 0) {
352 		err = write(ta.ofd, ta.buf, ta.ptr);
353 		if (err == -1) {
354 			fprintf(stderr,
355 				"cannot write to output file: %s\n",
356 				strerror(errno));
357 			exit(1);
358 		}
359 	}
360 	(void) dtrace_close(dtp);
361 	if (ofd != -1)
362 		close(ofd);
363 
364 	return (0);
365 }
366 
367 static void
368 read_awr(avl_tree_t *awr, char *in_fn)
369 {
370 	int fd;
371 	int ret;
372 	arc_read_hdr_t arh;
373 	char buf[1000 * sizeof(arc_read_t)];
374 	int blen = 0;
375 	int ptr = 0;
376 
377 	fd = open(in_fn, O_RDONLY);
378 	if (fd == -1) {
379 		fprintf(stderr, "failed to open input: %s\n",
380 			strerror(errno));
381 		exit(1);
382 	}
383 	ret = read(fd, &arh, sizeof(arh));
384 	if (ret == -1) {
385 		fprintf(stderr, "failed to read input: %s\n",
386 			strerror(errno));
387 		exit(1);
388 	}
389 	if (ret != sizeof(arh)) {
390 		fprintf(stderr, "failed to read input: truncated file\n");
391 		exit(1);
392 	}
393 	if (memcmp(arh.arh_magic, ARCWATCH_READ_MAGIC, 4) != 0) {
394 		fprintf(stderr, "failed to read input: bad file magic\n");
395 		exit(1);
396 	}
397 	if (arh.arh_version != ARCWATCH_READ_VERSION) {
398 		fprintf(stderr, "failed to read input: bad file version\n");
399 		exit(1);
400 	}
401 	while (1) {
402 		arc_read_node_t *arn = calloc(sizeof(*arn), 1);
403 
404 		assert(arn);
405 		if (blen == ptr) {
406 			ret = read(fd, buf, sizeof(buf));
407 			if (ret == 0)
408 				break;
409 			if (ret == -1) {
410 				fprintf(stderr, "failed to read input: %s\n",
411 					strerror(errno));
412 				exit(1);
413 			}
414 			blen = ret;
415 			ptr = 0;
416 		}
417 		if ((blen - ptr) < sizeof(arn->arn_ar)) {
418 			fprintf(stderr,
419 				"failed to read input: truncated file\n");
420 			exit(1);
421 		}
422 		memcpy(&arn->arn_ar, buf + ptr, sizeof(arn->arn_ar));
423 		ptr += sizeof(arn->arn_ar);
424 
425 		if (g_verbose >= 2) {
426 			arc_read_t *ar = &arn->arn_ar;
427 
428 			printf("spa %llx objset % 8lld object % 8lld "
429 			    "level %lld blkid % 8lld size % 6lld type % 3lld "
430 			    "dva %016x:%016x birth % 8lld\n",
431 			    ar->ar_spa, ar->ar_objset, ar->ar_object,
432 			    ar->ar_level, ar->ar_blkid, ar->ar_size,
433 			    ar->ar_type, ar->ar_dva0, ar->ar_dva1,
434 			    ar->ar_birth);
435 		}
436 
437 		if (avl_find(awr, arn, NULL) == NULL)
438 			avl_add(awr, arn);
439 	}
440 	close(fd);
441 }
442 
443 static void
444 read_arc(avl_tree_t *awr, char *in_fn, uint64_t color, int just_dump)
445 {
446 	int fd;
447 	int ret;
448 	arc_content_hdr_t ach;
449 	char buf[1000 * sizeof(arc_info_t)];
450 	int ptr = 0;
451 	int blen = 0;
452 
453 	fd = open(in_fn, O_RDONLY);
454 	if (fd == -1) {
455 		fprintf(stderr, "failed to open input: %s\n",
456 			strerror(errno));
457 		exit(1);
458 	}
459 	ret = read(fd, &ach, sizeof(ach));
460 	if (ret == -1) {
461 		fprintf(stderr, "failed to read input: %s\n",
462 			strerror(errno));
463 		exit(1);
464 	}
465 	if (ret != sizeof(ach)) {
466 		fprintf(stderr, "failed to read input: truncated file\n");
467 		exit(1);
468 	}
469 	if (memcmp(ach.ach_magic, ARCWATCH_CONTENT_MAGIC, 4) != 0) {
470 		fprintf(stderr, "failed to read input: bad file magic\n");
471 		exit(1);
472 	}
473 	if (ach.ach_version != ARCWATCH_CONTENT_VERSION) {
474 		fprintf(stderr, "failed to read input: bad file version\n");
475 		exit(1);
476 	}
477 	while (1) {
478 		arc_info_t ai;
479 		arc_read_node_t search;
480 		arc_read_node_t *arn;
481 
482 		if (blen == ptr) {
483 			ret = read(fd, buf, sizeof(buf));
484 			if (ret == 0)
485 				break;
486 			if (ret == -1) {
487 				fprintf(stderr, "failed to read input: %s\n",
488 					strerror(errno));
489 				exit(1);
490 			}
491 			blen = ret;
492 			ptr = 0;
493 		}
494 		if ((blen - ptr) < sizeof(ai)) {
495 			fprintf(stderr,
496 				"failed to read input: truncated file\n");
497 			exit(1);
498 		}
499 		memcpy(&ai, buf + ptr, sizeof(ai));
500 		ptr += sizeof(ai);
501 
502 		if (just_dump) {
503 			printf("dva %016llx:%016llx birth %8d "
504 				"spa %016llx "
505 				"size % 8x flags %016x state %s\n",
506 				ai.ai_dva.dva_word[0],
507 				ai.ai_dva.dva_word[1],
508 				ai.ai_birth,
509 				ai.ai_spa,
510 				ai.ai_size,
511 				ai.ai_flags,
512 				state2str(ai.ai_state));
513 		} else {
514 			search.arn_ar.ar_spa = ai.ai_spa;
515 			search.arn_ar.ar_dva0 = ai.ai_dva.dva_word[0];
516 			search.arn_ar.ar_dva1 = ai.ai_dva.dva_word[1];
517 			search.arn_ar.ar_birth = ai.ai_birth;
518 
519 			arn = avl_find(awr, &search, NULL);
520 			if (arn) {
521 				arn->arn_color = color;
522 				arn->arn_flags = ai.ai_flags;
523 				arn->arn_state = ai.ai_state;
524 			}
525 		}
526 	}
527 	close(fd);
528 }
529 
530 #define BUFSZ 1048576	/* 1MB */
531 static void
532 get_arc(avl_tree_t *awr, uint64_t color, char *out_fn)
533 {
534 	int ret;
535 	int fd;
536 	void *buf = malloc(BUFSZ);
537 	zfs_cmd_t cmd = {0};
538 	arc_info_t *ai;
539 	arc_info_hdr_t *aih;
540 	int ofd = -1;
541 	int hdr_written = 0;
542 	char wbuf[16384];
543 	int wptr = 0;
544 
545 	fd = open("/dev/zfs", O_RDWR);
546 	if (fd == -1) {
547 		fprintf(stderr, "failed to open /dev/zfs: %s\n",
548 		    strerror(errno));
549 		exit(1);
550 	}
551 	assert(buf);
552 	cmd.zc_obj = 0;
553 	cmd.zc_nvlist_dst = (uint64_t)buf;
554 	cmd.zc_nvlist_dst_size = BUFSZ;
555 
556 	if (out_fn != NULL) {
557 		ofd = open(out_fn, O_CREAT | O_TRUNC | O_WRONLY, 0644);
558 		if (ofd == -1) {
559 			printf("cannot open output file %s: %s\n",
560 			    out_fn, strerror(errno));
561 			exit(1);
562 		}
563 	}
564 	do {
565 		int i;
566 
567 		ret = ioctl(fd, ZFS_IOC_ARC_INFO, &cmd);
568 		if (ret == -1) {
569 			printf("ioctl failed with %d=%s\n", errno,
570 			    strerror(errno));
571 			exit(1);
572 		}
573 		aih = buf;
574 		ai = buf + sizeof(aih);
575 		if (ofd != -1 && !hdr_written) {
576 			arc_content_hdr_t ach;
577 
578 			memcpy(ach.ach_magic, ARCWATCH_CONTENT_MAGIC,
579 			    sizeof(ach.ach_magic));
580 			ach.ach_version = ARCWATCH_CONTENT_VERSION;
581 			ach.ach_buckets = aih->aih_buckets;
582 			ach.ach_buf_locks = aih->aih_buf_locks;
583 			ret = write(ofd, &ach, sizeof(ach));
584 			if (ret == -1) {
585 				printf("cannot write to output file: %s\n",
586 				    strerror(errno));
587 				exit(1);
588 			}
589 			hdr_written = 1;
590 		}
591 		for (i = 0; i < aih->aih_entries; ++i) {
592 			ai = ((arc_info_t *)(aih + 1)) + i;
593 			if (g_verbose) {
594 				printf("dva %016llx:%016llx birth %7d "
595 					"spa %016llx "
596 					"size % 8d flags %016x state %s\n",
597 					ai->ai_dva.dva_word[0],
598 					ai->ai_dva.dva_word[1],
599 					ai->ai_birth,
600 					ai->ai_spa,
601 					ai->ai_size,
602 					ai->ai_flags,
603 					state2str(ai->ai_state));
604 			}
605 			if (awr) {
606 				arc_read_node_t search;
607 				arc_read_node_t *arn;
608 
609 				search.arn_ar.ar_spa = ai->ai_spa;
610 				search.arn_ar.ar_dva0 = ai->ai_dva.dva_word[0];
611 				search.arn_ar.ar_dva1 = ai->ai_dva.dva_word[1];
612 				search.arn_ar.ar_birth = ai->ai_birth;
613 
614 				arn = avl_find(awr, &search, NULL);
615 				if (arn) {
616 					arn->arn_color = color;
617 					arn->arn_flags = ai->ai_flags;
618 					arn->arn_state = ai->ai_state;
619 				}
620 			}
621 			if (ofd != -1) {
622 				if (wptr + sizeof(*ai) > sizeof(wbuf)) {
623 					ret = write(ofd, wbuf, wptr);
624 					if (ret == -1) {
625 						printf("cannot write to output "
626 							"file: %s\n",
627 						    strerror(errno));
628 						exit(1);
629 					}
630 					wptr = 0;
631 				}
632 				memcpy(wbuf + wptr, ai, sizeof(*ai));
633 				wptr += sizeof(*ai);
634 			}
635 		}
636 		cmd.zc_obj = aih->aih_next;
637 	} while (cmd.zc_obj != 0);
638 
639 	if (wptr > 0) {
640 		ret = write(ofd, wbuf, wptr);
641 		if (ret == -1) {
642 			printf("cannot write to output "
643 				"file: %s\n",
644 			    strerror(errno));
645 		}
646 		exit(1);
647 	}
648 	close(fd);
649 	close(ofd);
650 	free(buf);
651 }
652 
653 static void
654 awr_stat(avl_tree_t *awr, uint64_t color)
655 {
656 	arc_read_node_t *arn = avl_first(awr);
657 	uint64_t bufs_total = 0;
658 	uint64_t bufs_in_l1 = 0;
659 	uint64_t bufs_in_l1_ghost = 0;
660 	uint64_t bufs_in_l2 = 0;
661 	uint64_t bytes_total = 0;
662 	uint64_t bytes_in_l1 = 0;
663 	uint64_t bytes_in_l1_ghost = 0;
664 	uint64_t bytes_in_l2 = 0;
665 
666 	while (arn) {
667 		arc_read_t *ar = &arn->arn_ar;
668 		if (g_verbose) {
669 			printf("dva %016llx:%016llx birth % 8d "
670 			       "spa %016llx size % 8d ",
671 				ar->ar_dva0,
672 				ar->ar_dva1,
673 				ar->ar_birth,
674 				ar->ar_spa,
675 				ar->ar_size,
676 				arn->arn_color);
677 			if (arn->arn_color == color)
678 				printf("flags %016x state %s\n",
679 				    arn->arn_flags,
680 				    state2str(arn->arn_state));
681 			else
682 				printf("not in ARC\n");
683 		}
684 		if (arn->arn_color == color) {
685 			if (arn->arn_state == AIS_MRU ||
686 			    arn->arn_state == AIS_MFU) {
687 				++bufs_in_l1;
688 				bytes_in_l1 += ar->ar_size;
689 			} else if (arn->arn_state == AIS_MRU_GHOST ||
690 			           arn->arn_state == AIS_MFU_GHOST) {
691 				++bufs_in_l1_ghost;
692 				bytes_in_l1_ghost =+ ar->ar_size;
693 			}
694 			if (arn->arn_flags & ARC_FLAG_HAS_L2HDR) {
695 				++bufs_in_l2;
696 				bytes_in_l2 += ar->ar_size;
697 			}
698 		}
699 		++bufs_total;
700 		bytes_total += ar->ar_size;
701 		arn = AVL_NEXT(awr, arn);
702 	}
703 	if (g_verbose) {
704 		printf("\n");
705 	}
706 	printf("         |       bufs |            bytes\n");
707 	printf("---------+------------+-----------------\n");
708 	printf("   in l1 | % 10lld | %16lld\n", bufs_in_l1, bytes_in_l1);
709 	printf("l1 ghost | % 10lld | %16lld\n", bufs_in_l1_ghost,
710 	    bytes_in_l1_ghost);
711 	printf("   in l2 | % 10lld | %16lld\n", bufs_in_l2, bytes_in_l2);
712 	printf("   total | % 10lld | %16lld\n", bufs_total, bytes_total);
713 	printf("\n");
714 }
715 
/*
 * Print the command-line synopsis on stderr and exit with status 1.
 *
 * basename - program name to show in the synopsis
 *
 * The option list mirrors the getopt() string used by main(); the
 * standalone dump mode is -a (-d takes the watch interval), and -I and -h
 * are accepted options.
 */
static void
usage(const char *basename)
{
	(void) fprintf(stderr,
	    "Usage: %s -a [options]\n"
	    "       %s {-c | -i} [options] [command [args]]\n\n"
	    "\tOptions:\n"
	    "\t  -c           run command and record read blocks\n"
	    "\t  -i filename  read previously recorded output from -o instead\n"
	    "\t               of running a command\n"
	    "\t  -I filename  read arc contents previously dumped with -a -o;\n"
	    "\t               printed as-is unless -c or -i is also given\n"
	    "\t  -b bufsize   change tracing bufsize\n"
	    "\t  -a           dump arc\n"
	    "\t  -v           verbose\n"
	    "\t  -w           watch decay of buffers in arc\n"
	    "\t  -d seconds   watch interval\n"
	    "\t  -o filename  write output to file\n"
	    "\t  -h           show this help\n",
	    basename, basename);
	exit(1);
}
735 
736 /*
737  * TODO: compare 2 traces
738  * TODO: compare 2 arc infos
739  * TODO: persistent spa numbering
740  */
741 int
742 main(int argc, char **argv)
743 {
744         extern char *optarg;
745         extern int optind;
746         int c;
747 	char *bufsize = "4m";
748 	int run_cmd = 0;
749 	int watch = 0;
750 	char *basename;
751 	char *out_fn = NULL;
752 	char *in_fn = NULL;
753 	avl_tree_t awr;
754 	uint64_t color = 0;
755 	int interval = 10;
756 	int dump_arc = 0;
757 	char *arc_fn = NULL;
758 
759 	avl_create(&awr, awr_cmp, sizeof(arc_read_node_t),
760 	    offsetof(arc_read_node_t, arn_node));
761 	basename = strrchr(argv[0], '/');
762 	if (basename == NULL)
763 		basename = argv[0];
764 
765 	while ((c = getopt(argc, argv, "b:o:i:cvwhd:aI:")) != EOF) {
766 		switch(c) {
767 		case 'b':
768 			bufsize = optarg;
769 			break;
770 		case 'c':
771 			run_cmd = 1;
772 			break;
773 		case 'w':
774 			watch = 1;
775 			break;
776 		case 'v':
777 			++g_verbose;
778 			break;
779 		case 'o':
780 			out_fn = optarg;
781 			break;
782 		case 'i':
783 			in_fn = optarg;
784 			break;
785 		case 'I':
786 			arc_fn = optarg;
787 			break;
788 		case 'a':
789 			dump_arc = 1;
790 			break;
791 		case 'd':
792 			interval = atoi(optarg);
793 			break;
794 		case 'h':
795 		default:
796 			usage(basename);
797 		}
798 	}
799 
800 	if (optind != argc && !run_cmd) {
801 		fprintf(stderr, "command given without -c switch\n");
802 		exit(1);
803 	}
804 	if (dump_arc) {
805 		get_arc(NULL, 0, out_fn);
806 		exit(0);
807 	}
808 	if (arc_fn != NULL && !run_cmd && in_fn == NULL) {
809 		read_arc(&awr, arc_fn, 1, 1);
810 		exit(1);
811 	}
812 	if (arc_fn != NULL && watch) {
813 		fprintf(stderr, "-I given with -w\n");
814 		exit(1);
815 	}
816 	if (run_cmd && (in_fn != NULL)) {
817 		fprintf(stderr, "-i and -c are mutually exclusive\n");
818 		exit(1);
819 	}
820 	if (run_cmd) {
821 		if (optind == argc) {
822 			fprintf(stderr, "no command given\n");
823 			exit(1);
824 		}
825 		run_dtrace(bufsize, out_fn, &awr, argc - optind, argv + optind);
826 	}
827 	if (in_fn)
828 		read_awr(&awr, in_fn);
829 	if (watch) {
830 		while (1) {
831 			get_arc(&awr, ++color, NULL);
832 			awr_stat(&awr, color);
833 			sleep(10);
834 		}
835 	}
836 	if (arc_fn) {
837 		read_arc(&awr, arc_fn, 1, 0);
838 		awr_stat(&awr, 1);
839 	}
840 
841 	exit(0);
842 }
843