xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision 82beb6028da8d7d7f8562908ca027bd4a1cc7d37)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <mdb/mdb_param.h>
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ks.h>
28 #include <sys/types.h>
29 #include <sys/memlist.h>
30 #include <sys/swap.h>
31 #include <sys/systm.h>
32 #include <sys/thread.h>
33 #include <vm/anon.h>
34 #include <vm/as.h>
35 #include <vm/page.h>
36 #include <sys/thread.h>
37 #include <sys/swap.h>
38 #include <sys/memlist.h>
39 #include <sys/vnode.h>
40 #include <vm/seg_map.h>
41 #include <vm/seg_vn.h>
42 #if defined(__i386) || defined(__amd64)
43 #include <sys/balloon_impl.h>
44 #endif
45 
46 #include "avl.h"
47 #include "memory.h"
48 
49 /*
50  * Page walker.
51  * By default, this will walk all pages in the system.  If given an
52  * address, it will walk all pages belonging to the vnode at that
53  * address.
54  */
55 
/*
 * page_walk_data
 *
 * Private state kept by the page walker between steps.
 *
 * pw_hashleft doubles as a mode flag: it is -1 for the whole of a
 * per-vnode walk; otherwise it counts the page hash table entries that
 * have not been examined yet during a walk of all pages.
 *
 * pw_hashloc (our position in the page hash table) is only consulted
 * when walking all pages, while pw_first (the first page visited) is
 * only consulted when walking the pages of one vnode.  The two could
 * share a single pointer, but they are kept apart for clarity.
 */
typedef struct page_walk_data {
	long		pw_hashleft;	/* hash slots left, or -1 (vnode) */
	void		**pw_hashloc;	/* next page hash slot to examine */
	uintptr_t	pw_first;	/* first page seen on a vnode walk */
} page_walk_data_t;
75 
76 int
77 page_walk_init(mdb_walk_state_t *wsp)
78 {
79 	page_walk_data_t	*pwd;
80 	void	**ptr;
81 	size_t	hashsz;
82 	vnode_t	vn;
83 
84 	if (wsp->walk_addr == NULL) {
85 
86 		/*
87 		 * Walk all pages
88 		 */
89 
90 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
91 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
92 		    (ptr == NULL) || (hashsz == 0)) {
93 			mdb_warn("page_hash, page_hashsz not found or invalid");
94 			return (WALK_ERR);
95 		}
96 
97 		/*
98 		 * Since we are walking all pages, initialize hashleft
99 		 * to be the remaining number of entries in the page
100 		 * hash.  hashloc is set the start of the page hash
101 		 * table.  Setting the walk address to 0 indicates that
102 		 * we aren't currently following a hash chain, and that
103 		 * we need to scan the page hash table for a page.
104 		 */
105 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
106 		pwd->pw_hashleft = hashsz;
107 		pwd->pw_hashloc = ptr;
108 		wsp->walk_addr = 0;
109 	} else {
110 
111 		/*
112 		 * Walk just this vnode
113 		 */
114 
115 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
116 			mdb_warn("unable to read vnode_t at %#lx",
117 			    wsp->walk_addr);
118 			return (WALK_ERR);
119 		}
120 
121 		/*
122 		 * We set hashleft to -1 to indicate that we are
123 		 * walking a vnode, and initialize first to 0 (it is
124 		 * used to terminate the walk, so it must not be set
125 		 * until after we have walked the first page).  The
126 		 * walk address is set to the first page.
127 		 */
128 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
129 		pwd->pw_hashleft = -1;
130 		pwd->pw_first = 0;
131 
132 		wsp->walk_addr = (uintptr_t)vn.v_pages;
133 	}
134 
135 	wsp->walk_data = pwd;
136 
137 	return (WALK_NEXT);
138 }
139 
140 int
141 page_walk_step(mdb_walk_state_t *wsp)
142 {
143 	page_walk_data_t	*pwd = wsp->walk_data;
144 	page_t		page;
145 	uintptr_t	pp;
146 
147 	pp = wsp->walk_addr;
148 
149 	if (pwd->pw_hashleft < 0) {
150 
151 		/* We're walking a vnode's pages */
152 
153 		/*
154 		 * If we don't have any pages to walk, we have come
155 		 * back around to the first one (we finished), or we
156 		 * can't read the page we're looking at, we are done.
157 		 */
158 		if (pp == NULL || pp == pwd->pw_first)
159 			return (WALK_DONE);
160 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
161 			mdb_warn("unable to read page_t at %#lx", pp);
162 			return (WALK_ERR);
163 		}
164 
165 		/*
166 		 * Set the walk address to the next page, and if the
167 		 * first page hasn't been set yet (i.e. we are on the
168 		 * first page), set it.
169 		 */
170 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
171 		if (pwd->pw_first == NULL)
172 			pwd->pw_first = pp;
173 
174 	} else if (pwd->pw_hashleft > 0) {
175 
176 		/* We're walking all pages */
177 
178 		/*
179 		 * If pp (the walk address) is NULL, we scan through
180 		 * the page hash table until we find a page.
181 		 */
182 		if (pp == NULL) {
183 
184 			/*
185 			 * Iterate through the page hash table until we
186 			 * find a page or reach the end.
187 			 */
188 			do {
189 				if (mdb_vread(&pp, sizeof (uintptr_t),
190 				    (uintptr_t)pwd->pw_hashloc) == -1) {
191 					mdb_warn("unable to read from %#p",
192 					    pwd->pw_hashloc);
193 					return (WALK_ERR);
194 				}
195 				pwd->pw_hashleft--;
196 				pwd->pw_hashloc++;
197 			} while (pwd->pw_hashleft && (pp == NULL));
198 
199 			/*
200 			 * We've reached the end; exit.
201 			 */
202 			if (pp == NULL)
203 				return (WALK_DONE);
204 		}
205 
206 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
207 			mdb_warn("unable to read page_t at %#lx", pp);
208 			return (WALK_ERR);
209 		}
210 
211 		/*
212 		 * Set the walk address to the next page.
213 		 */
214 		wsp->walk_addr = (uintptr_t)page.p_hash;
215 
216 	} else {
217 		/* We've finished walking all pages. */
218 		return (WALK_DONE);
219 	}
220 
221 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
222 }
223 
224 void
225 page_walk_fini(mdb_walk_state_t *wsp)
226 {
227 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
228 }
229 
230 /*
231  * allpages walks all pages in the system in order they appear in
232  * the memseg structure
233  */
234 
235 #define	PAGE_BUFFER	128
236 
237 int
238 allpages_walk_init(mdb_walk_state_t *wsp)
239 {
240 	if (wsp->walk_addr != 0) {
241 		mdb_warn("allpages only supports global walks.\n");
242 		return (WALK_ERR);
243 	}
244 
245 	if (mdb_layered_walk("memseg", wsp) == -1) {
246 		mdb_warn("couldn't walk 'memseg'");
247 		return (WALK_ERR);
248 	}
249 
250 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
251 	return (WALK_NEXT);
252 }
253 
254 int
255 allpages_walk_step(mdb_walk_state_t *wsp)
256 {
257 	const struct memseg *msp = wsp->walk_layer;
258 	page_t *buf = wsp->walk_data;
259 	size_t pg_read, i;
260 	size_t pg_num = msp->pages_end - msp->pages_base;
261 	const page_t *pg_addr = msp->pages;
262 
263 	while (pg_num > 0) {
264 		pg_read = MIN(pg_num, PAGE_BUFFER);
265 
266 		if (mdb_vread(buf, pg_read * sizeof (page_t),
267 		    (uintptr_t)pg_addr) == -1) {
268 			mdb_warn("can't read page_t's at %#lx", pg_addr);
269 			return (WALK_ERR);
270 		}
271 		for (i = 0; i < pg_read; i++) {
272 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
273 			    &buf[i], wsp->walk_cbdata);
274 
275 			if (ret != WALK_NEXT)
276 				return (ret);
277 		}
278 		pg_num -= pg_read;
279 		pg_addr += pg_read;
280 	}
281 
282 	return (WALK_NEXT);
283 }
284 
285 void
286 allpages_walk_fini(mdb_walk_state_t *wsp)
287 {
288 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
289 }
290 
/*
 * Hash table + LRU queue.
 * The memstat command uses this table to cache recently read vnodes so
 * that it needs fewer mdb_vread calls.  This greatly speeds up the
 * memstat command on live systems with a large CPU count.
 */

/* Fallback and default number of entries in the vnode cache. */
#define	VN_SMALL		401
#define	VN_LARGE		10007

/* Bucket index for vnode address p in the hash table hp. */
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))
301 
302 struct vn_htable_list {
303 	uint_t vn_flag;				/* v_flag from vnode	*/
304 	uintptr_t vn_ptr;			/* pointer to vnode	*/
305 	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
306 	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
307 	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
308 };
309 
/*
 * vn_q_first        -> head of the queue: the most recently used vnode
 * vn_q_last         -> tail of the queue: the least recently used vnode,
 *                      whose entry is reused when a new vnode is read
 * vn_htable         -> hash table (array of bucket heads)
 * vn_htable_buf     -> storage for the hash table entries
 * vn_htable_size    -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
	struct vn_htable_list	*vn_q_first;
	struct vn_htable_list	*vn_q_last;
	struct vn_htable_list	**vn_htable;
	struct vn_htable_list	*vn_htable_buf;
	int			vn_htable_size;
	int			vn_htable_buckets;
} vn_htable_t;
328 
329 
330 /* allocate memory, initilize hash table and LRU queue */
331 static void
332 vn_htable_init(vn_htable_t *hp, size_t vn_size)
333 {
334 	int i;
335 	int htable_size = MAX(vn_size, VN_LARGE);
336 
337 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
338 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
339 		htable_size = VN_SMALL;
340 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
341 		    * htable_size, UM_SLEEP|UM_GC);
342 	}
343 
344 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
345 	    * htable_size, UM_SLEEP|UM_GC);
346 
347 	hp->vn_q_first  = &hp->vn_htable_buf[0];
348 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
349 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
350 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
351 
352 	for (i = 1; i < (htable_size-1); i++) {
353 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
354 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
355 	}
356 
357 	hp->vn_htable_size = htable_size;
358 	hp->vn_htable_buckets = htable_size;
359 }
360 
361 
362 /*
363  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
364  * The function tries to find needed information in the following order:
365  *
366  * 1. check if ptr is the first in queue
367  * 2. check if ptr is in hash table (if so move it to the top of queue)
368  * 3. do mdb_vread, remove last queue item from queue and hash table.
369  *    Insert new information to freed object, and put this object in to the
370  *    top of the queue.
371  */
372 static int
373 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
374 {
375 	int hkey;
376 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
377 	struct vn_htable_list  *q_first = hp->vn_q_first;
378 
379 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
380 	if (q_first->vn_ptr == ptr) {
381 		vp->v_flag = q_first->vn_flag;
382 
383 		return (0);
384 	}
385 
386 	/* 2. search the hash table for this ptr */
387 	hkey = VN_HTABLE_KEY(ptr, hp);
388 	hent = hp->vn_htable[hkey];
389 	while (hent && (hent->vn_ptr != ptr))
390 		hent = hent->vn_h_next;
391 
392 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
393 	if (hent == NULL) {
394 		struct vnode vn;
395 
396 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
397 			mdb_warn("unable to read vnode_t at %#lx", ptr);
398 			return (-1);
399 		}
400 
401 		/* we will insert read data into the last element in queue */
402 		hent = hp->vn_q_last;
403 
404 		/* remove last hp->vn_q_last object from hash table */
405 		if (hent->vn_ptr) {
406 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
407 			while (*htmp != hent)
408 				htmp = &(*htmp)->vn_h_next;
409 			*htmp = hent->vn_h_next;
410 		}
411 
412 		/* insert data into new free object */
413 		hent->vn_ptr  = ptr;
414 		hent->vn_flag = vn.v_flag;
415 
416 		/* insert new object into hash table */
417 		hent->vn_h_next = hp->vn_htable[hkey];
418 		hp->vn_htable[hkey] = hent;
419 	}
420 
421 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
422 	q_next = hent->vn_q_next;
423 	q_prev = hent->vn_q_prev;
424 	if (q_next == NULL)
425 		hp->vn_q_last = q_prev;
426 	else
427 		q_next->vn_q_prev = q_prev;
428 	q_prev->vn_q_next = q_next;
429 
430 	/* Add to the front of queue */
431 	hent->vn_q_prev = NULL;
432 	hent->vn_q_next = q_first;
433 	q_first->vn_q_prev = hent;
434 	hp->vn_q_first = hent;
435 
436 	/* Set v_flag in vnode pointer from hent */
437 	vp->v_flag = hent->vn_flag;
438 
439 	return (0);
440 }
441 
442 /* Summary statistics of pages */
443 typedef struct memstat {
444 	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
445 	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
446 	struct vnode    *ms_zvp;	/* Cached address of zio vnode    */
447 	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
448 	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
449 	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
450 	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
451 	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
452 	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
453 	uint64_t	ms_total;	/* Pages on page hash		  */
454 	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
455 	struct vnode	ms_vn;		/* vnode buffer			  */
456 } memstat_t;
457 
/* True if the page belongs to the kernel vnode cached in stats. */
#define	MS_PP_ISKAS(pp, stats)	((pp)->p_vnode == (stats)->ms_kvp)

/* True if a zio vnode is known and the page belongs to it. */
#define	MS_PP_ISZFS_DATA(pp, stats)	\
	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
463 
464 /*
465  * Summarize pages by type and update stat information
466  */
467 
468 /* ARGSUSED */
469 static int
470 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
471 {
472 	struct vnode *vp = &stats->ms_vn;
473 
474 	if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
475 		return (WALK_NEXT);
476 	else if (MS_PP_ISKAS(pp, stats))
477 		stats->ms_kmem++;
478 	else if (MS_PP_ISZFS_DATA(pp, stats))
479 		stats->ms_zfs_data++;
480 	else if (PP_ISFREE(pp))
481 		stats->ms_cachelist++;
482 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
483 		return (WALK_ERR);
484 	else if (IS_SWAPFSVP(vp))
485 		stats->ms_anon++;
486 	else if ((vp->v_flag & VVMEXEC) != 0)
487 		stats->ms_exec++;
488 	else
489 		stats->ms_vnode++;
490 
491 	stats->ms_total++;
492 
493 	return (WALK_NEXT);
494 }
495 
496 /* ARGSUSED */
497 int
498 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
499 {
500 	pgcnt_t total_pages, physmem;
501 	ulong_t freemem;
502 	memstat_t stats;
503 	GElf_Sym sym;
504 	vn_htable_t ht;
505 	struct vnode *kvps;
506 	uintptr_t vn_size = 0;
507 #if defined(__i386) || defined(__amd64)
508 	bln_stats_t bln_stats;
509 	ssize_t bln_size;
510 #endif
511 
512 	bzero(&stats, sizeof (memstat_t));
513 
514 	/*
515 	 * -s size, is an internal option. It specifies the size of vn_htable.
516 	 * Hash table size is set in the following order:
517 	 * If user has specified the size that is larger than VN_LARGE: try it,
518 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
519 	 * failed to allocate default to VN_SMALL.
520 	 * For a better efficiency of hash table it is highly recommended to
521 	 * set size to a prime number.
522 	 */
523 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
524 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
525 		return (DCMD_USAGE);
526 
527 	/* Initialize vnode hash list and queue */
528 	vn_htable_init(&ht, vn_size);
529 	stats.ms_vn_htable = &ht;
530 
531 	/* Total physical memory */
532 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
533 		mdb_warn("unable to read total_pages");
534 		return (DCMD_ERR);
535 	}
536 
537 	/* Artificially limited memory */
538 	if (mdb_readvar(&physmem, "physmem") == -1) {
539 		mdb_warn("unable to read physmem");
540 		return (DCMD_ERR);
541 	}
542 
543 	/* read kernel vnode array pointer */
544 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
545 	    (GElf_Sym *)&sym) == -1) {
546 		mdb_warn("unable to read kvps");
547 		return (DCMD_ERR);
548 	}
549 	kvps = (struct vnode *)(uintptr_t)sym.st_value;
550 	stats.ms_kvp =  &kvps[KV_KVP];
551 
552 	/*
553 	 * Read the zio vnode pointer.
554 	 */
555 	stats.ms_zvp = &kvps[KV_ZVP];
556 
557 	/*
558 	 * If physmem != total_pages, then the administrator has limited the
559 	 * number of pages available in the system.  Excluded pages are
560 	 * associated with the unused pages vnode.  Read this vnode so the
561 	 * pages can be excluded in the page accounting.
562 	 */
563 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
564 	    (GElf_Sym *)&sym) == -1) {
565 		mdb_warn("unable to read unused_pages_vp");
566 		return (DCMD_ERR);
567 	}
568 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
569 
570 	/* walk all pages, collect statistics */
571 	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
572 	    &stats) == -1) {
573 		mdb_warn("can't walk memseg");
574 		return (DCMD_ERR);
575 	}
576 
577 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
578 		((physmem) * 10)))
579 
580 	mdb_printf("Page Summary                Pages                MB"
581 	    "  %%Tot\n");
582 	mdb_printf("------------     ----------------  ----------------"
583 	    "  ----\n");
584 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
585 	    stats.ms_kmem,
586 	    (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
587 	    MS_PCT_TOTAL(stats.ms_kmem));
588 
589 	if (stats.ms_zfs_data != 0)
590 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
591 		    stats.ms_zfs_data,
592 		    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
593 		    MS_PCT_TOTAL(stats.ms_zfs_data));
594 
595 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
596 	    stats.ms_anon,
597 	    (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
598 	    MS_PCT_TOTAL(stats.ms_anon));
599 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
600 	    stats.ms_exec,
601 	    (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
602 	    MS_PCT_TOTAL(stats.ms_exec));
603 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
604 	    stats.ms_vnode,
605 	    (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
606 	    MS_PCT_TOTAL(stats.ms_vnode));
607 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
608 	    stats.ms_cachelist,
609 	    (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
610 	    MS_PCT_TOTAL(stats.ms_cachelist));
611 
612 	/*
613 	 * occasionally, we double count pages above.  To avoid printing
614 	 * absurdly large values for freemem, we clamp it at zero.
615 	 */
616 	if (physmem > stats.ms_total)
617 		freemem = physmem - stats.ms_total;
618 	else
619 		freemem = 0;
620 
621 #if defined(__i386) || defined(__amd64)
622 	/* Are we running under Xen?  If so, get balloon memory usage. */
623 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
624 		if (freemem > bln_stats.bln_hv_pages)
625 			freemem -= bln_stats.bln_hv_pages;
626 		else
627 			freemem = 0;
628 	}
629 #endif
630 
631 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
632 	    (uint64_t)freemem * PAGESIZE / (1024 * 1024),
633 	    MS_PCT_TOTAL(freemem));
634 
635 #if defined(__i386) || defined(__amd64)
636 	if (bln_size != -1) {
637 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
638 		    bln_stats.bln_hv_pages,
639 		    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
640 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
641 	}
642 #endif
643 
644 	mdb_printf("\nTotal            %16lu  %16lu\n",
645 	    physmem,
646 	    (uint64_t)physmem * PAGESIZE / (1024 * 1024));
647 
648 	if (physmem != total_pages) {
649 		mdb_printf("Physical         %16lu  %16lu\n",
650 		    total_pages,
651 		    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
652 	}
653 
654 #undef MS_PCT_TOTAL
655 
656 	return (DCMD_OK);
657 }
658 
/*
 * Print the help text for the ::pagelookup dcmd.
 */
void
pagelookup_help(void)
{
	static const char help_text[] =
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n"
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n"
	    "The latter two forms are useful in pipelines.\n";

	mdb_printf(help_text);
}
672 
673 int
674 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
675 {
676 	uintptr_t vp = -(uintptr_t)1;
677 	uint64_t offset = -(uint64_t)1;
678 
679 	uintptr_t pageaddr;
680 	int hasaddr = (flags & DCMD_ADDRSPEC);
681 	int usedaddr = 0;
682 
683 	if (mdb_getopts(argc, argv,
684 	    'v', MDB_OPT_UINTPTR, &vp,
685 	    'o', MDB_OPT_UINT64, &offset,
686 	    0) != argc) {
687 		return (DCMD_USAGE);
688 	}
689 
690 	if (vp == -(uintptr_t)1) {
691 		if (offset == -(uint64_t)1) {
692 			mdb_warn(
693 			    "pagelookup: at least one of -v vp or -o offset "
694 			    "required.\n");
695 			return (DCMD_USAGE);
696 		}
697 		vp = addr;
698 		usedaddr = 1;
699 	} else if (offset == -(uint64_t)1) {
700 		offset = mdb_get_dot();
701 		usedaddr = 1;
702 	}
703 	if (usedaddr && !hasaddr) {
704 		mdb_warn("pagelookup: address required\n");
705 		return (DCMD_USAGE);
706 	}
707 	if (!usedaddr && hasaddr) {
708 		mdb_warn(
709 		    "pagelookup: address specified when both -v and -o were "
710 		    "passed");
711 		return (DCMD_USAGE);
712 	}
713 
714 	pageaddr = mdb_page_lookup(vp, offset);
715 	if (pageaddr == 0) {
716 		mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
717 		    vp, offset);
718 		return (DCMD_OK);
719 	}
720 	mdb_printf("%#lr\n", pageaddr);		/* this is PIPE_OUT friendly */
721 	return (DCMD_OK);
722 }
723 
724 /*ARGSUSED*/
725 int
726 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
727 {
728 	uintptr_t pp;
729 
730 	if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
731 		return (DCMD_USAGE);
732 	}
733 
734 	pp = mdb_pfn2page((pfn_t)addr);
735 	if (pp == 0) {
736 		return (DCMD_ERR);
737 	}
738 
739 	if (flags & DCMD_PIPE_OUT) {
740 		mdb_printf("%#lr\n", pp);
741 	} else {
742 		mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
743 	}
744 
745 	return (DCMD_OK);
746 }
747 
748 int
749 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
750 {
751 	page_t	p;
752 
753 	if (!(flags & DCMD_ADDRSPEC)) {
754 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
755 			mdb_warn("can't walk pages");
756 			return (DCMD_ERR);
757 		}
758 		return (DCMD_OK);
759 	}
760 
761 	if (DCMD_HDRSPEC(flags)) {
762 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
763 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
764 		    "LCT", "COW", "IO", "FS", "ST");
765 	}
766 
767 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
768 		mdb_warn("can't read page_t at %#lx", addr);
769 		return (DCMD_ERR);
770 	}
771 
772 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
773 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
774 	    p.p_iolock_state, p.p_fsdata, p.p_state);
775 
776 	return (DCMD_OK);
777 }
778 
779 int
780 swap_walk_init(mdb_walk_state_t *wsp)
781 {
782 	void	*ptr;
783 
784 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
785 		mdb_warn("swapinfo not found or invalid");
786 		return (WALK_ERR);
787 	}
788 
789 	wsp->walk_addr = (uintptr_t)ptr;
790 
791 	return (WALK_NEXT);
792 }
793 
794 int
795 swap_walk_step(mdb_walk_state_t *wsp)
796 {
797 	uintptr_t	sip;
798 	struct swapinfo	si;
799 
800 	sip = wsp->walk_addr;
801 
802 	if (sip == NULL)
803 		return (WALK_DONE);
804 
805 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
806 		mdb_warn("unable to read swapinfo at %#lx", sip);
807 		return (WALK_ERR);
808 	}
809 
810 	wsp->walk_addr = (uintptr_t)si.si_next;
811 
812 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
813 }
814 
815 int
816 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
817 {
818 	struct swapinfo	si;
819 	char		*name;
820 
821 	if (!(flags & DCMD_ADDRSPEC)) {
822 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
823 			mdb_warn("can't walk swapinfo");
824 			return (DCMD_ERR);
825 		}
826 		return (DCMD_OK);
827 	}
828 
829 	if (DCMD_HDRSPEC(flags)) {
830 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
831 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
832 	}
833 
834 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
835 		mdb_warn("can't read swapinfo at %#lx", addr);
836 		return (DCMD_ERR);
837 	}
838 
839 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
840 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
841 		name = "*error*";
842 
843 	mdb_printf("%0?lx %?p %9d %9d %s\n",
844 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
845 
846 	return (DCMD_OK);
847 }
848 
849 int
850 memlist_walk_step(mdb_walk_state_t *wsp)
851 {
852 	uintptr_t	mlp;
853 	struct memlist	ml;
854 
855 	mlp = wsp->walk_addr;
856 
857 	if (mlp == NULL)
858 		return (WALK_DONE);
859 
860 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
861 		mdb_warn("unable to read memlist at %#lx", mlp);
862 		return (WALK_ERR);
863 	}
864 
865 	wsp->walk_addr = (uintptr_t)ml.ml_next;
866 
867 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
868 }
869 
870 int
871 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
872 {
873 	struct memlist	ml;
874 
875 	if (!(flags & DCMD_ADDRSPEC)) {
876 		uintptr_t ptr;
877 		uint_t list = 0;
878 		int i;
879 		static const char *lists[] = {
880 			"phys_install",
881 			"phys_avail",
882 			"virt_avail"
883 		};
884 
885 		if (mdb_getopts(argc, argv,
886 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
887 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
888 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
889 			return (DCMD_USAGE);
890 
891 		if (!list)
892 			list = 1;
893 
894 		for (i = 0; list; i++, list >>= 1) {
895 			if (!(list & 1))
896 				continue;
897 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
898 			    (ptr == NULL)) {
899 				mdb_warn("%s not found or invalid", lists[i]);
900 				return (DCMD_ERR);
901 			}
902 
903 			mdb_printf("%s:\n", lists[i]);
904 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
905 			    ptr) == -1) {
906 				mdb_warn("can't walk memlist");
907 				return (DCMD_ERR);
908 			}
909 		}
910 		return (DCMD_OK);
911 	}
912 
913 	if (DCMD_HDRSPEC(flags))
914 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
915 
916 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
917 		mdb_warn("can't read memlist at %#lx", addr);
918 		return (DCMD_ERR);
919 	}
920 
921 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);
922 
923 	return (DCMD_OK);
924 }
925 
926 int
927 seg_walk_init(mdb_walk_state_t *wsp)
928 {
929 	if (wsp->walk_addr == NULL) {
930 		mdb_warn("seg walk must begin at struct as *\n");
931 		return (WALK_ERR);
932 	}
933 
934 	/*
935 	 * this is really just a wrapper to AVL tree walk
936 	 */
937 	wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
938 	return (avl_walk_init(wsp));
939 }
940 
941 /*ARGSUSED*/
942 int
943 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
944 {
945 	struct seg s;
946 
947 	if (argc != 0)
948 		return (DCMD_USAGE);
949 
950 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
951 		mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
952 		    "SEG", "BASE", "SIZE", "DATA", "OPS");
953 	}
954 
955 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
956 		mdb_warn("failed to read seg at %p", addr);
957 		return (DCMD_ERR);
958 	}
959 
960 	mdb_printf("%?p %?p %?lx %?p %a\n",
961 	    addr, s.s_base, s.s_size, s.s_data, s.s_ops);
962 
963 	return (DCMD_OK);
964 }
965 
966 /*ARGSUSED*/
967 static int
968 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
969 {
970 	pgcnt_t *nres = out;
971 
972 	(*nres)++;
973 
974 	return (WALK_NEXT);
975 }
976 
977 static int
978 pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
979 {
980 
981 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
982 
983 	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
984 		struct segvn_data svn;
985 		pgcnt_t nres = 0;
986 
987 		svn.vp = NULL;
988 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
989 
990 		/*
991 		 * Use the segvn_pages walker to find all of the in-core pages
992 		 * for this mapping.
993 		 */
994 		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
995 		    (uintptr_t)seg->s_data) == -1) {
996 			mdb_warn("failed to walk segvn_pages (s_data=%p)",
997 			    seg->s_data);
998 		}
999 		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);
1000 
1001 		if (svn.vp != NULL) {
1002 			char buf[29];
1003 
1004 			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
1005 			mdb_printf(" %s", buf);
1006 		} else {
1007 			mdb_printf(" [ anon ]");
1008 		}
1009 	} else {
1010 		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
1011 	}
1012 
1013 	mdb_printf("\n");
1014 	return (WALK_NEXT);
1015 }
1016 
1017 static int
1018 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
1019 {
1020 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1021 
1022 	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
1023 		struct segvn_data svn;
1024 
1025 		svn.vp = NULL;
1026 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1027 
1028 		if (svn.vp != NULL) {
1029 			mdb_printf(" %0?p", svn.vp);
1030 		} else {
1031 			mdb_printf(" [ anon ]");
1032 		}
1033 	} else {
1034 		mdb_printf(" [ &%a ]", seg->s_ops);
1035 	}
1036 
1037 	mdb_printf("\n");
1038 	return (WALK_NEXT);
1039 }
1040 
1041 /*ARGSUSED*/
1042 int
1043 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1044 {
1045 	uintptr_t segvn;
1046 	proc_t proc;
1047 	uint_t quick = FALSE;
1048 	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
1049 
1050 	GElf_Sym sym;
1051 
1052 	if (!(flags & DCMD_ADDRSPEC))
1053 		return (DCMD_USAGE);
1054 
1055 	if (mdb_getopts(argc, argv,
1056 	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
1057 		return (DCMD_USAGE);
1058 
1059 	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
1060 		mdb_warn("failed to read proc at %p", addr);
1061 		return (DCMD_ERR);
1062 	}
1063 
1064 	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
1065 		segvn = (uintptr_t)sym.st_value;
1066 	else
1067 		segvn = NULL;
1068 
1069 	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
1070 
1071 	if (quick) {
1072 		mdb_printf("VNODE\n");
1073 		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
1074 	} else {
1075 		mdb_printf("%8s %s\n", "RES", "PATH");
1076 	}
1077 
1078 	if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) {
1079 		mdb_warn("failed to walk segments of as %p", proc.p_as);
1080 		return (DCMD_ERR);
1081 	}
1082 
1083 	return (DCMD_OK);
1084 }
1085 
1086 typedef struct anon_walk_data {
1087 	uintptr_t *aw_levone;
1088 	uintptr_t *aw_levtwo;
1089 	size_t aw_minslot;
1090 	size_t aw_maxslot;
1091 	pgcnt_t aw_nlevone;
1092 	pgcnt_t aw_levone_ndx;
1093 	size_t aw_levtwo_ndx;
1094 	struct anon_map	*aw_ampp;
1095 	struct anon_map aw_amp;
1096 	struct anon_hdr	aw_ahp;
1097 	int		aw_all;	/* report all anon pointers, even NULLs */
1098 } anon_walk_data_t;
1099 
1100 int
1101 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1102 {
1103 	anon_walk_data_t *aw;
1104 
1105 	if (wsp->walk_addr == NULL) {
1106 		mdb_warn("anon walk doesn't support global walks\n");
1107 		return (WALK_ERR);
1108 	}
1109 
1110 	aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1111 	aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1112 
1113 	if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1114 		mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1115 		mdb_free(aw, sizeof (anon_walk_data_t));
1116 		return (WALK_ERR);
1117 	}
1118 
1119 	if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1120 	    (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1121 		mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1122 		mdb_free(aw, sizeof (anon_walk_data_t));
1123 		return (WALK_ERR);
1124 	}
1125 
1126 	/* update min and maxslot with the given constraints */
1127 	maxslot = MIN(maxslot, aw->aw_ahp.size);
1128 	minslot = MIN(minslot, maxslot);
1129 
1130 	if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1131 	    (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1132 		aw->aw_nlevone = maxslot;
1133 		aw->aw_levone_ndx = minslot;
1134 		aw->aw_levtwo = NULL;
1135 	} else {
1136 		aw->aw_nlevone =
1137 		    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1138 		aw->aw_levone_ndx = 0;
1139 		aw->aw_levtwo =
1140 		    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1141 	}
1142 
1143 	aw->aw_levone =
1144 	    mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1145 	aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1146 
1147 	mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1148 	    (uintptr_t)aw->aw_ahp.array_chunk);
1149 
1150 	aw->aw_levtwo_ndx = 0;
1151 	aw->aw_minslot = minslot;
1152 	aw->aw_maxslot = maxslot;
1153 
1154 out:
1155 	wsp->walk_data = aw;
1156 	return (0);
1157 }
1158 
1159 int
1160 anon_walk_step(mdb_walk_state_t *wsp)
1161 {
1162 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1163 	struct anon anon;
1164 	uintptr_t anonptr;
1165 	ulong_t slot;
1166 
1167 	/*
1168 	 * Once we've walked through level one, we're done.
1169 	 */
1170 	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
1171 		return (WALK_DONE);
1172 	}
1173 
1174 	if (aw->aw_levtwo == NULL) {
1175 		anonptr = aw->aw_levone[aw->aw_levone_ndx];
1176 		aw->aw_levone_ndx++;
1177 	} else {
1178 		if (aw->aw_levtwo_ndx == 0) {
1179 			uintptr_t levtwoptr;
1180 
1181 			/* The first time through, skip to our first index. */
1182 			if (aw->aw_levone_ndx == 0) {
1183 				aw->aw_levone_ndx =
1184 				    aw->aw_minslot / ANON_CHUNK_SIZE;
1185 				aw->aw_levtwo_ndx =
1186 				    aw->aw_minslot % ANON_CHUNK_SIZE;
1187 			}
1188 
1189 			levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];
1190 
1191 			if (levtwoptr == NULL) {
1192 				if (!aw->aw_all) {
1193 					aw->aw_levtwo_ndx = 0;
1194 					aw->aw_levone_ndx++;
1195 					return (WALK_NEXT);
1196 				}
1197 				bzero(aw->aw_levtwo,
1198 				    ANON_CHUNK_SIZE * sizeof (uintptr_t));
1199 
1200 			} else if (mdb_vread(aw->aw_levtwo,
1201 			    ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
1202 			    -1) {
1203 				mdb_warn("unable to read anon_map %p's "
1204 				    "second-level map %d at %p",
1205 				    aw->aw_ampp, aw->aw_levone_ndx,
1206 				    levtwoptr);
1207 				return (WALK_ERR);
1208 			}
1209 		}
1210 		slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
1211 		anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];
1212 
1213 		/* update the indices for next time */
1214 		aw->aw_levtwo_ndx++;
1215 		if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
1216 			aw->aw_levtwo_ndx = 0;
1217 			aw->aw_levone_ndx++;
1218 		}
1219 
1220 		/* make sure the slot # is in the requested range */
1221 		if (slot >= aw->aw_maxslot) {
1222 			return (WALK_DONE);
1223 		}
1224 	}
1225 
1226 	if (anonptr != NULL) {
1227 		mdb_vread(&anon, sizeof (anon), anonptr);
1228 		return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
1229 	}
1230 	if (aw->aw_all) {
1231 		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1232 	}
1233 	return (WALK_NEXT);
1234 }
1235 
1236 void
1237 anon_walk_fini(mdb_walk_state_t *wsp)
1238 {
1239 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1240 
1241 	if (aw->aw_levtwo != NULL)
1242 		mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1243 
1244 	mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1245 	mdb_free(aw, sizeof (anon_walk_data_t));
1246 }
1247 
1248 int
1249 anon_walk_init(mdb_walk_state_t *wsp)
1250 {
1251 	return (anon_walk_init_common(wsp, 0, ULONG_MAX));
1252 }
1253 
1254 int
1255 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1256 {
1257 	const uintptr_t		svd_addr = wsp->walk_addr;
1258 	uintptr_t		amp_addr;
1259 	uintptr_t		seg_addr;
1260 	struct segvn_data	svd;
1261 	struct anon_map		amp;
1262 	struct seg		seg;
1263 
1264 	if (svd_addr == NULL) {
1265 		mdb_warn("segvn_anon walk doesn't support global walks\n");
1266 		return (WALK_ERR);
1267 	}
1268 	if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1269 		mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1270 		    svd_addr);
1271 		return (WALK_ERR);
1272 	}
1273 	if (svd.amp == NULL) {
1274 		mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1275 		    svd_addr);
1276 		return (WALK_ERR);
1277 	}
1278 	amp_addr = (uintptr_t)svd.amp;
1279 	if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
1280 		mdb_warn("segvn_anon walk: unable to read amp %p for "
1281 		    "segvn_data %p", amp_addr, svd_addr);
1282 		return (WALK_ERR);
1283 	}
1284 	seg_addr = (uintptr_t)svd.seg;
1285 	if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1286 		mdb_warn("segvn_anon walk: unable to read seg %p for "
1287 		    "segvn_data %p", seg_addr, svd_addr);
1288 		return (WALK_ERR);
1289 	}
1290 	if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1291 		mdb_warn("anon map %p is too small for segment %p\n",
1292 		    amp_addr, seg_addr);
1293 		return (WALK_ERR);
1294 	}
1295 
1296 	wsp->walk_addr = amp_addr;
1297 	return (anon_walk_init_common(wsp,
1298 	    svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1299 }
1300 
1301 
1302 typedef struct {
1303 	u_offset_t		svs_offset;
1304 	uintptr_t		svs_page;
1305 } segvn_sparse_t;
1306 #define	SEGVN_MAX_SPARSE	((128 * 1024) / sizeof (segvn_sparse_t))
1307 
1308 typedef struct {
1309 	uintptr_t		svw_svdp;
1310 	struct segvn_data	svw_svd;
1311 	struct seg		svw_seg;
1312 	size_t			svw_walkoff;
1313 	ulong_t			svw_anonskip;
1314 	segvn_sparse_t		*svw_sparse;
1315 	size_t			svw_sparse_idx;
1316 	size_t			svw_sparse_count;
1317 	size_t			svw_sparse_size;
1318 	uint8_t			svw_sparse_overflow;
1319 	uint8_t			svw_all;
1320 } segvn_walk_data_t;
1321 
1322 static int
1323 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1324 {
1325 	segvn_walk_data_t	*const	svw = arg;
1326 	const page_t		*const	pp = pp_arg;
1327 	const u_offset_t		offset = pp->p_offset;
1328 	segvn_sparse_t		*const	cur =
1329 	    &svw->svw_sparse[svw->svw_sparse_count];
1330 
1331 	/* See if the page is of interest */
1332 	if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1333 		return (WALK_NEXT);
1334 	}
1335 	/* See if we have space for the new entry, then add it. */
1336 	if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1337 		svw->svw_sparse_overflow = 1;
1338 		return (WALK_DONE);
1339 	}
1340 	svw->svw_sparse_count++;
1341 	cur->svs_offset = offset;
1342 	cur->svs_page = addr;
1343 	return (WALK_NEXT);
1344 }
1345 
1346 static int
1347 segvn_sparse_cmp(const void *lp, const void *rp)
1348 {
1349 	const segvn_sparse_t *const	l = lp;
1350 	const segvn_sparse_t *const	r = rp;
1351 
1352 	if (l->svs_offset < r->svs_offset) {
1353 		return (-1);
1354 	}
1355 	if (l->svs_offset > r->svs_offset) {
1356 		return (1);
1357 	}
1358 	return (0);
1359 }
1360 
1361 /*
1362  * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
1363  * structure.  For segvn_datas without an anon structure, it just looks up
1364  * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
1365  * pass through to the vnode, and non-null slots are checked for residency.
1366  */
1367 int
1368 segvn_pages_walk_init(mdb_walk_state_t *wsp)
1369 {
1370 	segvn_walk_data_t	*svw;
1371 	struct segvn_data	*svd;
1372 
1373 	if (wsp->walk_addr == NULL) {
1374 		mdb_warn("segvn walk doesn't support global walks\n");
1375 		return (WALK_ERR);
1376 	}
1377 
1378 	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
1379 	svw->svw_svdp = wsp->walk_addr;
1380 	svw->svw_anonskip = 0;
1381 	svw->svw_sparse_idx = 0;
1382 	svw->svw_walkoff = 0;
1383 	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);
1384 
1385 	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
1386 	    -1) {
1387 		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
1388 		mdb_free(svw, sizeof (*svw));
1389 		return (WALK_ERR);
1390 	}
1391 
1392 	svd = &svw->svw_svd;
1393 	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
1394 	    (uintptr_t)svd->seg) == -1) {
1395 		mdb_warn("failed to read seg at %p (from %p)",
1396 		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
1397 		mdb_free(svw, sizeof (*svw));
1398 		return (WALK_ERR);
1399 	}
1400 
1401 	if (svd->amp == NULL && svd->vp == NULL) {
1402 		/* make the walk terminate immediately;  no pages */
1403 		svw->svw_walkoff = svw->svw_seg.s_size;
1404 
1405 	} else if (svd->amp == NULL &&
1406 	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
1407 		/*
1408 		 * If we don't have an anon pointer, and the segment is large,
1409 		 * we try to load the in-memory pages into a fixed-size array,
1410 		 * which is then sorted and reported directly.  This is much
1411 		 * faster than doing a mdb_page_lookup() for each possible
1412 		 * offset.
1413 		 *
1414 		 * If the allocation fails, or there are too many pages
1415 		 * in-core, we fall back to looking up the pages individually.
1416 		 */
1417 		svw->svw_sparse = mdb_alloc(
1418 		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
1419 		if (svw->svw_sparse != NULL) {
1420 			svw->svw_sparse_size = SEGVN_MAX_SPARSE;
1421 
1422 			if (mdb_pwalk("page", segvn_sparse_fill, svw,
1423 			    (uintptr_t)svd->vp) == -1 ||
1424 			    svw->svw_sparse_overflow) {
1425 				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1426 				    sizeof (*svw->svw_sparse));
1427 				svw->svw_sparse = NULL;
1428 			} else {
1429 				qsort(svw->svw_sparse, svw->svw_sparse_count,
1430 				    sizeof (*svw->svw_sparse),
1431 				    segvn_sparse_cmp);
1432 			}
1433 		}
1434 
1435 	} else if (svd->amp != NULL) {
1436 		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
1437 		    "segvn_anon" : "segvn_anon_all";
1438 		/*
1439 		 * If we're not printing all offsets, and the segvn_data has
1440 		 * no backing VP, we can use the "segvn_anon" walker, which
1441 		 * efficiently skips NULL slots.
1442 		 *
1443 		 * Otherwise, we layer over the "segvn_anon_all" walker
1444 		 * (which reports all anon slots, even NULL ones), so that
1445 		 * segvn_pages_walk_step() knows the precise offset for each
1446 		 * element.  It uses that offset information to look up the
1447 		 * backing pages for NULL anon slots.
1448 		 */
1449 		if (mdb_layered_walk(layer, wsp) == -1) {
1450 			mdb_warn("segvn_pages: failed to layer \"%s\" "
1451 			    "for segvn_data %p", layer, svw->svw_svdp);
1452 			mdb_free(svw, sizeof (*svw));
1453 			return (WALK_ERR);
1454 		}
1455 	}
1456 
1457 	wsp->walk_data = svw;
1458 	return (WALK_NEXT);
1459 }
1460 
1461 int
1462 segvn_pages_walk_step(mdb_walk_state_t *wsp)
1463 {
1464 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1465 	struct seg		*const	seg = &svw->svw_seg;
1466 	struct segvn_data	*const	svd = &svw->svw_svd;
1467 	uintptr_t		pp;
1468 	page_t			page;
1469 
1470 	/* If we've walked off the end of the segment, we're done. */
1471 	if (svw->svw_walkoff >= seg->s_size) {
1472 		return (WALK_DONE);
1473 	}
1474 
1475 	/*
1476 	 * If we've got a sparse page array, just send it directly.
1477 	 */
1478 	if (svw->svw_sparse != NULL) {
1479 		u_offset_t off;
1480 
1481 		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
1482 			pp = NULL;
1483 			if (!svw->svw_all) {
1484 				return (WALK_DONE);
1485 			}
1486 		} else {
1487 			segvn_sparse_t	*const svs =
1488 			    &svw->svw_sparse[svw->svw_sparse_idx];
1489 			off = svs->svs_offset - svd->offset;
1490 			if (svw->svw_all && svw->svw_walkoff != off) {
1491 				pp = NULL;
1492 			} else {
1493 				pp = svs->svs_page;
1494 				svw->svw_sparse_idx++;
1495 			}
1496 		}
1497 
1498 	} else if (svd->amp == NULL || wsp->walk_addr == NULL) {
1499 		/*
1500 		 * If there's no anon, or the anon slot is NULL, look up
1501 		 * <vp, offset>.
1502 		 */
1503 		if (svd->vp != NULL) {
1504 			pp = mdb_page_lookup((uintptr_t)svd->vp,
1505 			    svd->offset + svw->svw_walkoff);
1506 		} else {
1507 			pp = NULL;
1508 		}
1509 
1510 	} else {
1511 		const struct anon	*const	anon = wsp->walk_layer;
1512 
1513 		/*
1514 		 * We have a "struct anon"; if it's not swapped out,
1515 		 * look up the page.
1516 		 */
1517 		if (anon->an_vp != NULL || anon->an_off != 0) {
1518 			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
1519 			    anon->an_off);
1520 			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
1521 				mdb_warn("walk segvn_pages: segvn_data %p "
1522 				    "offset %ld, anon page <%p, %llx> not "
1523 				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
1524 				    anon->an_vp, anon->an_off);
1525 			}
1526 		} else {
1527 			if (anon->an_pvp == NULL) {
1528 				mdb_warn("walk segvn_pages: useless struct "
1529 				    "anon at %p\n", wsp->walk_addr);
1530 			}
1531 			pp = NULL;	/* nothing at this offset */
1532 		}
1533 	}
1534 
1535 	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
1536 	if (pp != NULL) {
1537 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
1538 			mdb_warn("unable to read page_t at %#lx", pp);
1539 			return (WALK_ERR);
1540 		}
1541 		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
1542 	}
1543 	if (svw->svw_all) {
1544 		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1545 	}
1546 	return (WALK_NEXT);
1547 }
1548 
1549 void
1550 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1551 {
1552 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1553 
1554 	if (svw->svw_sparse != NULL) {
1555 		mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1556 		    sizeof (*svw->svw_sparse));
1557 	}
1558 	mdb_free(svw, sizeof (*svw));
1559 }
1560 
/*
 * Map a <vnode address, offset> pair to an index into the segmap hash
 * table.  The macro is not self-contained: it expects a variable named
 * smd_hashmsk, holding the hash mask, to be in scope where it is expanded.
 *
 * NOTE(review): presumably this has to stay in step with the kernel's own
 * segmap hash function -- confirm against the segmap implementation.
 */
#define	SMAP_HASHFUNC(vp, off)				\
	((((uintptr_t)(vp) >> 6) +			\
	((uintptr_t)(vp) >> 3) +			\
	((off) >> MAXBSHIFT)) & smd_hashmsk)
1567 
1568 int
1569 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1570 {
1571 	long smd_hashmsk;
1572 	int hash;
1573 	uintptr_t offset = 0;
1574 	struct smap smp;
1575 	uintptr_t saddr, kaddr;
1576 	uintptr_t smd_hash, smd_smap;
1577 	struct seg seg;
1578 
1579 	if (!(flags & DCMD_ADDRSPEC))
1580 		return (DCMD_USAGE);
1581 
1582 	if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1583 		mdb_warn("failed to read smd_hashmsk");
1584 		return (DCMD_ERR);
1585 	}
1586 
1587 	if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1588 		mdb_warn("failed to read smd_hash");
1589 		return (DCMD_ERR);
1590 	}
1591 
1592 	if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1593 		mdb_warn("failed to read smd_hash");
1594 		return (DCMD_ERR);
1595 	}
1596 
1597 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1598 		mdb_warn("failed to read segkmap");
1599 		return (DCMD_ERR);
1600 	}
1601 
1602 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1603 		mdb_warn("failed to read segkmap at %p", kaddr);
1604 		return (DCMD_ERR);
1605 	}
1606 
1607 	if (argc != 0) {
1608 		const mdb_arg_t *arg = &argv[0];
1609 
1610 		if (arg->a_type == MDB_TYPE_IMMEDIATE)
1611 			offset = arg->a_un.a_val;
1612 		else
1613 			offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1614 	}
1615 
1616 	hash = SMAP_HASHFUNC(addr, offset);
1617 
1618 	if (mdb_vread(&saddr, sizeof (saddr),
1619 	    smd_hash + hash * sizeof (uintptr_t)) == -1) {
1620 		mdb_warn("couldn't read smap at %p",
1621 		    smd_hash + hash * sizeof (uintptr_t));
1622 		return (DCMD_ERR);
1623 	}
1624 
1625 	do {
1626 		if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1627 			mdb_warn("couldn't read smap at %p", saddr);
1628 			return (DCMD_ERR);
1629 		}
1630 
1631 		if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1632 			mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1633 			    addr, offset, saddr, ((saddr - smd_smap) /
1634 			    sizeof (smp)) * MAXBSIZE + seg.s_base);
1635 			return (DCMD_OK);
1636 		}
1637 
1638 		saddr = (uintptr_t)smp.sm_hash;
1639 	} while (saddr != NULL);
1640 
1641 	mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1642 	return (DCMD_OK);
1643 }
1644 
1645 /*ARGSUSED*/
1646 int
1647 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1648 {
1649 	uintptr_t kaddr;
1650 	struct seg seg;
1651 	struct segmap_data sd;
1652 
1653 	if (!(flags & DCMD_ADDRSPEC))
1654 		return (DCMD_USAGE);
1655 
1656 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1657 		mdb_warn("failed to read segkmap");
1658 		return (DCMD_ERR);
1659 	}
1660 
1661 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1662 		mdb_warn("failed to read segkmap at %p", kaddr);
1663 		return (DCMD_ERR);
1664 	}
1665 
1666 	if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1667 		mdb_warn("failed to read segmap_data at %p", seg.s_data);
1668 		return (DCMD_ERR);
1669 	}
1670 
1671 	mdb_printf("%p is smap %p\n", addr,
1672 	    ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1673 	    sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1674 
1675 	return (DCMD_OK);
1676 }
1677