xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision 1de082f7b7fd4b6629e14b0f9b8f94f6c0bda3c2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <sys/types.h>
28 #include <vm/page.h>
29 #include <sys/thread.h>
30 #include <sys/swap.h>
31 #include <sys/memlist.h>
32 #if defined(__i386) || defined(__amd64)
33 #include <sys/balloon_impl.h>
34 #endif
35 
36 /*
37  * Page walker.
38  * By default, this will walk all pages in the system.  If given an
39  * address, it will walk all pages belonging to the vnode at that
40  * address.
41  */
42 
43 /*
44  * page_walk_data
45  *
46  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
47  * number of hash locations remaining in the page hash table when
48  * walking all pages.
49  *
50  * The astute reader will notice that pw_hashloc is only used when
51  * reading all pages (to hold a pointer to our location in the page
52  * hash table), and that pw_first is only used when reading the pages
53  * belonging to a particular vnode (to hold a pointer to the first
54  * page).  While these could be combined to be a single pointer, they
55  * are left separate for clarity.
56  */
typedef struct page_walk_data {
	/* Hash slots still to scan; -1 marks a single-vnode walk. */
	long		pw_hashleft;
	/* Next page hash slot to read (all-pages walk only). */
	void		**pw_hashloc;
	/* First page visited; ends a vnode walk when seen again. */
	uintptr_t	pw_first;
} page_walk_data_t;
62 
63 int
64 page_walk_init(mdb_walk_state_t *wsp)
65 {
66 	page_walk_data_t	*pwd;
67 	void	**ptr;
68 	size_t	hashsz;
69 	vnode_t	vn;
70 
71 	if (wsp->walk_addr == NULL) {
72 
73 		/*
74 		 * Walk all pages
75 		 */
76 
77 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
78 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
79 		    (ptr == NULL) || (hashsz == 0)) {
80 			mdb_warn("page_hash, page_hashsz not found or invalid");
81 			return (WALK_ERR);
82 		}
83 
84 		/*
85 		 * Since we are walking all pages, initialize hashleft
86 		 * to be the remaining number of entries in the page
87 		 * hash.  hashloc is set the start of the page hash
88 		 * table.  Setting the walk address to 0 indicates that
89 		 * we aren't currently following a hash chain, and that
90 		 * we need to scan the page hash table for a page.
91 		 */
92 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
93 		pwd->pw_hashleft = hashsz;
94 		pwd->pw_hashloc = ptr;
95 		wsp->walk_addr = 0;
96 	} else {
97 
98 		/*
99 		 * Walk just this vnode
100 		 */
101 
102 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
103 			mdb_warn("unable to read vnode_t at %#lx",
104 			    wsp->walk_addr);
105 			return (WALK_ERR);
106 		}
107 
108 		/*
109 		 * We set hashleft to -1 to indicate that we are
110 		 * walking a vnode, and initialize first to 0 (it is
111 		 * used to terminate the walk, so it must not be set
112 		 * until after we have walked the first page).  The
113 		 * walk address is set to the first page.
114 		 */
115 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
116 		pwd->pw_hashleft = -1;
117 		pwd->pw_first = 0;
118 
119 		wsp->walk_addr = (uintptr_t)vn.v_pages;
120 	}
121 
122 	wsp->walk_data = pwd;
123 
124 	return (WALK_NEXT);
125 }
126 
127 int
128 page_walk_step(mdb_walk_state_t *wsp)
129 {
130 	page_walk_data_t	*pwd = wsp->walk_data;
131 	page_t		page;
132 	uintptr_t	pp;
133 
134 	pp = wsp->walk_addr;
135 
136 	if (pwd->pw_hashleft < 0) {
137 
138 		/* We're walking a vnode's pages */
139 
140 		/*
141 		 * If we don't have any pages to walk, we have come
142 		 * back around to the first one (we finished), or we
143 		 * can't read the page we're looking at, we are done.
144 		 */
145 		if (pp == NULL || pp == pwd->pw_first)
146 			return (WALK_DONE);
147 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
148 			mdb_warn("unable to read page_t at %#lx", pp);
149 			return (WALK_ERR);
150 		}
151 
152 		/*
153 		 * Set the walk address to the next page, and if the
154 		 * first page hasn't been set yet (i.e. we are on the
155 		 * first page), set it.
156 		 */
157 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
158 		if (pwd->pw_first == NULL)
159 			pwd->pw_first = pp;
160 
161 	} else if (pwd->pw_hashleft > 0) {
162 
163 		/* We're walking all pages */
164 
165 		/*
166 		 * If pp (the walk address) is NULL, we scan through
167 		 * the page hash table until we find a page.
168 		 */
169 		if (pp == NULL) {
170 
171 			/*
172 			 * Iterate through the page hash table until we
173 			 * find a page or reach the end.
174 			 */
175 			do {
176 				if (mdb_vread(&pp, sizeof (uintptr_t),
177 				    (uintptr_t)pwd->pw_hashloc) == -1) {
178 					mdb_warn("unable to read from %#p",
179 					    pwd->pw_hashloc);
180 					return (WALK_ERR);
181 				}
182 				pwd->pw_hashleft--;
183 				pwd->pw_hashloc++;
184 			} while (pwd->pw_hashleft && (pp == NULL));
185 
186 			/*
187 			 * We've reached the end; exit.
188 			 */
189 			if (pp == NULL)
190 				return (WALK_DONE);
191 		}
192 
193 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
194 			mdb_warn("unable to read page_t at %#lx", pp);
195 			return (WALK_ERR);
196 		}
197 
198 		/*
199 		 * Set the walk address to the next page.
200 		 */
201 		wsp->walk_addr = (uintptr_t)page.p_hash;
202 
203 	} else {
204 		/* We've finished walking all pages. */
205 		return (WALK_DONE);
206 	}
207 
208 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
209 }
210 
211 void
212 page_walk_fini(mdb_walk_state_t *wsp)
213 {
214 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
215 }
216 
217 /*
218  * allpages walks all pages in the system in order they appear in
219  * the memseg structure
220  */
221 
/* Number of page_t structures fetched per read by the allpages walker. */
#define	PAGE_BUFFER	128
223 
224 int
225 allpages_walk_init(mdb_walk_state_t *wsp)
226 {
227 	if (wsp->walk_addr != 0) {
228 		mdb_warn("allpages only supports global walks.\n");
229 		return (WALK_ERR);
230 	}
231 
232 	if (mdb_layered_walk("memseg", wsp) == -1) {
233 		mdb_warn("couldn't walk 'memseg'");
234 		return (WALK_ERR);
235 	}
236 
237 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
238 	return (WALK_NEXT);
239 }
240 
241 int
242 allpages_walk_step(mdb_walk_state_t *wsp)
243 {
244 	const struct memseg *msp = wsp->walk_layer;
245 	page_t *buf = wsp->walk_data;
246 	size_t pg_read, i;
247 	size_t pg_num = msp->pages_end - msp->pages_base;
248 	const page_t *pg_addr = msp->pages;
249 
250 	while (pg_num > 0) {
251 		pg_read = MIN(pg_num, PAGE_BUFFER);
252 
253 		if (mdb_vread(buf, pg_read * sizeof (page_t),
254 		    (uintptr_t)pg_addr) == -1) {
255 			mdb_warn("can't read page_t's at %#lx", pg_addr);
256 			return (WALK_ERR);
257 		}
258 		for (i = 0; i < pg_read; i++) {
259 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
260 			    &buf[i], wsp->walk_cbdata);
261 
262 			if (ret != WALK_NEXT)
263 				return (ret);
264 		}
265 		pg_num -= pg_read;
266 		pg_addr += pg_read;
267 	}
268 
269 	return (WALK_NEXT);
270 }
271 
272 void
273 allpages_walk_fini(mdb_walk_state_t *wsp)
274 {
275 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
276 }
277 
278 /*
279  * Hash table + LRU queue.
280  * This table is used to cache recently read vnodes for the memstat
281  * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds the memstat command on live, large CPU count systems.
283  */
284 
/* Fallback table size used when the larger allocation fails. */
#define	VN_SMALL	401
/* Default (and minimum requested) table size. */
#define	VN_LARGE	10007
/* Map a vnode address to its bucket index in the given hash table. */
#define	VN_HTABLE_KEY(p, hp)	((p) % (hp)->vn_htable_buckets)
288 
289 struct vn_htable_list {
290 	uint_t vn_flag;				/* v_flag from vnode	*/
291 	uintptr_t vn_ptr;			/* pointer to vnode	*/
292 	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
293 	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
294 	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
295 };
296 
297 /*
 * vn_q_first        -> points to the head of the queue: the vnode that was
 *                      most recently used
300  * vn_q_last         -> points to the oldest used vnode, and is freed once a new
301  *                      vnode is read.
302  * vn_htable         -> hash table
303  * vn_htable_buf     -> contains htable objects
304  * vn_htable_size    -> total number of items in the hash table
305  * vn_htable_buckets -> number of buckets in the hash table
306  */
typedef struct vn_htable {
	struct vn_htable_list  *vn_q_first;	/* most recently used entry */
	struct vn_htable_list  *vn_q_last;	/* next entry to be reused  */
	struct vn_htable_list **vn_htable;	/* array of hash chains	    */
	struct vn_htable_list  *vn_htable_buf;	/* backing array of entries */
	int vn_htable_size;			/* number of entries	    */
	int vn_htable_buckets;			/* number of hash chains    */
} vn_htable_t;
315 
316 
317 /* allocate memory, initilize hash table and LRU queue */
318 static void
319 vn_htable_init(vn_htable_t *hp, size_t vn_size)
320 {
321 	int i;
322 	int htable_size = MAX(vn_size, VN_LARGE);
323 
324 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
325 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
326 		htable_size = VN_SMALL;
327 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
328 		    * htable_size, UM_SLEEP|UM_GC);
329 	}
330 
331 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
332 	    * htable_size, UM_SLEEP|UM_GC);
333 
334 	hp->vn_q_first  = &hp->vn_htable_buf[0];
335 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
336 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
337 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
338 
339 	for (i = 1; i < (htable_size-1); i++) {
340 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
341 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
342 	}
343 
344 	hp->vn_htable_size = htable_size;
345 	hp->vn_htable_buckets = htable_size;
346 }
347 
348 
349 /*
350  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
351  * The function tries to find needed information in the following order:
352  *
353  * 1. check if ptr is the first in queue
354  * 2. check if ptr is in hash table (if so move it to the top of queue)
355  * 3. do mdb_vread, remove last queue item from queue and hash table.
356  *    Insert new information to freed object, and put this object in to the
357  *    top of the queue.
358  */
359 static int
360 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
361 {
362 	int hkey;
363 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
364 	struct vn_htable_list  *q_first = hp->vn_q_first;
365 
366 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
367 	if (q_first->vn_ptr == ptr) {
368 		vp->v_flag = q_first->vn_flag;
369 
370 		return (0);
371 	}
372 
373 	/* 2. search the hash table for this ptr */
374 	hkey = VN_HTABLE_KEY(ptr, hp);
375 	hent = hp->vn_htable[hkey];
376 	while (hent && (hent->vn_ptr != ptr))
377 		hent = hent->vn_h_next;
378 
379 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
380 	if (hent == NULL) {
381 		struct vnode vn;
382 
383 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
384 			mdb_warn("unable to read vnode_t at %#lx", ptr);
385 			return (-1);
386 		}
387 
388 		/* we will insert read data into the last element in queue */
389 		hent = hp->vn_q_last;
390 
391 		/* remove last hp->vn_q_last object from hash table */
392 		if (hent->vn_ptr) {
393 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
394 			while (*htmp != hent)
395 				htmp = &(*htmp)->vn_h_next;
396 			*htmp = hent->vn_h_next;
397 		}
398 
399 		/* insert data into new free object */
400 		hent->vn_ptr  = ptr;
401 		hent->vn_flag = vn.v_flag;
402 
403 		/* insert new object into hash table */
404 		hent->vn_h_next = hp->vn_htable[hkey];
405 		hp->vn_htable[hkey] = hent;
406 	}
407 
408 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
409 	q_next = hent->vn_q_next;
410 	q_prev = hent->vn_q_prev;
411 	if (q_next == NULL)
412 		hp->vn_q_last = q_prev;
413 	else
414 		q_next->vn_q_prev = q_prev;
415 	q_prev->vn_q_next = q_next;
416 
417 	/* Add to the front of queue */
418 	hent->vn_q_prev = NULL;
419 	hent->vn_q_next = q_first;
420 	q_first->vn_q_prev = hent;
421 	hp->vn_q_first = hent;
422 
423 	/* Set v_flag in vnode pointer from hent */
424 	vp->v_flag = hent->vn_flag;
425 
426 	return (0);
427 }
428 
429 /* Summary statistics of pages */
430 typedef struct memstat {
431 	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
432 	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
433 	struct vnode    *ms_zvp;	/* Cached address of zio vnode    */
434 	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
435 	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
436 	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
437 	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
438 	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
439 	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
440 	uint64_t	ms_total;	/* Pages on page hash		  */
441 	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
442 	struct vnode	ms_vn;		/* vnode buffer			  */
443 } memstat_t;
444 
/* True if the page belongs to the kernel vnode cached in stats. */
#define	MS_PP_ISKAS(pp, stats)				\
	((stats)->ms_kvp == (pp)->p_vnode)

/* True if a zio vnode is known and the page belongs to it. */
#define	MS_PP_ISZFS_DATA(pp, stats)			\
	((stats)->ms_zvp != NULL && (stats)->ms_zvp == (pp)->p_vnode)
450 
451 /*
452  * Summarize pages by type and update stat information
453  */
454 
455 /* ARGSUSED */
456 static int
457 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
458 {
459 	struct vnode *vp = &stats->ms_vn;
460 
461 	if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
462 		return (WALK_NEXT);
463 	else if (MS_PP_ISKAS(pp, stats))
464 		stats->ms_kmem++;
465 	else if (MS_PP_ISZFS_DATA(pp, stats))
466 		stats->ms_zfs_data++;
467 	else if (PP_ISFREE(pp))
468 		stats->ms_cachelist++;
469 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
470 		return (WALK_ERR);
471 	else if (IS_SWAPFSVP(vp))
472 		stats->ms_anon++;
473 	else if ((vp->v_flag & VVMEXEC) != 0)
474 		stats->ms_exec++;
475 	else
476 		stats->ms_vnode++;
477 
478 	stats->ms_total++;
479 
480 	return (WALK_NEXT);
481 }
482 
483 /* ARGSUSED */
484 int
485 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
486 {
487 	ulong_t pagesize;
488 	pgcnt_t total_pages, physmem;
489 	ulong_t freemem;
490 	memstat_t stats;
491 	GElf_Sym sym;
492 	vn_htable_t ht;
493 	uintptr_t vn_size = 0;
494 #if defined(__i386) || defined(__amd64)
495 	bln_stats_t bln_stats;
496 	ssize_t bln_size;
497 #endif
498 
499 	bzero(&stats, sizeof (memstat_t));
500 
501 	/*
502 	 * -s size, is an internal option. It specifies the size of vn_htable.
503 	 * Hash table size is set in the following order:
504 	 * If user has specified the size that is larger than VN_LARGE: try it,
505 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
506 	 * failed to allocate default to VN_SMALL.
507 	 * For a better efficiency of hash table it is highly recommended to
508 	 * set size to a prime number.
509 	 */
510 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
511 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
512 		return (DCMD_USAGE);
513 
514 	/* Initialize vnode hash list and queue */
515 	vn_htable_init(&ht, vn_size);
516 	stats.ms_vn_htable = &ht;
517 
518 	/* Grab base page size */
519 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
520 		mdb_warn("unable to read _pagesize");
521 		return (DCMD_ERR);
522 	}
523 
524 	/* Total physical memory */
525 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
526 		mdb_warn("unable to read total_pages");
527 		return (DCMD_ERR);
528 	}
529 
530 	/* Artificially limited memory */
531 	if (mdb_readvar(&physmem, "physmem") == -1) {
532 		mdb_warn("unable to read physmem");
533 		return (DCMD_ERR);
534 	}
535 
536 	/* read kernel vnode pointer */
537 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
538 	    (GElf_Sym *)&sym) == -1) {
539 		mdb_warn("unable to read kvp");
540 		return (DCMD_ERR);
541 	}
542 
543 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
544 
545 	/*
546 	 * Read the zio vnode pointer.  It may not exist on all kernels, so it
547 	 * it isn't found, it's not a fatal error.
548 	 */
549 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
550 	    (GElf_Sym *)&sym) == -1) {
551 		stats.ms_zvp = NULL;
552 	} else {
553 		stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
554 	}
555 
556 	/*
557 	 * If physmem != total_pages, then the administrator has limited the
558 	 * number of pages available in the system.  Excluded pages are
559 	 * associated with the unused pages vnode.  Read this vnode so the
560 	 * pages can be excluded in the page accounting.
561 	 */
562 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
563 	    (GElf_Sym *)&sym) == -1) {
564 		mdb_warn("unable to read unused_pages_vp");
565 		return (DCMD_ERR);
566 	}
567 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
568 
569 	/* walk all pages, collect statistics */
570 	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
571 	    &stats) == -1) {
572 		mdb_warn("can't walk memseg");
573 		return (DCMD_ERR);
574 	}
575 
576 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
577 		((physmem) * 10)))
578 
579 	mdb_printf("Page Summary                Pages                MB"
580 	    "  %%Tot\n");
581 	mdb_printf("------------     ----------------  ----------------"
582 	    "  ----\n");
583 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
584 	    stats.ms_kmem,
585 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
586 	    MS_PCT_TOTAL(stats.ms_kmem));
587 
588 	if (stats.ms_zfs_data != 0)
589 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
590 		    stats.ms_zfs_data,
591 		    (uint64_t)stats.ms_zfs_data * pagesize / (1024 * 1024),
592 		    MS_PCT_TOTAL(stats.ms_zfs_data));
593 
594 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
595 	    stats.ms_anon,
596 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
597 	    MS_PCT_TOTAL(stats.ms_anon));
598 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
599 	    stats.ms_exec,
600 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
601 	    MS_PCT_TOTAL(stats.ms_exec));
602 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
603 	    stats.ms_vnode,
604 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
605 	    MS_PCT_TOTAL(stats.ms_vnode));
606 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
607 	    stats.ms_cachelist,
608 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
609 	    MS_PCT_TOTAL(stats.ms_cachelist));
610 
611 	/*
612 	 * occasionally, we double count pages above.  To avoid printing
613 	 * absurdly large values for freemem, we clamp it at zero.
614 	 */
615 	if (physmem > stats.ms_total)
616 		freemem = physmem - stats.ms_total;
617 	else
618 		freemem = 0;
619 
620 #if defined(__i386) || defined(__amd64)
621 	/* Are we running under Xen?  If so, get balloon memory usage. */
622 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
623 		if (freemem > bln_stats.bln_hv_pages)
624 			freemem -= bln_stats.bln_hv_pages;
625 		else
626 			freemem = 0;
627 	}
628 #endif
629 
630 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
631 	    (uint64_t)freemem * pagesize / (1024 * 1024),
632 	    MS_PCT_TOTAL(freemem));
633 
634 #if defined(__i386) || defined(__amd64)
635 	if (bln_size != -1) {
636 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
637 		    bln_stats.bln_hv_pages,
638 		    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
639 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
640 	}
641 #endif
642 
643 	mdb_printf("\nTotal            %16lu  %16lu\n",
644 	    physmem,
645 	    (uint64_t)physmem * pagesize / (1024 * 1024));
646 
647 	if (physmem != total_pages) {
648 		mdb_printf("Physical         %16lu  %16lu\n",
649 		    total_pages,
650 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
651 	}
652 
653 #undef MS_PCT_TOTAL
654 
655 	return (DCMD_OK);
656 }
657 
658 int
659 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
660 {
661 	page_t	p;
662 
663 	if (!(flags & DCMD_ADDRSPEC)) {
664 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
665 			mdb_warn("can't walk pages");
666 			return (DCMD_ERR);
667 		}
668 		return (DCMD_OK);
669 	}
670 
671 	if (DCMD_HDRSPEC(flags)) {
672 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
673 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
674 		    "LCT", "COW", "IO", "FS", "ST");
675 	}
676 
677 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
678 		mdb_warn("can't read page_t at %#lx", addr);
679 		return (DCMD_ERR);
680 	}
681 
682 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
683 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
684 	    p.p_iolock_state, p.p_fsdata, p.p_state);
685 
686 	return (DCMD_OK);
687 }
688 
689 int
690 swap_walk_init(mdb_walk_state_t *wsp)
691 {
692 	void	*ptr;
693 
694 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
695 		mdb_warn("swapinfo not found or invalid");
696 		return (WALK_ERR);
697 	}
698 
699 	wsp->walk_addr = (uintptr_t)ptr;
700 
701 	return (WALK_NEXT);
702 }
703 
704 int
705 swap_walk_step(mdb_walk_state_t *wsp)
706 {
707 	uintptr_t	sip;
708 	struct swapinfo	si;
709 
710 	sip = wsp->walk_addr;
711 
712 	if (sip == NULL)
713 		return (WALK_DONE);
714 
715 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
716 		mdb_warn("unable to read swapinfo at %#lx", sip);
717 		return (WALK_ERR);
718 	}
719 
720 	wsp->walk_addr = (uintptr_t)si.si_next;
721 
722 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
723 }
724 
725 int
726 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
727 {
728 	struct swapinfo	si;
729 	char		*name;
730 
731 	if (!(flags & DCMD_ADDRSPEC)) {
732 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
733 			mdb_warn("can't walk swapinfo");
734 			return (DCMD_ERR);
735 		}
736 		return (DCMD_OK);
737 	}
738 
739 	if (DCMD_HDRSPEC(flags)) {
740 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
741 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
742 	}
743 
744 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
745 		mdb_warn("can't read swapinfo at %#lx", addr);
746 		return (DCMD_ERR);
747 	}
748 
749 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
750 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
751 		name = "*error*";
752 
753 	mdb_printf("%0?lx %?p %9d %9d %s\n",
754 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
755 
756 	return (DCMD_OK);
757 }
758 
759 int
760 memlist_walk_step(mdb_walk_state_t *wsp)
761 {
762 	uintptr_t	mlp;
763 	struct memlist	ml;
764 
765 	mlp = wsp->walk_addr;
766 
767 	if (mlp == NULL)
768 		return (WALK_DONE);
769 
770 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
771 		mdb_warn("unable to read memlist at %#lx", mlp);
772 		return (WALK_ERR);
773 	}
774 
775 	wsp->walk_addr = (uintptr_t)ml.next;
776 
777 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
778 }
779 
780 int
781 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
782 {
783 	struct memlist	ml;
784 
785 	if (!(flags & DCMD_ADDRSPEC)) {
786 		uintptr_t ptr;
787 		uint_t list = 0;
788 		int i;
789 		static const char *lists[] = {
790 			"phys_install",
791 			"phys_avail",
792 			"virt_avail"
793 		};
794 
795 		if (mdb_getopts(argc, argv,
796 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
797 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
798 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
799 			return (DCMD_USAGE);
800 
801 		if (!list)
802 			list = 1;
803 
804 		for (i = 0; list; i++, list >>= 1) {
805 			if (!(list & 1))
806 				continue;
807 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
808 			    (ptr == NULL)) {
809 				mdb_warn("%s not found or invalid", lists[i]);
810 				return (DCMD_ERR);
811 			}
812 
813 			mdb_printf("%s:\n", lists[i]);
814 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
815 			    ptr) == -1) {
816 				mdb_warn("can't walk memlist");
817 				return (DCMD_ERR);
818 			}
819 		}
820 		return (DCMD_OK);
821 	}
822 
823 	if (DCMD_HDRSPEC(flags))
824 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
825 
826 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
827 		mdb_warn("can't read memlist at %#lx", addr);
828 		return (DCMD_ERR);
829 	}
830 
831 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
832 
833 	return (DCMD_OK);
834 }
835