xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision d042c5a26452797afc4fe8c2ceddebff94d88745)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2015 Joyent, Inc.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ks.h>
29 #include <sys/types.h>
30 #include <sys/memlist.h>
31 #include <sys/swap.h>
32 #include <sys/systm.h>
33 #include <sys/thread.h>
34 #include <vm/anon.h>
35 #include <vm/as.h>
36 #include <vm/page.h>
37 #include <sys/thread.h>
38 #include <sys/swap.h>
39 #include <sys/memlist.h>
40 #include <sys/vnode.h>
41 #include <vm/seg_map.h>
42 #include <vm/seg_vn.h>
43 #if defined(__i386) || defined(__amd64)
44 #include <sys/balloon_impl.h>
45 #endif
46 
47 #include "avl.h"
48 #include "memory.h"
49 
50 /*
51  * Page walker.
52  * By default, this will walk all pages in the system.  If given an
53  * address, it will walk all pages belonging to the vnode at that
54  * address.
55  */
56 
57 /*
58  * page_walk_data
59  *
60  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
61  * number of hash locations remaining in the page hash table when
62  * walking all pages.
63  *
64  * The astute reader will notice that pw_hashloc is only used when
65  * reading all pages (to hold a pointer to our location in the page
66  * hash table), and that pw_first is only used when reading the pages
67  * belonging to a particular vnode (to hold a pointer to the first
68  * page).  While these could be combined to be a single pointer, they
69  * are left separate for clarity.
70  */
struct page_walk_data {
	/* hash buckets still to be read; -1 marks a single-vnode walk */
	long		pw_hashleft;
	/* next page hash bucket to read (walk of all pages only) */
	void		**pw_hashloc;
	/* first page visited; ends a vnode walk when seen again */
	uintptr_t	pw_first;
};

typedef struct page_walk_data page_walk_data_t;
76 
77 int
78 page_walk_init(mdb_walk_state_t *wsp)
79 {
80 	page_walk_data_t	*pwd;
81 	void	**ptr;
82 	size_t	hashsz;
83 	vnode_t	vn;
84 
85 	if (wsp->walk_addr == NULL) {
86 
87 		/*
88 		 * Walk all pages
89 		 */
90 
91 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
92 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
93 		    (ptr == NULL) || (hashsz == 0)) {
94 			mdb_warn("page_hash, page_hashsz not found or invalid");
95 			return (WALK_ERR);
96 		}
97 
98 		/*
99 		 * Since we are walking all pages, initialize hashleft
100 		 * to be the remaining number of entries in the page
101 		 * hash.  hashloc is set the start of the page hash
102 		 * table.  Setting the walk address to 0 indicates that
103 		 * we aren't currently following a hash chain, and that
104 		 * we need to scan the page hash table for a page.
105 		 */
106 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
107 		pwd->pw_hashleft = hashsz;
108 		pwd->pw_hashloc = ptr;
109 		wsp->walk_addr = 0;
110 	} else {
111 
112 		/*
113 		 * Walk just this vnode
114 		 */
115 
116 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
117 			mdb_warn("unable to read vnode_t at %#lx",
118 			    wsp->walk_addr);
119 			return (WALK_ERR);
120 		}
121 
122 		/*
123 		 * We set hashleft to -1 to indicate that we are
124 		 * walking a vnode, and initialize first to 0 (it is
125 		 * used to terminate the walk, so it must not be set
126 		 * until after we have walked the first page).  The
127 		 * walk address is set to the first page.
128 		 */
129 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
130 		pwd->pw_hashleft = -1;
131 		pwd->pw_first = 0;
132 
133 		wsp->walk_addr = (uintptr_t)vn.v_pages;
134 	}
135 
136 	wsp->walk_data = pwd;
137 
138 	return (WALK_NEXT);
139 }
140 
141 int
142 page_walk_step(mdb_walk_state_t *wsp)
143 {
144 	page_walk_data_t	*pwd = wsp->walk_data;
145 	page_t		page;
146 	uintptr_t	pp;
147 
148 	pp = wsp->walk_addr;
149 
150 	if (pwd->pw_hashleft < 0) {
151 
152 		/* We're walking a vnode's pages */
153 
154 		/*
155 		 * If we don't have any pages to walk, we have come
156 		 * back around to the first one (we finished), or we
157 		 * can't read the page we're looking at, we are done.
158 		 */
159 		if (pp == NULL || pp == pwd->pw_first)
160 			return (WALK_DONE);
161 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
162 			mdb_warn("unable to read page_t at %#lx", pp);
163 			return (WALK_ERR);
164 		}
165 
166 		/*
167 		 * Set the walk address to the next page, and if the
168 		 * first page hasn't been set yet (i.e. we are on the
169 		 * first page), set it.
170 		 */
171 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
172 		if (pwd->pw_first == NULL)
173 			pwd->pw_first = pp;
174 
175 	} else if (pwd->pw_hashleft > 0) {
176 
177 		/* We're walking all pages */
178 
179 		/*
180 		 * If pp (the walk address) is NULL, we scan through
181 		 * the page hash table until we find a page.
182 		 */
183 		if (pp == NULL) {
184 
185 			/*
186 			 * Iterate through the page hash table until we
187 			 * find a page or reach the end.
188 			 */
189 			do {
190 				if (mdb_vread(&pp, sizeof (uintptr_t),
191 				    (uintptr_t)pwd->pw_hashloc) == -1) {
192 					mdb_warn("unable to read from %#p",
193 					    pwd->pw_hashloc);
194 					return (WALK_ERR);
195 				}
196 				pwd->pw_hashleft--;
197 				pwd->pw_hashloc++;
198 			} while (pwd->pw_hashleft && (pp == NULL));
199 
200 			/*
201 			 * We've reached the end; exit.
202 			 */
203 			if (pp == NULL)
204 				return (WALK_DONE);
205 		}
206 
207 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
208 			mdb_warn("unable to read page_t at %#lx", pp);
209 			return (WALK_ERR);
210 		}
211 
212 		/*
213 		 * Set the walk address to the next page.
214 		 */
215 		wsp->walk_addr = (uintptr_t)page.p_hash;
216 
217 	} else {
218 		/* We've finished walking all pages. */
219 		return (WALK_DONE);
220 	}
221 
222 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
223 }
224 
225 void
226 page_walk_fini(mdb_walk_state_t *wsp)
227 {
228 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
229 }
230 
231 /*
232  * allpages walks all pages in the system in order they appear in
233  * the memseg structure
234  */
235 
236 #define	PAGE_BUFFER	128
237 
238 int
239 allpages_walk_init(mdb_walk_state_t *wsp)
240 {
241 	if (wsp->walk_addr != 0) {
242 		mdb_warn("allpages only supports global walks.\n");
243 		return (WALK_ERR);
244 	}
245 
246 	if (mdb_layered_walk("memseg", wsp) == -1) {
247 		mdb_warn("couldn't walk 'memseg'");
248 		return (WALK_ERR);
249 	}
250 
251 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
252 	return (WALK_NEXT);
253 }
254 
255 int
256 allpages_walk_step(mdb_walk_state_t *wsp)
257 {
258 	const struct memseg *msp = wsp->walk_layer;
259 	page_t *buf = wsp->walk_data;
260 	size_t pg_read, i;
261 	size_t pg_num = msp->pages_end - msp->pages_base;
262 	const page_t *pg_addr = msp->pages;
263 
264 	while (pg_num > 0) {
265 		pg_read = MIN(pg_num, PAGE_BUFFER);
266 
267 		if (mdb_vread(buf, pg_read * sizeof (page_t),
268 		    (uintptr_t)pg_addr) == -1) {
269 			mdb_warn("can't read page_t's at %#lx", pg_addr);
270 			return (WALK_ERR);
271 		}
272 		for (i = 0; i < pg_read; i++) {
273 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
274 			    &buf[i], wsp->walk_cbdata);
275 
276 			if (ret != WALK_NEXT)
277 				return (ret);
278 		}
279 		pg_num -= pg_read;
280 		pg_addr += pg_read;
281 	}
282 
283 	return (WALK_NEXT);
284 }
285 
286 void
287 allpages_walk_fini(mdb_walk_state_t *wsp)
288 {
289 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
290 }
291 
292 /*
293  * Hash table + LRU queue.
294  * This table is used to cache recently read vnodes for the memstat
295  * command, to reduce the number of mdb_vread calls.  This greatly
296  * speeds the memstat command on on live, large CPU count systems.
297  */
298 
299 #define	VN_SMALL	401
300 #define	VN_LARGE	10007
301 #define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))
302 
303 struct vn_htable_list {
304 	uint_t vn_flag;				/* v_flag from vnode	*/
305 	uintptr_t vn_ptr;			/* pointer to vnode	*/
306 	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
307 	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
308 	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
309 };
310 
/*
 * vn_q_first        -> points to the head of the queue: the vnode that was
 *                      most recently used
 * vn_q_last         -> points to the oldest used vnode, and is freed once a
 *                      new vnode is read.
 * vn_htable         -> hash table
 * vn_htable_buf     -> contains htable objects
 * vn_htable_size    -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
struct vn_htable {
	struct vn_htable_list	*vn_q_first;
	struct vn_htable_list	*vn_q_last;
	struct vn_htable_list	**vn_htable;
	struct vn_htable_list	*vn_htable_buf;
	int			vn_htable_size;
	int			vn_htable_buckets;
};

typedef struct vn_htable vn_htable_t;
329 
330 
331 /* allocate memory, initilize hash table and LRU queue */
332 static void
333 vn_htable_init(vn_htable_t *hp, size_t vn_size)
334 {
335 	int i;
336 	int htable_size = MAX(vn_size, VN_LARGE);
337 
338 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
339 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
340 		htable_size = VN_SMALL;
341 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
342 		    * htable_size, UM_SLEEP|UM_GC);
343 	}
344 
345 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
346 	    * htable_size, UM_SLEEP|UM_GC);
347 
348 	hp->vn_q_first  = &hp->vn_htable_buf[0];
349 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
350 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
351 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
352 
353 	for (i = 1; i < (htable_size-1); i++) {
354 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
355 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
356 	}
357 
358 	hp->vn_htable_size = htable_size;
359 	hp->vn_htable_buckets = htable_size;
360 }
361 
362 
363 /*
364  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
365  * The function tries to find needed information in the following order:
366  *
367  * 1. check if ptr is the first in queue
368  * 2. check if ptr is in hash table (if so move it to the top of queue)
369  * 3. do mdb_vread, remove last queue item from queue and hash table.
370  *    Insert new information to freed object, and put this object in to the
371  *    top of the queue.
372  */
373 static int
374 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
375 {
376 	int hkey;
377 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
378 	struct vn_htable_list  *q_first = hp->vn_q_first;
379 
380 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
381 	if (q_first->vn_ptr == ptr) {
382 		vp->v_flag = q_first->vn_flag;
383 
384 		return (0);
385 	}
386 
387 	/* 2. search the hash table for this ptr */
388 	hkey = VN_HTABLE_KEY(ptr, hp);
389 	hent = hp->vn_htable[hkey];
390 	while (hent && (hent->vn_ptr != ptr))
391 		hent = hent->vn_h_next;
392 
393 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
394 	if (hent == NULL) {
395 		struct vnode vn;
396 
397 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
398 			mdb_warn("unable to read vnode_t at %#lx", ptr);
399 			return (-1);
400 		}
401 
402 		/* we will insert read data into the last element in queue */
403 		hent = hp->vn_q_last;
404 
405 		/* remove last hp->vn_q_last object from hash table */
406 		if (hent->vn_ptr) {
407 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
408 			while (*htmp != hent)
409 				htmp = &(*htmp)->vn_h_next;
410 			*htmp = hent->vn_h_next;
411 		}
412 
413 		/* insert data into new free object */
414 		hent->vn_ptr  = ptr;
415 		hent->vn_flag = vn.v_flag;
416 
417 		/* insert new object into hash table */
418 		hent->vn_h_next = hp->vn_htable[hkey];
419 		hp->vn_htable[hkey] = hent;
420 	}
421 
422 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
423 	q_next = hent->vn_q_next;
424 	q_prev = hent->vn_q_prev;
425 	if (q_next == NULL)
426 		hp->vn_q_last = q_prev;
427 	else
428 		q_next->vn_q_prev = q_prev;
429 	q_prev->vn_q_next = q_next;
430 
431 	/* Add to the front of queue */
432 	hent->vn_q_prev = NULL;
433 	hent->vn_q_next = q_first;
434 	q_first->vn_q_prev = hent;
435 	hp->vn_q_first = hent;
436 
437 	/* Set v_flag in vnode pointer from hent */
438 	vp->v_flag = hent->vn_flag;
439 
440 	return (0);
441 }
442 
443 /* Summary statistics of pages */
444 typedef struct memstat {
445 	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
446 	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
447 	struct vnode    *ms_zvp;	/* Cached address of zio vnode    */
448 	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
449 	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
450 	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
451 	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
452 	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
453 	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
454 	uint64_t	ms_bootpages;	/* Pages on the bootpages list    */
455 	uint64_t	ms_total;	/* Pages on page hash		  */
456 	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
457 	struct vnode	ms_vn;		/* vnode buffer			  */
458 } memstat_t;
459 
460 #define	MS_PP_ISKAS(pp, stats)				\
461 	((pp)->p_vnode == (stats)->ms_kvp)
462 
463 #define	MS_PP_ISZFS_DATA(pp, stats)			\
464 	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
465 
466 /*
467  * Summarize pages by type and update stat information
468  */
469 
470 /* ARGSUSED */
471 static int
472 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
473 {
474 	struct vnode *vp = &stats->ms_vn;
475 
476 	if (PP_ISBOOTPAGES(pp))
477 		stats->ms_bootpages++;
478 	else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
479 		return (WALK_NEXT);
480 	else if (MS_PP_ISKAS(pp, stats))
481 		stats->ms_kmem++;
482 	else if (MS_PP_ISZFS_DATA(pp, stats))
483 		stats->ms_zfs_data++;
484 	else if (PP_ISFREE(pp))
485 		stats->ms_cachelist++;
486 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
487 		return (WALK_ERR);
488 	else if (IS_SWAPFSVP(vp))
489 		stats->ms_anon++;
490 	else if ((vp->v_flag & VVMEXEC) != 0)
491 		stats->ms_exec++;
492 	else
493 		stats->ms_vnode++;
494 
495 	stats->ms_total++;
496 
497 	return (WALK_NEXT);
498 }
499 
500 /* ARGSUSED */
501 int
502 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
503 {
504 	pgcnt_t total_pages, physmem;
505 	ulong_t freemem;
506 	memstat_t stats;
507 	GElf_Sym sym;
508 	vn_htable_t ht;
509 	struct vnode *kvps;
510 	uintptr_t vn_size = 0;
511 #if defined(__i386) || defined(__amd64)
512 	bln_stats_t bln_stats;
513 	ssize_t bln_size;
514 #endif
515 
516 	bzero(&stats, sizeof (memstat_t));
517 
518 	/*
519 	 * -s size, is an internal option. It specifies the size of vn_htable.
520 	 * Hash table size is set in the following order:
521 	 * If user has specified the size that is larger than VN_LARGE: try it,
522 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
523 	 * failed to allocate default to VN_SMALL.
524 	 * For a better efficiency of hash table it is highly recommended to
525 	 * set size to a prime number.
526 	 */
527 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
528 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
529 		return (DCMD_USAGE);
530 
531 	/* Initialize vnode hash list and queue */
532 	vn_htable_init(&ht, vn_size);
533 	stats.ms_vn_htable = &ht;
534 
535 	/* Total physical memory */
536 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
537 		mdb_warn("unable to read total_pages");
538 		return (DCMD_ERR);
539 	}
540 
541 	/* Artificially limited memory */
542 	if (mdb_readvar(&physmem, "physmem") == -1) {
543 		mdb_warn("unable to read physmem");
544 		return (DCMD_ERR);
545 	}
546 
547 	/* read kernel vnode array pointer */
548 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
549 	    (GElf_Sym *)&sym) == -1) {
550 		mdb_warn("unable to read kvps");
551 		return (DCMD_ERR);
552 	}
553 	kvps = (struct vnode *)(uintptr_t)sym.st_value;
554 	stats.ms_kvp =  &kvps[KV_KVP];
555 
556 	/*
557 	 * Read the zio vnode pointer.
558 	 */
559 	stats.ms_zvp = &kvps[KV_ZVP];
560 
561 	/*
562 	 * If physmem != total_pages, then the administrator has limited the
563 	 * number of pages available in the system.  Excluded pages are
564 	 * associated with the unused pages vnode.  Read this vnode so the
565 	 * pages can be excluded in the page accounting.
566 	 */
567 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
568 	    (GElf_Sym *)&sym) == -1) {
569 		mdb_warn("unable to read unused_pages_vp");
570 		return (DCMD_ERR);
571 	}
572 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
573 
574 	/* walk all pages, collect statistics */
575 	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
576 	    &stats) == -1) {
577 		mdb_warn("can't walk memseg");
578 		return (DCMD_ERR);
579 	}
580 
581 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
582 		((physmem) * 10)))
583 
584 	mdb_printf("Page Summary                Pages                MB"
585 	    "  %%Tot\n");
586 	mdb_printf("------------     ----------------  ----------------"
587 	    "  ----\n");
588 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
589 	    stats.ms_kmem,
590 	    (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
591 	    MS_PCT_TOTAL(stats.ms_kmem));
592 
593 	if (stats.ms_bootpages != 0) {
594 		mdb_printf("Boot pages       %16llu  %16llu  %3lu%%\n",
595 		    stats.ms_bootpages,
596 		    (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
597 		    MS_PCT_TOTAL(stats.ms_bootpages));
598 	}
599 
600 	if (stats.ms_zfs_data != 0) {
601 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
602 		    stats.ms_zfs_data,
603 		    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
604 		    MS_PCT_TOTAL(stats.ms_zfs_data));
605 	}
606 
607 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
608 	    stats.ms_anon,
609 	    (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
610 	    MS_PCT_TOTAL(stats.ms_anon));
611 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
612 	    stats.ms_exec,
613 	    (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
614 	    MS_PCT_TOTAL(stats.ms_exec));
615 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
616 	    stats.ms_vnode,
617 	    (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
618 	    MS_PCT_TOTAL(stats.ms_vnode));
619 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
620 	    stats.ms_cachelist,
621 	    (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
622 	    MS_PCT_TOTAL(stats.ms_cachelist));
623 
624 	/*
625 	 * occasionally, we double count pages above.  To avoid printing
626 	 * absurdly large values for freemem, we clamp it at zero.
627 	 */
628 	if (physmem > stats.ms_total)
629 		freemem = physmem - stats.ms_total;
630 	else
631 		freemem = 0;
632 
633 #if defined(__i386) || defined(__amd64)
634 	/* Are we running under Xen?  If so, get balloon memory usage. */
635 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
636 		if (freemem > bln_stats.bln_hv_pages)
637 			freemem -= bln_stats.bln_hv_pages;
638 		else
639 			freemem = 0;
640 	}
641 #endif
642 
643 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
644 	    (uint64_t)freemem * PAGESIZE / (1024 * 1024),
645 	    MS_PCT_TOTAL(freemem));
646 
647 #if defined(__i386) || defined(__amd64)
648 	if (bln_size != -1) {
649 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
650 		    bln_stats.bln_hv_pages,
651 		    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
652 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
653 	}
654 #endif
655 
656 	mdb_printf("\nTotal            %16lu  %16lu\n",
657 	    physmem,
658 	    (uint64_t)physmem * PAGESIZE / (1024 * 1024));
659 
660 	if (physmem != total_pages) {
661 		mdb_printf("Physical         %16lu  %16lu\n",
662 		    total_pages,
663 		    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
664 	}
665 
666 #undef MS_PCT_TOTAL
667 
668 	return (DCMD_OK);
669 }
670 
/*
 * Print the help text for the ::pagelookup dcmd.
 */
void
pagelookup_help(void)
{
	/* What the dcmd does. */
	mdb_printf(
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n");

	/* The accepted invocation forms. */
	mdb_printf(
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n");

	mdb_printf("The latter two forms are useful in pipelines.\n");
}
684 
685 int
686 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
687 {
688 	uintptr_t vp = -(uintptr_t)1;
689 	uint64_t offset = -(uint64_t)1;
690 
691 	uintptr_t pageaddr;
692 	int hasaddr = (flags & DCMD_ADDRSPEC);
693 	int usedaddr = 0;
694 
695 	if (mdb_getopts(argc, argv,
696 	    'v', MDB_OPT_UINTPTR, &vp,
697 	    'o', MDB_OPT_UINT64, &offset,
698 	    0) != argc) {
699 		return (DCMD_USAGE);
700 	}
701 
702 	if (vp == -(uintptr_t)1) {
703 		if (offset == -(uint64_t)1) {
704 			mdb_warn(
705 			    "pagelookup: at least one of -v vp or -o offset "
706 			    "required.\n");
707 			return (DCMD_USAGE);
708 		}
709 		vp = addr;
710 		usedaddr = 1;
711 	} else if (offset == -(uint64_t)1) {
712 		offset = mdb_get_dot();
713 		usedaddr = 1;
714 	}
715 	if (usedaddr && !hasaddr) {
716 		mdb_warn("pagelookup: address required\n");
717 		return (DCMD_USAGE);
718 	}
719 	if (!usedaddr && hasaddr) {
720 		mdb_warn(
721 		    "pagelookup: address specified when both -v and -o were "
722 		    "passed");
723 		return (DCMD_USAGE);
724 	}
725 
726 	pageaddr = mdb_page_lookup(vp, offset);
727 	if (pageaddr == 0) {
728 		mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
729 		    vp, offset);
730 		return (DCMD_OK);
731 	}
732 	mdb_printf("%#lr\n", pageaddr);		/* this is PIPE_OUT friendly */
733 	return (DCMD_OK);
734 }
735 
736 /*ARGSUSED*/
737 int
738 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
739 {
740 	uintptr_t pp;
741 
742 	if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
743 		return (DCMD_USAGE);
744 	}
745 
746 	pp = mdb_pfn2page((pfn_t)addr);
747 	if (pp == 0) {
748 		return (DCMD_ERR);
749 	}
750 
751 	if (flags & DCMD_PIPE_OUT) {
752 		mdb_printf("%#lr\n", pp);
753 	} else {
754 		mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
755 	}
756 
757 	return (DCMD_OK);
758 }
759 
760 int
761 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
762 {
763 	page_t	p;
764 
765 	if (!(flags & DCMD_ADDRSPEC)) {
766 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
767 			mdb_warn("can't walk pages");
768 			return (DCMD_ERR);
769 		}
770 		return (DCMD_OK);
771 	}
772 
773 	if (DCMD_HDRSPEC(flags)) {
774 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
775 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
776 		    "LCT", "COW", "IO", "FS", "ST");
777 	}
778 
779 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
780 		mdb_warn("can't read page_t at %#lx", addr);
781 		return (DCMD_ERR);
782 	}
783 
784 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
785 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
786 	    p.p_iolock_state, p.p_fsdata, p.p_state);
787 
788 	return (DCMD_OK);
789 }
790 
791 int
792 swap_walk_init(mdb_walk_state_t *wsp)
793 {
794 	void	*ptr;
795 
796 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
797 		mdb_warn("swapinfo not found or invalid");
798 		return (WALK_ERR);
799 	}
800 
801 	wsp->walk_addr = (uintptr_t)ptr;
802 
803 	return (WALK_NEXT);
804 }
805 
806 int
807 swap_walk_step(mdb_walk_state_t *wsp)
808 {
809 	uintptr_t	sip;
810 	struct swapinfo	si;
811 
812 	sip = wsp->walk_addr;
813 
814 	if (sip == NULL)
815 		return (WALK_DONE);
816 
817 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
818 		mdb_warn("unable to read swapinfo at %#lx", sip);
819 		return (WALK_ERR);
820 	}
821 
822 	wsp->walk_addr = (uintptr_t)si.si_next;
823 
824 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
825 }
826 
827 int
828 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
829 {
830 	struct swapinfo	si;
831 	char		*name;
832 
833 	if (!(flags & DCMD_ADDRSPEC)) {
834 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
835 			mdb_warn("can't walk swapinfo");
836 			return (DCMD_ERR);
837 		}
838 		return (DCMD_OK);
839 	}
840 
841 	if (DCMD_HDRSPEC(flags)) {
842 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
843 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
844 	}
845 
846 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
847 		mdb_warn("can't read swapinfo at %#lx", addr);
848 		return (DCMD_ERR);
849 	}
850 
851 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
852 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
853 		name = "*error*";
854 
855 	mdb_printf("%0?lx %?p %9d %9d %s\n",
856 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
857 
858 	return (DCMD_OK);
859 }
860 
861 int
862 memlist_walk_step(mdb_walk_state_t *wsp)
863 {
864 	uintptr_t	mlp;
865 	struct memlist	ml;
866 
867 	mlp = wsp->walk_addr;
868 
869 	if (mlp == NULL)
870 		return (WALK_DONE);
871 
872 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
873 		mdb_warn("unable to read memlist at %#lx", mlp);
874 		return (WALK_ERR);
875 	}
876 
877 	wsp->walk_addr = (uintptr_t)ml.ml_next;
878 
879 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
880 }
881 
882 int
883 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
884 {
885 	struct memlist	ml;
886 
887 	if (!(flags & DCMD_ADDRSPEC)) {
888 		uintptr_t ptr;
889 		uint_t list = 0;
890 		int i;
891 		static const char *lists[] = {
892 			"phys_install",
893 			"phys_avail",
894 			"virt_avail"
895 		};
896 
897 		if (mdb_getopts(argc, argv,
898 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
899 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
900 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
901 			return (DCMD_USAGE);
902 
903 		if (!list)
904 			list = 1;
905 
906 		for (i = 0; list; i++, list >>= 1) {
907 			if (!(list & 1))
908 				continue;
909 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
910 			    (ptr == NULL)) {
911 				mdb_warn("%s not found or invalid", lists[i]);
912 				return (DCMD_ERR);
913 			}
914 
915 			mdb_printf("%s:\n", lists[i]);
916 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
917 			    ptr) == -1) {
918 				mdb_warn("can't walk memlist");
919 				return (DCMD_ERR);
920 			}
921 		}
922 		return (DCMD_OK);
923 	}
924 
925 	if (DCMD_HDRSPEC(flags))
926 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
927 
928 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
929 		mdb_warn("can't read memlist at %#lx", addr);
930 		return (DCMD_ERR);
931 	}
932 
933 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);
934 
935 	return (DCMD_OK);
936 }
937 
938 int
939 seg_walk_init(mdb_walk_state_t *wsp)
940 {
941 	if (wsp->walk_addr == NULL) {
942 		mdb_warn("seg walk must begin at struct as *\n");
943 		return (WALK_ERR);
944 	}
945 
946 	/*
947 	 * this is really just a wrapper to AVL tree walk
948 	 */
949 	wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
950 	return (avl_walk_init(wsp));
951 }
952 
953 /*ARGSUSED*/
954 int
955 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
956 {
957 	struct seg s;
958 
959 	if (argc != 0)
960 		return (DCMD_USAGE);
961 
962 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
963 		mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
964 		    "SEG", "BASE", "SIZE", "DATA", "OPS");
965 	}
966 
967 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
968 		mdb_warn("failed to read seg at %p", addr);
969 		return (DCMD_ERR);
970 	}
971 
972 	mdb_printf("%?p %?p %?lx %?p %a\n",
973 	    addr, s.s_base, s.s_size, s.s_data, s.s_ops);
974 
975 	return (DCMD_OK);
976 }
977 
978 /*ARGSUSED*/
979 static int
980 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
981 {
982 	pgcnt_t *nres = out;
983 
984 	(*nres)++;
985 
986 	return (WALK_NEXT);
987 }
988 
989 static int
990 pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
991 {
992 
993 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
994 
995 	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
996 		struct segvn_data svn;
997 		pgcnt_t nres = 0;
998 
999 		svn.vp = NULL;
1000 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1001 
1002 		/*
1003 		 * Use the segvn_pages walker to find all of the in-core pages
1004 		 * for this mapping.
1005 		 */
1006 		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
1007 		    (uintptr_t)seg->s_data) == -1) {
1008 			mdb_warn("failed to walk segvn_pages (s_data=%p)",
1009 			    seg->s_data);
1010 		}
1011 		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);
1012 
1013 		if (svn.vp != NULL) {
1014 			char buf[29];
1015 
1016 			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
1017 			mdb_printf(" %s", buf);
1018 		} else {
1019 			mdb_printf(" [ anon ]");
1020 		}
1021 	} else {
1022 		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
1023 	}
1024 
1025 	mdb_printf("\n");
1026 	return (WALK_NEXT);
1027 }
1028 
1029 static int
1030 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
1031 {
1032 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1033 
1034 	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
1035 		struct segvn_data svn;
1036 
1037 		svn.vp = NULL;
1038 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1039 
1040 		if (svn.vp != NULL) {
1041 			mdb_printf(" %0?p", svn.vp);
1042 		} else {
1043 			mdb_printf(" [ anon ]");
1044 		}
1045 	} else {
1046 		mdb_printf(" [ &%a ]", seg->s_ops);
1047 	}
1048 
1049 	mdb_printf("\n");
1050 	return (WALK_NEXT);
1051 }
1052 
1053 /*ARGSUSED*/
1054 int
1055 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1056 {
1057 	uintptr_t segvn;
1058 	proc_t proc;
1059 	uint_t quick = FALSE;
1060 	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
1061 
1062 	GElf_Sym sym;
1063 
1064 	if (!(flags & DCMD_ADDRSPEC))
1065 		return (DCMD_USAGE);
1066 
1067 	if (mdb_getopts(argc, argv,
1068 	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
1069 		return (DCMD_USAGE);
1070 
1071 	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
1072 		mdb_warn("failed to read proc at %p", addr);
1073 		return (DCMD_ERR);
1074 	}
1075 
1076 	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
1077 		segvn = (uintptr_t)sym.st_value;
1078 	else
1079 		segvn = NULL;
1080 
1081 	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
1082 
1083 	if (quick) {
1084 		mdb_printf("VNODE\n");
1085 		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
1086 	} else {
1087 		mdb_printf("%8s %s\n", "RES", "PATH");
1088 	}
1089 
1090 	if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) {
1091 		mdb_warn("failed to walk segments of as %p", proc.p_as);
1092 		return (DCMD_ERR);
1093 	}
1094 
1095 	return (DCMD_OK);
1096 }
1097 
/*
 * Private state of the anon walkers.  It is allocated and filled in by
 * anon_walk_init_common(), advanced by anon_walk_step() and released by
 * anon_walk_fini().
 */
typedef struct anon_walk_data {
	/* local copy of aw_nlevone pointers read from aw_ahp.array_chunk */
	uintptr_t *aw_levone;
	/* ANON_CHUNK_SIZE-entry buffer for one chunk; NULL if single-level */
	uintptr_t *aw_levtwo;
	/* first slot to report (after clamping to the map size) */
	size_t aw_minslot;
	/* slot at which the walk stops; this slot itself is not reported */
	size_t aw_maxslot;
	/* number of entries in aw_levone */
	pgcnt_t aw_nlevone;
	/* index into aw_levone of the entry currently being walked */
	pgcnt_t aw_levone_ndx;
	/* index into aw_levtwo of the next entry to report */
	size_t aw_levtwo_ndx;
	/* target address of the anon_map being walked */
	struct anon_map	*aw_ampp;
	/* local copy of the anon_map at aw_ampp */
	struct anon_map aw_amp;
	/* local copy of the anon_hdr that aw_amp.ahp points to */
	struct anon_hdr	aw_ahp;
	int		aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;
1111 
1112 int
1113 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1114 {
1115 	anon_walk_data_t *aw;
1116 
1117 	if (wsp->walk_addr == NULL) {
1118 		mdb_warn("anon walk doesn't support global walks\n");
1119 		return (WALK_ERR);
1120 	}
1121 
1122 	aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1123 	aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1124 
1125 	if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1126 		mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1127 		mdb_free(aw, sizeof (anon_walk_data_t));
1128 		return (WALK_ERR);
1129 	}
1130 
1131 	if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1132 	    (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1133 		mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1134 		mdb_free(aw, sizeof (anon_walk_data_t));
1135 		return (WALK_ERR);
1136 	}
1137 
1138 	/* update min and maxslot with the given constraints */
1139 	maxslot = MIN(maxslot, aw->aw_ahp.size);
1140 	minslot = MIN(minslot, maxslot);
1141 
1142 	if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1143 	    (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1144 		aw->aw_nlevone = maxslot;
1145 		aw->aw_levone_ndx = minslot;
1146 		aw->aw_levtwo = NULL;
1147 	} else {
1148 		aw->aw_nlevone =
1149 		    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1150 		aw->aw_levone_ndx = 0;
1151 		aw->aw_levtwo =
1152 		    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1153 	}
1154 
1155 	aw->aw_levone =
1156 	    mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1157 	aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1158 
1159 	mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1160 	    (uintptr_t)aw->aw_ahp.array_chunk);
1161 
1162 	aw->aw_levtwo_ndx = 0;
1163 	aw->aw_minslot = minslot;
1164 	aw->aw_maxslot = maxslot;
1165 
1166 out:
1167 	wsp->walk_data = aw;
1168 	return (0);
1169 }
1170 
1171 int
1172 anon_walk_step(mdb_walk_state_t *wsp)
1173 {
1174 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1175 	struct anon anon;
1176 	uintptr_t anonptr;
1177 	ulong_t slot;
1178 
1179 	/*
1180 	 * Once we've walked through level one, we're done.
1181 	 */
1182 	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
1183 		return (WALK_DONE);
1184 	}
1185 
1186 	if (aw->aw_levtwo == NULL) {
1187 		anonptr = aw->aw_levone[aw->aw_levone_ndx];
1188 		aw->aw_levone_ndx++;
1189 	} else {
1190 		if (aw->aw_levtwo_ndx == 0) {
1191 			uintptr_t levtwoptr;
1192 
1193 			/* The first time through, skip to our first index. */
1194 			if (aw->aw_levone_ndx == 0) {
1195 				aw->aw_levone_ndx =
1196 				    aw->aw_minslot / ANON_CHUNK_SIZE;
1197 				aw->aw_levtwo_ndx =
1198 				    aw->aw_minslot % ANON_CHUNK_SIZE;
1199 			}
1200 
1201 			levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];
1202 
1203 			if (levtwoptr == NULL) {
1204 				if (!aw->aw_all) {
1205 					aw->aw_levtwo_ndx = 0;
1206 					aw->aw_levone_ndx++;
1207 					return (WALK_NEXT);
1208 				}
1209 				bzero(aw->aw_levtwo,
1210 				    ANON_CHUNK_SIZE * sizeof (uintptr_t));
1211 
1212 			} else if (mdb_vread(aw->aw_levtwo,
1213 			    ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
1214 			    -1) {
1215 				mdb_warn("unable to read anon_map %p's "
1216 				    "second-level map %d at %p",
1217 				    aw->aw_ampp, aw->aw_levone_ndx,
1218 				    levtwoptr);
1219 				return (WALK_ERR);
1220 			}
1221 		}
1222 		slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
1223 		anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];
1224 
1225 		/* update the indices for next time */
1226 		aw->aw_levtwo_ndx++;
1227 		if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
1228 			aw->aw_levtwo_ndx = 0;
1229 			aw->aw_levone_ndx++;
1230 		}
1231 
1232 		/* make sure the slot # is in the requested range */
1233 		if (slot >= aw->aw_maxslot) {
1234 			return (WALK_DONE);
1235 		}
1236 	}
1237 
1238 	if (anonptr != NULL) {
1239 		mdb_vread(&anon, sizeof (anon), anonptr);
1240 		return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
1241 	}
1242 	if (aw->aw_all) {
1243 		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1244 	}
1245 	return (WALK_NEXT);
1246 }
1247 
1248 void
1249 anon_walk_fini(mdb_walk_state_t *wsp)
1250 {
1251 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1252 
1253 	if (aw->aw_levtwo != NULL)
1254 		mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1255 
1256 	mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1257 	mdb_free(aw, sizeof (anon_walk_data_t));
1258 }
1259 
1260 int
1261 anon_walk_init(mdb_walk_state_t *wsp)
1262 {
1263 	return (anon_walk_init_common(wsp, 0, ULONG_MAX));
1264 }
1265 
1266 int
1267 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1268 {
1269 	const uintptr_t		svd_addr = wsp->walk_addr;
1270 	uintptr_t		amp_addr;
1271 	uintptr_t		seg_addr;
1272 	struct segvn_data	svd;
1273 	struct anon_map		amp;
1274 	struct seg		seg;
1275 
1276 	if (svd_addr == NULL) {
1277 		mdb_warn("segvn_anon walk doesn't support global walks\n");
1278 		return (WALK_ERR);
1279 	}
1280 	if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1281 		mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1282 		    svd_addr);
1283 		return (WALK_ERR);
1284 	}
1285 	if (svd.amp == NULL) {
1286 		mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1287 		    svd_addr);
1288 		return (WALK_ERR);
1289 	}
1290 	amp_addr = (uintptr_t)svd.amp;
1291 	if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
1292 		mdb_warn("segvn_anon walk: unable to read amp %p for "
1293 		    "segvn_data %p", amp_addr, svd_addr);
1294 		return (WALK_ERR);
1295 	}
1296 	seg_addr = (uintptr_t)svd.seg;
1297 	if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1298 		mdb_warn("segvn_anon walk: unable to read seg %p for "
1299 		    "segvn_data %p", seg_addr, svd_addr);
1300 		return (WALK_ERR);
1301 	}
1302 	if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1303 		mdb_warn("anon map %p is too small for segment %p\n",
1304 		    amp_addr, seg_addr);
1305 		return (WALK_ERR);
1306 	}
1307 
1308 	wsp->walk_addr = amp_addr;
1309 	return (anon_walk_init_common(wsp,
1310 	    svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1311 }
1312 
1313 
/*
 * One entry of the "sparse" page array built by segvn_sparse_fill(): a page
 * of the backing vnode that falls inside the segment being walked.
 */
typedef struct {
	u_offset_t		svs_offset;	/* the page's p_offset */
	uintptr_t		svs_page;	/* target address of the page */
} segvn_sparse_t;
/* Number of entries in the sparse array: as many as fit in 128K. */
#define	SEGVN_MAX_SPARSE	((128 * 1024) / sizeof (segvn_sparse_t))
1319 
/*
 * Private state of the segvn_pages walker.  It is allocated zero-filled by
 * segvn_pages_walk_init() and released by segvn_pages_walk_fini().
 */
typedef struct {
	/* target address of the segvn_data being walked */
	uintptr_t		svw_svdp;
	/* local copy of that segvn_data */
	struct segvn_data	svw_svd;
	/* local copy of the seg that svw_svd.seg points to */
	struct seg		svw_seg;
	/* byte offset into the segment; advanced by PAGESIZE on every step */
	size_t			svw_walkoff;
	/* set to 0 at init; not otherwise used by the walker functions */
	ulong_t			svw_anonskip;
	/* sorted array of resident pages, or NULL when not in use */
	segvn_sparse_t		*svw_sparse;
	/* index of the next svw_sparse entry to report */
	size_t			svw_sparse_idx;
	/* number of valid entries in svw_sparse */
	size_t			svw_sparse_count;
	/* capacity of svw_sparse, in entries */
	size_t			svw_sparse_size;
	/* set when more pages were found than svw_sparse can hold */
	uint8_t			svw_sparse_overflow;
	/* non-zero: invoke the callback for non-resident offsets as well */
	uint8_t			svw_all;
} segvn_walk_data_t;
1333 
1334 static int
1335 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1336 {
1337 	segvn_walk_data_t	*const	svw = arg;
1338 	const page_t		*const	pp = pp_arg;
1339 	const u_offset_t		offset = pp->p_offset;
1340 	segvn_sparse_t		*const	cur =
1341 	    &svw->svw_sparse[svw->svw_sparse_count];
1342 
1343 	/* See if the page is of interest */
1344 	if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1345 		return (WALK_NEXT);
1346 	}
1347 	/* See if we have space for the new entry, then add it. */
1348 	if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1349 		svw->svw_sparse_overflow = 1;
1350 		return (WALK_DONE);
1351 	}
1352 	svw->svw_sparse_count++;
1353 	cur->svs_offset = offset;
1354 	cur->svs_page = addr;
1355 	return (WALK_NEXT);
1356 }
1357 
1358 static int
1359 segvn_sparse_cmp(const void *lp, const void *rp)
1360 {
1361 	const segvn_sparse_t *const	l = lp;
1362 	const segvn_sparse_t *const	r = rp;
1363 
1364 	if (l->svs_offset < r->svs_offset) {
1365 		return (-1);
1366 	}
1367 	if (l->svs_offset > r->svs_offset) {
1368 		return (1);
1369 	}
1370 	return (0);
1371 }
1372 
1373 /*
1374  * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
1375  * structure.  For segvn_datas without an anon structure, it just looks up
1376  * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
1377  * pass through to the vnode, and non-null slots are checked for residency.
1378  */
1379 int
1380 segvn_pages_walk_init(mdb_walk_state_t *wsp)
1381 {
1382 	segvn_walk_data_t	*svw;
1383 	struct segvn_data	*svd;
1384 
1385 	if (wsp->walk_addr == NULL) {
1386 		mdb_warn("segvn walk doesn't support global walks\n");
1387 		return (WALK_ERR);
1388 	}
1389 
1390 	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
1391 	svw->svw_svdp = wsp->walk_addr;
1392 	svw->svw_anonskip = 0;
1393 	svw->svw_sparse_idx = 0;
1394 	svw->svw_walkoff = 0;
1395 	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);
1396 
1397 	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
1398 	    -1) {
1399 		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
1400 		mdb_free(svw, sizeof (*svw));
1401 		return (WALK_ERR);
1402 	}
1403 
1404 	svd = &svw->svw_svd;
1405 	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
1406 	    (uintptr_t)svd->seg) == -1) {
1407 		mdb_warn("failed to read seg at %p (from %p)",
1408 		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
1409 		mdb_free(svw, sizeof (*svw));
1410 		return (WALK_ERR);
1411 	}
1412 
1413 	if (svd->amp == NULL && svd->vp == NULL) {
1414 		/* make the walk terminate immediately;  no pages */
1415 		svw->svw_walkoff = svw->svw_seg.s_size;
1416 
1417 	} else if (svd->amp == NULL &&
1418 	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
1419 		/*
1420 		 * If we don't have an anon pointer, and the segment is large,
1421 		 * we try to load the in-memory pages into a fixed-size array,
1422 		 * which is then sorted and reported directly.  This is much
1423 		 * faster than doing a mdb_page_lookup() for each possible
1424 		 * offset.
1425 		 *
1426 		 * If the allocation fails, or there are too many pages
1427 		 * in-core, we fall back to looking up the pages individually.
1428 		 */
1429 		svw->svw_sparse = mdb_alloc(
1430 		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
1431 		if (svw->svw_sparse != NULL) {
1432 			svw->svw_sparse_size = SEGVN_MAX_SPARSE;
1433 
1434 			if (mdb_pwalk("page", segvn_sparse_fill, svw,
1435 			    (uintptr_t)svd->vp) == -1 ||
1436 			    svw->svw_sparse_overflow) {
1437 				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1438 				    sizeof (*svw->svw_sparse));
1439 				svw->svw_sparse = NULL;
1440 			} else {
1441 				qsort(svw->svw_sparse, svw->svw_sparse_count,
1442 				    sizeof (*svw->svw_sparse),
1443 				    segvn_sparse_cmp);
1444 			}
1445 		}
1446 
1447 	} else if (svd->amp != NULL) {
1448 		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
1449 		    "segvn_anon" : "segvn_anon_all";
1450 		/*
1451 		 * If we're not printing all offsets, and the segvn_data has
1452 		 * no backing VP, we can use the "segvn_anon" walker, which
1453 		 * efficiently skips NULL slots.
1454 		 *
1455 		 * Otherwise, we layer over the "segvn_anon_all" walker
1456 		 * (which reports all anon slots, even NULL ones), so that
1457 		 * segvn_pages_walk_step() knows the precise offset for each
1458 		 * element.  It uses that offset information to look up the
1459 		 * backing pages for NULL anon slots.
1460 		 */
1461 		if (mdb_layered_walk(layer, wsp) == -1) {
1462 			mdb_warn("segvn_pages: failed to layer \"%s\" "
1463 			    "for segvn_data %p", layer, svw->svw_svdp);
1464 			mdb_free(svw, sizeof (*svw));
1465 			return (WALK_ERR);
1466 		}
1467 	}
1468 
1469 	wsp->walk_data = svw;
1470 	return (WALK_NEXT);
1471 }
1472 
1473 int
1474 segvn_pages_walk_step(mdb_walk_state_t *wsp)
1475 {
1476 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1477 	struct seg		*const	seg = &svw->svw_seg;
1478 	struct segvn_data	*const	svd = &svw->svw_svd;
1479 	uintptr_t		pp;
1480 	page_t			page;
1481 
1482 	/* If we've walked off the end of the segment, we're done. */
1483 	if (svw->svw_walkoff >= seg->s_size) {
1484 		return (WALK_DONE);
1485 	}
1486 
1487 	/*
1488 	 * If we've got a sparse page array, just send it directly.
1489 	 */
1490 	if (svw->svw_sparse != NULL) {
1491 		u_offset_t off;
1492 
1493 		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
1494 			pp = NULL;
1495 			if (!svw->svw_all) {
1496 				return (WALK_DONE);
1497 			}
1498 		} else {
1499 			segvn_sparse_t	*const svs =
1500 			    &svw->svw_sparse[svw->svw_sparse_idx];
1501 			off = svs->svs_offset - svd->offset;
1502 			if (svw->svw_all && svw->svw_walkoff != off) {
1503 				pp = NULL;
1504 			} else {
1505 				pp = svs->svs_page;
1506 				svw->svw_sparse_idx++;
1507 			}
1508 		}
1509 
1510 	} else if (svd->amp == NULL || wsp->walk_addr == NULL) {
1511 		/*
1512 		 * If there's no anon, or the anon slot is NULL, look up
1513 		 * <vp, offset>.
1514 		 */
1515 		if (svd->vp != NULL) {
1516 			pp = mdb_page_lookup((uintptr_t)svd->vp,
1517 			    svd->offset + svw->svw_walkoff);
1518 		} else {
1519 			pp = NULL;
1520 		}
1521 
1522 	} else {
1523 		const struct anon	*const	anon = wsp->walk_layer;
1524 
1525 		/*
1526 		 * We have a "struct anon"; if it's not swapped out,
1527 		 * look up the page.
1528 		 */
1529 		if (anon->an_vp != NULL || anon->an_off != 0) {
1530 			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
1531 			    anon->an_off);
1532 			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
1533 				mdb_warn("walk segvn_pages: segvn_data %p "
1534 				    "offset %ld, anon page <%p, %llx> not "
1535 				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
1536 				    anon->an_vp, anon->an_off);
1537 			}
1538 		} else {
1539 			if (anon->an_pvp == NULL) {
1540 				mdb_warn("walk segvn_pages: useless struct "
1541 				    "anon at %p\n", wsp->walk_addr);
1542 			}
1543 			pp = NULL;	/* nothing at this offset */
1544 		}
1545 	}
1546 
1547 	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
1548 	if (pp != NULL) {
1549 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
1550 			mdb_warn("unable to read page_t at %#lx", pp);
1551 			return (WALK_ERR);
1552 		}
1553 		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
1554 	}
1555 	if (svw->svw_all) {
1556 		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1557 	}
1558 	return (WALK_NEXT);
1559 }
1560 
1561 void
1562 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1563 {
1564 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1565 
1566 	if (svw->svw_sparse != NULL) {
1567 		mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1568 		    sizeof (*svw->svw_sparse));
1569 	}
1570 	mdb_free(svw, sizeof (*svw));
1571 }
1572 
/*
 * Hash a <vnode address, offset> pair to an index into the smd_hash bucket
 * array.  The expansion refers to `smd_hashmsk' by name, so a variable of
 * that name must be in scope wherever this macro is used.
 *
 * NOTE(review): presumably this is a hand-made copy of the kernel's own
 * segmap hash function (hence the original "Grumble, grumble.") -- confirm
 * against the kernel's seg_map implementation before changing it.
 */
#define	SMAP_HASHFUNC(vp, off)	\
	((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	((off) >> MAXBSHIFT)) & smd_hashmsk)
1579 
1580 int
1581 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1582 {
1583 	long smd_hashmsk;
1584 	int hash;
1585 	uintptr_t offset = 0;
1586 	struct smap smp;
1587 	uintptr_t saddr, kaddr;
1588 	uintptr_t smd_hash, smd_smap;
1589 	struct seg seg;
1590 
1591 	if (!(flags & DCMD_ADDRSPEC))
1592 		return (DCMD_USAGE);
1593 
1594 	if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1595 		mdb_warn("failed to read smd_hashmsk");
1596 		return (DCMD_ERR);
1597 	}
1598 
1599 	if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1600 		mdb_warn("failed to read smd_hash");
1601 		return (DCMD_ERR);
1602 	}
1603 
1604 	if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1605 		mdb_warn("failed to read smd_hash");
1606 		return (DCMD_ERR);
1607 	}
1608 
1609 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1610 		mdb_warn("failed to read segkmap");
1611 		return (DCMD_ERR);
1612 	}
1613 
1614 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1615 		mdb_warn("failed to read segkmap at %p", kaddr);
1616 		return (DCMD_ERR);
1617 	}
1618 
1619 	if (argc != 0) {
1620 		const mdb_arg_t *arg = &argv[0];
1621 
1622 		if (arg->a_type == MDB_TYPE_IMMEDIATE)
1623 			offset = arg->a_un.a_val;
1624 		else
1625 			offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1626 	}
1627 
1628 	hash = SMAP_HASHFUNC(addr, offset);
1629 
1630 	if (mdb_vread(&saddr, sizeof (saddr),
1631 	    smd_hash + hash * sizeof (uintptr_t)) == -1) {
1632 		mdb_warn("couldn't read smap at %p",
1633 		    smd_hash + hash * sizeof (uintptr_t));
1634 		return (DCMD_ERR);
1635 	}
1636 
1637 	do {
1638 		if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1639 			mdb_warn("couldn't read smap at %p", saddr);
1640 			return (DCMD_ERR);
1641 		}
1642 
1643 		if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1644 			mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1645 			    addr, offset, saddr, ((saddr - smd_smap) /
1646 			    sizeof (smp)) * MAXBSIZE + seg.s_base);
1647 			return (DCMD_OK);
1648 		}
1649 
1650 		saddr = (uintptr_t)smp.sm_hash;
1651 	} while (saddr != NULL);
1652 
1653 	mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1654 	return (DCMD_OK);
1655 }
1656 
1657 /*ARGSUSED*/
1658 int
1659 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1660 {
1661 	uintptr_t kaddr;
1662 	struct seg seg;
1663 	struct segmap_data sd;
1664 
1665 	if (!(flags & DCMD_ADDRSPEC))
1666 		return (DCMD_USAGE);
1667 
1668 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1669 		mdb_warn("failed to read segkmap");
1670 		return (DCMD_ERR);
1671 	}
1672 
1673 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1674 		mdb_warn("failed to read segkmap at %p", kaddr);
1675 		return (DCMD_ERR);
1676 	}
1677 
1678 	if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1679 		mdb_warn("failed to read segmap_data at %p", seg.s_data);
1680 		return (DCMD_ERR);
1681 	}
1682 
1683 	mdb_printf("%p is smap %p\n", addr,
1684 	    ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1685 	    sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1686 
1687 	return (DCMD_OK);
1688 }
1689