xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision 8c4cbc5227c35cbf837b0144a642e55e7cf84a15)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 Joyent, Inc.
24  * Copyright 2025 Oxide Computer Company
25  */
26 
27 #include <mdb/mdb_param.h>
28 #include <mdb/mdb_modapi.h>
29 #include <mdb/mdb_ks.h>
30 #include <sys/types.h>
31 #include <sys/memlist.h>
32 #include <sys/swap.h>
33 #include <sys/systm.h>
34 #include <sys/thread.h>
35 #include <vm/anon.h>
36 #include <vm/as.h>
37 #include <vm/page.h>
38 #include <sys/thread.h>
39 #include <sys/swap.h>
40 #include <sys/memlist.h>
41 #include <sys/vnode.h>
42 #include <vm/seg_map.h>
43 #include <vm/seg_vn.h>
44 #include <vm/seg_hole.h>
45 #if defined(__i386) || defined(__amd64)
46 #include <sys/balloon_impl.h>
47 #endif
48 
49 #include "avl.h"
50 #include "memory.h"
51 
52 /*
53  * Page walker.
54  * By default, this will walk all pages in the system.  If given an
55  * address, it will walk all pages belonging to the vnode at that
56  * address.
57  */
58 
59 /*
60  * page_walk_data
61  *
62  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
63  * number of hash locations remaining in the page hash table when
64  * walking all pages.
65  *
66  * The astute reader will notice that pw_hashloc is only used when
67  * reading all pages (to hold a pointer to our location in the page
68  * hash table), and that pw_first is only used when reading the pages
69  * belonging to a particular vnode (to hold a pointer to the first
70  * page).  While these could be combined to be a single pointer, they
71  * are left separate for clarity.
72  */
/* Private state carried between steps of the page walker. */
typedef struct page_walk_data {
	long		pw_hashleft;	/* hash buckets left; -1 on vnode walk */
	void		**pw_hashloc;	/* next page hash bucket to read */
	uintptr_t	pw_first;	/* first page seen on a vnode walk */
} page_walk_data_t;
78 
79 int
page_walk_init(mdb_walk_state_t * wsp)80 page_walk_init(mdb_walk_state_t *wsp)
81 {
82 	page_walk_data_t	*pwd;
83 	void	**ptr;
84 	size_t	hashsz;
85 	vnode_t	vn;
86 
87 	if (wsp->walk_addr == 0) {
88 
89 		/*
90 		 * Walk all pages
91 		 */
92 
93 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
94 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
95 		    (ptr == NULL) || (hashsz == 0)) {
96 			mdb_warn("page_hash, page_hashsz not found or invalid");
97 			return (WALK_ERR);
98 		}
99 
100 		/*
101 		 * Since we are walking all pages, initialize hashleft
102 		 * to be the remaining number of entries in the page
103 		 * hash.  hashloc is set the start of the page hash
104 		 * table.  Setting the walk address to 0 indicates that
105 		 * we aren't currently following a hash chain, and that
106 		 * we need to scan the page hash table for a page.
107 		 */
108 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
109 		pwd->pw_hashleft = hashsz;
110 		pwd->pw_hashloc = ptr;
111 		wsp->walk_addr = 0;
112 	} else {
113 
114 		/*
115 		 * Walk just this vnode
116 		 */
117 
118 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
119 			mdb_warn("unable to read vnode_t at %#lx",
120 			    wsp->walk_addr);
121 			return (WALK_ERR);
122 		}
123 
124 		/*
125 		 * We set hashleft to -1 to indicate that we are
126 		 * walking a vnode, and initialize first to 0 (it is
127 		 * used to terminate the walk, so it must not be set
128 		 * until after we have walked the first page).  The
129 		 * walk address is set to the first page.
130 		 */
131 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
132 		pwd->pw_hashleft = -1;
133 		pwd->pw_first = 0;
134 
135 		wsp->walk_addr = (uintptr_t)vn.v_pages;
136 	}
137 
138 	wsp->walk_data = pwd;
139 
140 	return (WALK_NEXT);
141 }
142 
143 int
page_walk_step(mdb_walk_state_t * wsp)144 page_walk_step(mdb_walk_state_t *wsp)
145 {
146 	page_walk_data_t	*pwd = wsp->walk_data;
147 	page_t		page;
148 	uintptr_t	pp;
149 
150 	pp = wsp->walk_addr;
151 
152 	if (pwd->pw_hashleft < 0) {
153 
154 		/* We're walking a vnode's pages */
155 
156 		/*
157 		 * If we don't have any pages to walk, we have come
158 		 * back around to the first one (we finished), or we
159 		 * can't read the page we're looking at, we are done.
160 		 */
161 		if (pp == 0 || pp == pwd->pw_first)
162 			return (WALK_DONE);
163 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
164 			mdb_warn("unable to read page_t at %#lx", pp);
165 			return (WALK_ERR);
166 		}
167 
168 		/*
169 		 * Set the walk address to the next page, and if the
170 		 * first page hasn't been set yet (i.e. we are on the
171 		 * first page), set it.
172 		 */
173 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
174 		if (pwd->pw_first == 0)
175 			pwd->pw_first = pp;
176 
177 	} else if (pwd->pw_hashleft > 0) {
178 
179 		/* We're walking all pages */
180 
181 		/*
182 		 * If pp (the walk address) is NULL, we scan through
183 		 * the page hash table until we find a page.
184 		 */
185 		if (pp == 0) {
186 
187 			/*
188 			 * Iterate through the page hash table until we
189 			 * find a page or reach the end.
190 			 */
191 			do {
192 				if (mdb_vread(&pp, sizeof (uintptr_t),
193 				    (uintptr_t)pwd->pw_hashloc) == -1) {
194 					mdb_warn("unable to read from %#p",
195 					    pwd->pw_hashloc);
196 					return (WALK_ERR);
197 				}
198 				pwd->pw_hashleft--;
199 				pwd->pw_hashloc++;
200 			} while (pwd->pw_hashleft && (pp == 0));
201 
202 			/*
203 			 * We've reached the end; exit.
204 			 */
205 			if (pp == 0)
206 				return (WALK_DONE);
207 		}
208 
209 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
210 			mdb_warn("unable to read page_t at %#lx", pp);
211 			return (WALK_ERR);
212 		}
213 
214 		/*
215 		 * Set the walk address to the next page.
216 		 */
217 		wsp->walk_addr = (uintptr_t)page.p_hash;
218 
219 	} else {
220 		/* We've finished walking all pages. */
221 		return (WALK_DONE);
222 	}
223 
224 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
225 }
226 
227 void
page_walk_fini(mdb_walk_state_t * wsp)228 page_walk_fini(mdb_walk_state_t *wsp)
229 {
230 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
231 }
232 
233 /*
234  * allpages walks all pages in the system in order they appear in
235  * the memseg structure
236  */
237 
/* Number of page_t structures the allpages walker reads at a time. */
#define	PAGE_BUFFER	128
239 
240 int
allpages_walk_init(mdb_walk_state_t * wsp)241 allpages_walk_init(mdb_walk_state_t *wsp)
242 {
243 	if (wsp->walk_addr != 0) {
244 		mdb_warn("allpages only supports global walks.\n");
245 		return (WALK_ERR);
246 	}
247 
248 	if (mdb_layered_walk("memseg", wsp) == -1) {
249 		mdb_warn("couldn't walk 'memseg'");
250 		return (WALK_ERR);
251 	}
252 
253 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
254 	return (WALK_NEXT);
255 }
256 
257 int
allpages_walk_step(mdb_walk_state_t * wsp)258 allpages_walk_step(mdb_walk_state_t *wsp)
259 {
260 	const struct memseg *msp = wsp->walk_layer;
261 	page_t *buf = wsp->walk_data;
262 	size_t pg_read, i;
263 	size_t pg_num = msp->pages_end - msp->pages_base;
264 	const page_t *pg_addr = msp->pages;
265 
266 	while (pg_num > 0) {
267 		pg_read = MIN(pg_num, PAGE_BUFFER);
268 
269 		if (mdb_vread(buf, pg_read * sizeof (page_t),
270 		    (uintptr_t)pg_addr) == -1) {
271 			mdb_warn("can't read page_t's at %#lx", pg_addr);
272 			return (WALK_ERR);
273 		}
274 		for (i = 0; i < pg_read; i++) {
275 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
276 			    &buf[i], wsp->walk_cbdata);
277 
278 			if (ret != WALK_NEXT)
279 				return (ret);
280 		}
281 		pg_num -= pg_read;
282 		pg_addr += pg_read;
283 	}
284 
285 	return (WALK_NEXT);
286 }
287 
288 void
allpages_walk_fini(mdb_walk_state_t * wsp)289 allpages_walk_fini(mdb_walk_state_t *wsp)
290 {
291 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
292 }
293 
/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds the memstat command on live, large CPU count systems.
 */
300 
/* Small (fallback) and large (default) sizes for the vnode cache. */
#define	VN_SMALL	401
#define	VN_LARGE	10007

/* Bucket index for key p in the hash table pointed to by hp. */
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))
304 
305 struct vn_htable_list {
306 	uint_t vn_flag;				/* v_flag from vnode	*/
307 	uintptr_t vn_ptr;			/* pointer to vnode	*/
308 	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
309 	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
310 	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
311 };
312 
/*
 * vnode cache: a hash table whose entries are also kept on an LRU queue.
 *
 * vn_q_first        -> head of the queue: the most recently used vnode
 * vn_q_last         -> tail of the queue: the least recently used vnode,
 *                      whose entry is reused when a new vnode is read
 * vn_htable         -> array of hash buckets
 * vn_htable_buf     -> storage for the entries themselves
 * vn_htable_size    -> total number of entries in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
	struct vn_htable_list	*vn_q_first;
	struct vn_htable_list	*vn_q_last;
	struct vn_htable_list	**vn_htable;
	struct vn_htable_list	*vn_htable_buf;
	int			vn_htable_size;
	int			vn_htable_buckets;
} vn_htable_t;
331 
332 
333 /* allocate memory, initilize hash table and LRU queue */
334 static void
vn_htable_init(vn_htable_t * hp,size_t vn_size)335 vn_htable_init(vn_htable_t *hp, size_t vn_size)
336 {
337 	int i;
338 	int htable_size = MAX(vn_size, VN_LARGE);
339 
340 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
341 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
342 		htable_size = VN_SMALL;
343 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
344 		    * htable_size, UM_SLEEP|UM_GC);
345 	}
346 
347 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
348 	    * htable_size, UM_SLEEP|UM_GC);
349 
350 	hp->vn_q_first  = &hp->vn_htable_buf[0];
351 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
352 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
353 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
354 
355 	for (i = 1; i < (htable_size-1); i++) {
356 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
357 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
358 	}
359 
360 	hp->vn_htable_size = htable_size;
361 	hp->vn_htable_buckets = htable_size;
362 }
363 
364 
365 /*
366  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
367  * The function tries to find needed information in the following order:
368  *
369  * 1. check if ptr is the first in queue
370  * 2. check if ptr is in hash table (if so move it to the top of queue)
371  * 3. do mdb_vread, remove last queue item from queue and hash table.
372  *    Insert new information to freed object, and put this object in to the
373  *    top of the queue.
374  */
375 static int
vn_get(vn_htable_t * hp,struct vnode * vp,uintptr_t ptr)376 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
377 {
378 	int hkey;
379 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
380 	struct vn_htable_list  *q_first = hp->vn_q_first;
381 
382 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
383 	if (q_first->vn_ptr == ptr) {
384 		vp->v_flag = q_first->vn_flag;
385 
386 		return (0);
387 	}
388 
389 	/* 2. search the hash table for this ptr */
390 	hkey = VN_HTABLE_KEY(ptr, hp);
391 	hent = hp->vn_htable[hkey];
392 	while (hent && (hent->vn_ptr != ptr))
393 		hent = hent->vn_h_next;
394 
395 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
396 	if (hent == NULL) {
397 		struct vnode vn;
398 
399 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
400 			mdb_warn("unable to read vnode_t at %#lx", ptr);
401 			return (-1);
402 		}
403 
404 		/* we will insert read data into the last element in queue */
405 		hent = hp->vn_q_last;
406 
407 		/* remove last hp->vn_q_last object from hash table */
408 		if (hent->vn_ptr) {
409 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
410 			while (*htmp != hent)
411 				htmp = &(*htmp)->vn_h_next;
412 			*htmp = hent->vn_h_next;
413 		}
414 
415 		/* insert data into new free object */
416 		hent->vn_ptr  = ptr;
417 		hent->vn_flag = vn.v_flag;
418 
419 		/* insert new object into hash table */
420 		hent->vn_h_next = hp->vn_htable[hkey];
421 		hp->vn_htable[hkey] = hent;
422 	}
423 
424 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
425 	q_next = hent->vn_q_next;
426 	q_prev = hent->vn_q_prev;
427 	if (q_next == NULL)
428 		hp->vn_q_last = q_prev;
429 	else
430 		q_next->vn_q_prev = q_prev;
431 	q_prev->vn_q_next = q_next;
432 
433 	/* Add to the front of queue */
434 	hent->vn_q_prev = NULL;
435 	hent->vn_q_next = q_first;
436 	q_first->vn_q_prev = hent;
437 	hp->vn_q_first = hent;
438 
439 	/* Set v_flag in vnode pointer from hent */
440 	vp->v_flag = hent->vn_flag;
441 
442 	return (0);
443 }
444 
445 /* Summary statistics of pages */
446 typedef struct memstat {
447 	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
448 	struct vnode    *ms_kvps;	/* Cached address of vnode array  */
449 	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
450 	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
451 	uint64_t	ms_vmm_mem;	/* Pages of VMM mem		  */
452 	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
453 	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
454 	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
455 	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
456 	uint64_t	ms_bootpages;	/* Pages on the bootpages list    */
457 	uint64_t	ms_total;	/* Pages on page hash		  */
458 	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
459 	struct vnode	ms_vn;		/* vnode buffer			  */
460 } memstat_t;
461 
/*
 * True when page pp belongs to the kernel vnode at position index in the
 * vnode array cached in stats->ms_kvps.  Every argument is parenthesized so
 * that arbitrary expressions (e.g. &st or i + 1) expand correctly.
 */
#define	MS_PP_ISTYPE(pp, stats, index) \
	((pp)->p_vnode == &((stats)->ms_kvps[(index)]))
464 
465 /*
466  * Summarize pages by type and update stat information
467  */
468 
469 /* ARGSUSED */
470 static int
memstat_callback(page_t * page,page_t * pp,memstat_t * stats)471 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
472 {
473 	struct vnode *vp = &stats->ms_vn;
474 
475 	if (PP_ISBOOTPAGES(pp))
476 		stats->ms_bootpages++;
477 	else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
478 		return (WALK_NEXT);
479 	else if (MS_PP_ISTYPE(pp, stats, KV_KVP))
480 		stats->ms_kmem++;
481 	else if (MS_PP_ISTYPE(pp, stats, KV_ZVP))
482 		stats->ms_zfs_data++;
483 	else if (MS_PP_ISTYPE(pp, stats, KV_VVP))
484 		stats->ms_vmm_mem++;
485 	else if (PP_ISFREE(pp))
486 		stats->ms_cachelist++;
487 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
488 		return (WALK_ERR);
489 	else if (IS_SWAPFSVP(vp))
490 		stats->ms_anon++;
491 	else if ((vp->v_flag & VVMEXEC) != 0)
492 		stats->ms_exec++;
493 	else
494 		stats->ms_vnode++;
495 
496 	stats->ms_total++;
497 
498 	return (WALK_NEXT);
499 }
500 
501 /* ARGSUSED */
502 int
memstat(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)503 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
504 {
505 	pgcnt_t total_pages, physmem;
506 	ulong_t freemem;
507 	memstat_t stats;
508 	GElf_Sym sym;
509 	vn_htable_t ht;
510 	uintptr_t vn_size = 0;
511 #if defined(__i386) || defined(__amd64)
512 	bln_stats_t bln_stats;
513 	ssize_t bln_size;
514 #endif
515 
516 	bzero(&stats, sizeof (memstat_t));
517 
518 	/*
519 	 * -s size, is an internal option. It specifies the size of vn_htable.
520 	 * Hash table size is set in the following order:
521 	 * If user has specified the size that is larger than VN_LARGE: try it,
522 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
523 	 * failed to allocate default to VN_SMALL.
524 	 * For a better efficiency of hash table it is highly recommended to
525 	 * set size to a prime number.
526 	 */
527 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
528 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
529 		return (DCMD_USAGE);
530 
531 	/* Initialize vnode hash list and queue */
532 	vn_htable_init(&ht, vn_size);
533 	stats.ms_vn_htable = &ht;
534 
535 	/* Total physical memory */
536 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
537 		mdb_warn("unable to read total_pages");
538 		return (DCMD_ERR);
539 	}
540 
541 	/* Artificially limited memory */
542 	if (mdb_readvar(&physmem, "physmem") == -1) {
543 		mdb_warn("unable to read physmem");
544 		return (DCMD_ERR);
545 	}
546 
547 	/* read kernel vnode array pointer */
548 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
549 	    (GElf_Sym *)&sym) == -1) {
550 		mdb_warn("unable to look up kvps");
551 		return (DCMD_ERR);
552 	}
553 	stats.ms_kvps = (struct vnode *)(uintptr_t)sym.st_value;
554 
555 	/*
556 	 * If physmem != total_pages, then the administrator has limited the
557 	 * number of pages available in the system.  Excluded pages are
558 	 * associated with the unused pages vnode.  Read this vnode so the
559 	 * pages can be excluded in the page accounting.
560 	 */
561 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
562 	    (GElf_Sym *)&sym) == -1) {
563 		mdb_warn("unable to read unused_pages_vp");
564 		return (DCMD_ERR);
565 	}
566 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
567 
568 	/* walk all pages, collect statistics */
569 	if (mdb_walk("allpages", (mdb_walk_cb_t)(uintptr_t)memstat_callback,
570 	    &stats) == -1) {
571 		mdb_warn("can't walk memseg");
572 		return (DCMD_ERR);
573 	}
574 
575 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
576 		((physmem) * 10)))
577 
578 	mdb_printf("Page Summary                Pages                MB"
579 	    "  %%Tot\n");
580 	mdb_printf("------------     ----------------  ----------------"
581 	    "  ----\n");
582 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
583 	    stats.ms_kmem,
584 	    (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
585 	    MS_PCT_TOTAL(stats.ms_kmem));
586 
587 	if (stats.ms_bootpages != 0) {
588 		mdb_printf("Boot pages       %16llu  %16llu  %3lu%%\n",
589 		    stats.ms_bootpages,
590 		    (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
591 		    MS_PCT_TOTAL(stats.ms_bootpages));
592 	}
593 
594 	if (stats.ms_zfs_data != 0) {
595 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
596 		    stats.ms_zfs_data,
597 		    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
598 		    MS_PCT_TOTAL(stats.ms_zfs_data));
599 	}
600 
601 	if (stats.ms_vmm_mem != 0) {
602 		mdb_printf("VMM Memory       %16llu  %16llu  %3lu%%\n",
603 		    stats.ms_vmm_mem,
604 		    (uint64_t)stats.ms_vmm_mem * PAGESIZE / (1024 * 1024),
605 		    MS_PCT_TOTAL(stats.ms_vmm_mem));
606 	}
607 
608 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
609 	    stats.ms_anon,
610 	    (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
611 	    MS_PCT_TOTAL(stats.ms_anon));
612 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
613 	    stats.ms_exec,
614 	    (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
615 	    MS_PCT_TOTAL(stats.ms_exec));
616 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
617 	    stats.ms_vnode,
618 	    (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
619 	    MS_PCT_TOTAL(stats.ms_vnode));
620 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
621 	    stats.ms_cachelist,
622 	    (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
623 	    MS_PCT_TOTAL(stats.ms_cachelist));
624 
625 	/*
626 	 * occasionally, we double count pages above.  To avoid printing
627 	 * absurdly large values for freemem, we clamp it at zero.
628 	 */
629 	if (physmem > stats.ms_total)
630 		freemem = physmem - stats.ms_total;
631 	else
632 		freemem = 0;
633 
634 #if defined(__i386) || defined(__amd64)
635 	/* Are we running under Xen?  If so, get balloon memory usage. */
636 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
637 		if (freemem > bln_stats.bln_hv_pages)
638 			freemem -= bln_stats.bln_hv_pages;
639 		else
640 			freemem = 0;
641 	}
642 #endif
643 
644 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
645 	    (uint64_t)freemem * PAGESIZE / (1024 * 1024),
646 	    MS_PCT_TOTAL(freemem));
647 
648 #if defined(__i386) || defined(__amd64)
649 	if (bln_size != -1) {
650 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
651 		    bln_stats.bln_hv_pages,
652 		    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
653 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
654 	}
655 #endif
656 
657 	mdb_printf("\nTotal            %16lu  %16lu\n",
658 	    physmem,
659 	    (uint64_t)physmem * PAGESIZE / (1024 * 1024));
660 
661 	if (physmem != total_pages) {
662 		mdb_printf("Physical         %16lu  %16lu\n",
663 		    total_pages,
664 		    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
665 	}
666 
667 #undef MS_PCT_TOTAL
668 
669 	return (DCMD_OK);
670 }
671 
/* Print the help text for the ::pagelookup dcmd. */
void
pagelookup_help(void)
{
	/* What the dcmd does. */
	mdb_printf(
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n");

	/* The three accepted invocation forms. */
	mdb_printf(
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n");

	mdb_printf("The latter two forms are useful in pipelines.\n");
}
685 
686 int
pagelookup(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)687 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
688 {
689 	uintptr_t vp = -(uintptr_t)1;
690 	uint64_t offset = -(uint64_t)1;
691 
692 	uintptr_t pageaddr;
693 	int hasaddr = (flags & DCMD_ADDRSPEC);
694 	int usedaddr = 0;
695 
696 	if (mdb_getopts(argc, argv,
697 	    'v', MDB_OPT_UINTPTR, &vp,
698 	    'o', MDB_OPT_UINT64, &offset,
699 	    NULL) != argc) {
700 		return (DCMD_USAGE);
701 	}
702 
703 	if (vp == -(uintptr_t)1) {
704 		if (offset == -(uint64_t)1) {
705 			mdb_warn(
706 			    "pagelookup: at least one of -v vp or -o offset "
707 			    "required.\n");
708 			return (DCMD_USAGE);
709 		}
710 		vp = addr;
711 		usedaddr = 1;
712 	} else if (offset == -(uint64_t)1) {
713 		offset = mdb_get_dot();
714 		usedaddr = 1;
715 	}
716 	if (usedaddr && !hasaddr) {
717 		mdb_warn("pagelookup: address required\n");
718 		return (DCMD_USAGE);
719 	}
720 	if (!usedaddr && hasaddr) {
721 		mdb_warn(
722 		    "pagelookup: address specified when both -v and -o were "
723 		    "passed");
724 		return (DCMD_USAGE);
725 	}
726 
727 	pageaddr = mdb_page_lookup(vp, offset);
728 	if (pageaddr == 0) {
729 		mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
730 		    vp, offset);
731 		return (DCMD_OK);
732 	}
733 	mdb_printf("%#lr\n", pageaddr);		/* this is PIPE_OUT friendly */
734 	return (DCMD_OK);
735 }
736 
737 /*ARGSUSED*/
738 int
page_num2pp(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)739 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
740 {
741 	uintptr_t pp;
742 
743 	if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
744 		return (DCMD_USAGE);
745 	}
746 
747 	pp = mdb_pfn2page((pfn_t)addr);
748 	if (pp == 0) {
749 		return (DCMD_ERR);
750 	}
751 
752 	if (flags & DCMD_PIPE_OUT) {
753 		mdb_printf("%#lr\n", pp);
754 	} else {
755 		mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
756 	}
757 
758 	return (DCMD_OK);
759 }
760 
761 int
page(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)762 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
763 {
764 	page_t	p;
765 
766 	if (!(flags & DCMD_ADDRSPEC)) {
767 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
768 			mdb_warn("can't walk pages");
769 			return (DCMD_ERR);
770 		}
771 		return (DCMD_OK);
772 	}
773 
774 	if (DCMD_HDRSPEC(flags)) {
775 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
776 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
777 		    "LCT", "COW", "IO", "FS", "ST");
778 	}
779 
780 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
781 		mdb_warn("can't read page_t at %#lx", addr);
782 		return (DCMD_ERR);
783 	}
784 
785 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
786 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
787 	    p.p_iolock_state, p.p_fsdata, p.p_state);
788 
789 	return (DCMD_OK);
790 }
791 
792 int
swap_walk_init(mdb_walk_state_t * wsp)793 swap_walk_init(mdb_walk_state_t *wsp)
794 {
795 	void	*ptr;
796 
797 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
798 		mdb_warn("swapinfo not found or invalid");
799 		return (WALK_ERR);
800 	}
801 
802 	wsp->walk_addr = (uintptr_t)ptr;
803 
804 	return (WALK_NEXT);
805 }
806 
807 int
swap_walk_step(mdb_walk_state_t * wsp)808 swap_walk_step(mdb_walk_state_t *wsp)
809 {
810 	uintptr_t	sip;
811 	struct swapinfo	si;
812 
813 	sip = wsp->walk_addr;
814 
815 	if (sip == 0)
816 		return (WALK_DONE);
817 
818 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
819 		mdb_warn("unable to read swapinfo at %#lx", sip);
820 		return (WALK_ERR);
821 	}
822 
823 	wsp->walk_addr = (uintptr_t)si.si_next;
824 
825 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
826 }
827 
828 int
swapinfof(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)829 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
830 {
831 	struct swapinfo	si;
832 	char		*name;
833 
834 	if (!(flags & DCMD_ADDRSPEC)) {
835 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
836 			mdb_warn("can't walk swapinfo");
837 			return (DCMD_ERR);
838 		}
839 		return (DCMD_OK);
840 	}
841 
842 	if (DCMD_HDRSPEC(flags)) {
843 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
844 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
845 	}
846 
847 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
848 		mdb_warn("can't read swapinfo at %#lx", addr);
849 		return (DCMD_ERR);
850 	}
851 
852 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
853 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
854 		name = "*error*";
855 
856 	mdb_printf("%0?lx %?p %9d %9d %s\n",
857 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
858 
859 	return (DCMD_OK);
860 }
861 
862 int
memlist_walk_step(mdb_walk_state_t * wsp)863 memlist_walk_step(mdb_walk_state_t *wsp)
864 {
865 	uintptr_t	mlp;
866 	struct memlist	ml;
867 
868 	mlp = wsp->walk_addr;
869 
870 	if (mlp == 0)
871 		return (WALK_DONE);
872 
873 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
874 		mdb_warn("unable to read memlist at %#lx", mlp);
875 		return (WALK_ERR);
876 	}
877 
878 	wsp->walk_addr = (uintptr_t)ml.ml_next;
879 
880 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
881 }
882 
883 int
memlist(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)884 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
885 {
886 	struct memlist	ml;
887 
888 	if (!(flags & DCMD_ADDRSPEC)) {
889 		uintptr_t ptr;
890 		uint_t list = 0;
891 		int i;
892 		static const char *lists[] = {
893 			"phys_install",
894 			"phys_avail",
895 			"virt_avail"
896 		};
897 
898 		if (mdb_getopts(argc, argv,
899 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
900 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
901 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
902 			return (DCMD_USAGE);
903 
904 		if (!list)
905 			list = 1;
906 
907 		for (i = 0; list; i++, list >>= 1) {
908 			if (!(list & 1))
909 				continue;
910 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
911 			    (ptr == 0)) {
912 				mdb_warn("%s not found or invalid", lists[i]);
913 				return (DCMD_ERR);
914 			}
915 
916 			mdb_printf("%s:\n", lists[i]);
917 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
918 			    ptr) == -1) {
919 				mdb_warn("can't walk memlist");
920 				return (DCMD_ERR);
921 			}
922 		}
923 		return (DCMD_OK);
924 	}
925 
926 	if (DCMD_HDRSPEC(flags))
927 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
928 
929 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
930 		mdb_warn("can't read memlist at %#lx", addr);
931 		return (DCMD_ERR);
932 	}
933 
934 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);
935 
936 	return (DCMD_OK);
937 }
938 
939 int
seg_walk_init(mdb_walk_state_t * wsp)940 seg_walk_init(mdb_walk_state_t *wsp)
941 {
942 	if (wsp->walk_addr == 0) {
943 		mdb_warn("seg walk must begin at struct as *\n");
944 		return (WALK_ERR);
945 	}
946 
947 	/*
948 	 * this is really just a wrapper to AVL tree walk
949 	 */
950 	wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
951 	return (avl_walk_init(wsp));
952 }
953 
954 /*ARGSUSED*/
955 int
seg(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)956 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
957 {
958 	struct seg s;
959 
960 	if (argc != 0)
961 		return (DCMD_USAGE);
962 
963 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
964 		mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
965 		    "SEG", "BASE", "SIZE", "DATA", "OPS");
966 	}
967 
968 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
969 		mdb_warn("failed to read seg at %p", addr);
970 		return (DCMD_ERR);
971 	}
972 
973 	mdb_printf("%?p %?p %?lx %?p %a\n",
974 	    addr, s.s_base, s.s_size, s.s_data, s.s_ops);
975 
976 	return (DCMD_OK);
977 }
978 
/*
 * Addresses of the segment ops vectors that the pmap callbacks compare
 * against a segment's s_ops to recognize its type.  A value of 0 means the
 * corresponding symbol was not found.
 */
typedef struct pmap_walk_types {
	uintptr_t pwt_segvn;	/* address of segvn_ops */
	uintptr_t pwt_seghole;	/* address of seghole_ops */
} pmap_walk_types_t;
983 
984 /*ARGSUSED*/
985 static int
pmap_walk_count_pages(uintptr_t addr,const void * data,void * out)986 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
987 {
988 	pgcnt_t *nres = out;
989 
990 	(*nres)++;
991 
992 	return (WALK_NEXT);
993 }
994 
995 static int
pmap_walk_seg(uintptr_t addr,const struct seg * seg,const pmap_walk_types_t * types)996 pmap_walk_seg(uintptr_t addr, const struct seg *seg,
997     const pmap_walk_types_t *types)
998 {
999 	const uintptr_t ops = (uintptr_t)seg->s_ops;
1000 
1001 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1002 
1003 	if (ops == types->pwt_segvn && seg->s_data != NULL) {
1004 		struct segvn_data svn;
1005 		pgcnt_t nres = 0;
1006 
1007 		svn.vp = NULL;
1008 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1009 
1010 		/*
1011 		 * Use the segvn_pages walker to find all of the in-core pages
1012 		 * for this mapping.
1013 		 */
1014 		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
1015 		    (uintptr_t)seg->s_data) == -1) {
1016 			mdb_warn("failed to walk segvn_pages (s_data=%p)",
1017 			    seg->s_data);
1018 		}
1019 		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);
1020 
1021 		if (svn.vp != NULL) {
1022 			char buf[29];
1023 
1024 			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
1025 			mdb_printf(" %s", buf);
1026 		} else {
1027 			mdb_printf(" [ anon ]");
1028 		}
1029 	} else if (ops == types->pwt_seghole && seg->s_data != NULL) {
1030 		seghole_data_t shd;
1031 		char name[16];
1032 
1033 		(void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data);
1034 		if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name),
1035 		    (uintptr_t)shd.shd_name) == 0) {
1036 			name[0] = '\0';
1037 		}
1038 
1039 		mdb_printf(" %8s [ hole%s%s ]", "-",
1040 		    name[0] == '0' ? "" : ":", name);
1041 	} else {
1042 		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
1043 	}
1044 
1045 	mdb_printf("\n");
1046 	return (WALK_NEXT);
1047 }
1048 
1049 static int
pmap_walk_seg_quick(uintptr_t addr,const struct seg * seg,const pmap_walk_types_t * types)1050 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
1051     const pmap_walk_types_t *types)
1052 {
1053 	const uintptr_t ops = (uintptr_t)seg->s_ops;
1054 
1055 	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1056 
1057 	if (ops == types->pwt_segvn && seg->s_data != NULL) {
1058 		struct segvn_data svn;
1059 
1060 		svn.vp = NULL;
1061 		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1062 
1063 		if (svn.vp != NULL) {
1064 			mdb_printf(" %0?p", svn.vp);
1065 		} else {
1066 			mdb_printf(" [ anon ]");
1067 		}
1068 	} else {
1069 		mdb_printf(" [ &%a ]", seg->s_ops);
1070 	}
1071 
1072 	mdb_printf("\n");
1073 	return (WALK_NEXT);
1074 }
1075 
1076 /*ARGSUSED*/
1077 int
pmap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1078 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1079 {
1080 	proc_t proc;
1081 	uint_t quick = FALSE;
1082 	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
1083 	pmap_walk_types_t wtypes = { 0 };
1084 
1085 	GElf_Sym sym;
1086 
1087 	if (!(flags & DCMD_ADDRSPEC))
1088 		return (DCMD_USAGE);
1089 
1090 	if (mdb_getopts(argc, argv,
1091 	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
1092 		return (DCMD_USAGE);
1093 
1094 	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
1095 		mdb_warn("failed to read proc at %p", addr);
1096 		return (DCMD_ERR);
1097 	}
1098 
1099 	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
1100 		wtypes.pwt_segvn = (uintptr_t)sym.st_value;
1101 	if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
1102 		wtypes.pwt_seghole = (uintptr_t)sym.st_value;
1103 
1104 	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
1105 
1106 	if (quick) {
1107 		mdb_printf("VNODE\n");
1108 		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
1109 	} else {
1110 		mdb_printf("%8s %s\n", "RES", "PATH");
1111 	}
1112 
1113 	if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
1114 		mdb_warn("failed to walk segments of as %p", proc.p_as);
1115 		return (DCMD_ERR);
1116 	}
1117 
1118 	return (DCMD_OK);
1119 }
1120 
/*
 * State for the anon walkers.  An anon map's slot array is either a single
 * flat array of anon pointers, or a first-level array of pointers to
 * second-level chunks of ANON_CHUNK_SIZE anon pointers each; aw_levtwo is
 * NULL in the former case.
 */
typedef struct anon_walk_data {
	/* local copy of the anon_hdr's array_chunk (aw_nlevone entries) */
	uintptr_t *aw_levone;
	/* buffer holding the current second-level chunk, or NULL if flat */
	uintptr_t *aw_levtwo;
	/* first slot to visit */
	size_t aw_minslot;
	/* walk stops once the slot number reaches this value */
	size_t aw_maxslot;
	/* number of entries in aw_levone */
	pgcnt_t aw_nlevone;
	/* current index into aw_levone */
	pgcnt_t aw_levone_ndx;
	/* current index into aw_levtwo */
	size_t aw_levtwo_ndx;
	/* target address of the anon_map being walked */
	struct anon_map	*aw_ampp;
	/* local copy of the anon_map */
	struct anon_map aw_amp;
	/* local copy of the anon_map's anon_hdr */
	struct anon_hdr	aw_ahp;
	int		aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;
1134 
1135 int
anon_walk_init_common(mdb_walk_state_t * wsp,ulong_t minslot,ulong_t maxslot)1136 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1137 {
1138 	anon_walk_data_t *aw;
1139 
1140 	if (wsp->walk_addr == 0) {
1141 		mdb_warn("anon walk doesn't support global walks\n");
1142 		return (WALK_ERR);
1143 	}
1144 
1145 	aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1146 	aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1147 
1148 	if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1149 		mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1150 		mdb_free(aw, sizeof (anon_walk_data_t));
1151 		return (WALK_ERR);
1152 	}
1153 
1154 	if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1155 	    (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1156 		mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1157 		mdb_free(aw, sizeof (anon_walk_data_t));
1158 		return (WALK_ERR);
1159 	}
1160 
1161 	/* update min and maxslot with the given constraints */
1162 	maxslot = MIN(maxslot, aw->aw_ahp.size);
1163 	minslot = MIN(minslot, maxslot);
1164 
1165 	if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1166 	    (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1167 		aw->aw_nlevone = maxslot;
1168 		aw->aw_levone_ndx = minslot;
1169 		aw->aw_levtwo = NULL;
1170 	} else {
1171 		aw->aw_nlevone =
1172 		    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1173 		aw->aw_levone_ndx = 0;
1174 		aw->aw_levtwo =
1175 		    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1176 	}
1177 
1178 	aw->aw_levone =
1179 	    mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1180 	aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1181 
1182 	mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1183 	    (uintptr_t)aw->aw_ahp.array_chunk);
1184 
1185 	aw->aw_levtwo_ndx = 0;
1186 	aw->aw_minslot = minslot;
1187 	aw->aw_maxslot = maxslot;
1188 
1189 out:
1190 	wsp->walk_data = aw;
1191 	return (0);
1192 }
1193 
1194 int
anon_walk_step(mdb_walk_state_t * wsp)1195 anon_walk_step(mdb_walk_state_t *wsp)
1196 {
1197 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1198 	struct anon anon;
1199 	uintptr_t anonptr;
1200 	ulong_t slot;
1201 
1202 	/*
1203 	 * Once we've walked through level one, we're done.
1204 	 */
1205 	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
1206 		return (WALK_DONE);
1207 	}
1208 
1209 	if (aw->aw_levtwo == NULL) {
1210 		anonptr = aw->aw_levone[aw->aw_levone_ndx];
1211 		aw->aw_levone_ndx++;
1212 	} else {
1213 		if (aw->aw_levtwo_ndx == 0) {
1214 			uintptr_t levtwoptr;
1215 
1216 			/* The first time through, skip to our first index. */
1217 			if (aw->aw_levone_ndx == 0) {
1218 				aw->aw_levone_ndx =
1219 				    aw->aw_minslot / ANON_CHUNK_SIZE;
1220 				aw->aw_levtwo_ndx =
1221 				    aw->aw_minslot % ANON_CHUNK_SIZE;
1222 			}
1223 
1224 			levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];
1225 
1226 			if (levtwoptr == 0) {
1227 				if (!aw->aw_all) {
1228 					aw->aw_levtwo_ndx = 0;
1229 					aw->aw_levone_ndx++;
1230 					return (WALK_NEXT);
1231 				}
1232 				bzero(aw->aw_levtwo,
1233 				    ANON_CHUNK_SIZE * sizeof (uintptr_t));
1234 
1235 			} else if (mdb_vread(aw->aw_levtwo,
1236 			    ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
1237 			    -1) {
1238 				mdb_warn("unable to read anon_map %p's "
1239 				    "second-level map %d at %p",
1240 				    aw->aw_ampp, aw->aw_levone_ndx,
1241 				    levtwoptr);
1242 				return (WALK_ERR);
1243 			}
1244 		}
1245 		slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
1246 		anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];
1247 
1248 		/* update the indices for next time */
1249 		aw->aw_levtwo_ndx++;
1250 		if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
1251 			aw->aw_levtwo_ndx = 0;
1252 			aw->aw_levone_ndx++;
1253 		}
1254 
1255 		/* make sure the slot # is in the requested range */
1256 		if (slot >= aw->aw_maxslot) {
1257 			return (WALK_DONE);
1258 		}
1259 	}
1260 
1261 	if (anonptr != 0) {
1262 		mdb_vread(&anon, sizeof (anon), anonptr);
1263 		return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
1264 	}
1265 	if (aw->aw_all) {
1266 		return (wsp->walk_callback(0, NULL, wsp->walk_cbdata));
1267 	}
1268 	return (WALK_NEXT);
1269 }
1270 
1271 void
anon_walk_fini(mdb_walk_state_t * wsp)1272 anon_walk_fini(mdb_walk_state_t *wsp)
1273 {
1274 	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1275 
1276 	if (aw->aw_levtwo != NULL)
1277 		mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1278 
1279 	mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1280 	mdb_free(aw, sizeof (anon_walk_data_t));
1281 }
1282 
1283 int
anon_walk_init(mdb_walk_state_t * wsp)1284 anon_walk_init(mdb_walk_state_t *wsp)
1285 {
1286 	return (anon_walk_init_common(wsp, 0, ULONG_MAX));
1287 }
1288 
1289 int
segvn_anon_walk_init(mdb_walk_state_t * wsp)1290 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1291 {
1292 	const uintptr_t		svd_addr = wsp->walk_addr;
1293 	uintptr_t		amp_addr;
1294 	uintptr_t		seg_addr;
1295 	struct segvn_data	svd;
1296 	struct anon_map		amp;
1297 	struct seg		seg;
1298 
1299 	if (svd_addr == 0) {
1300 		mdb_warn("segvn_anon walk doesn't support global walks\n");
1301 		return (WALK_ERR);
1302 	}
1303 	if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1304 		mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1305 		    svd_addr);
1306 		return (WALK_ERR);
1307 	}
1308 	if (svd.amp == NULL) {
1309 		mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1310 		    svd_addr);
1311 		return (WALK_ERR);
1312 	}
1313 	amp_addr = (uintptr_t)svd.amp;
1314 	if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
1315 		mdb_warn("segvn_anon walk: unable to read amp %p for "
1316 		    "segvn_data %p", amp_addr, svd_addr);
1317 		return (WALK_ERR);
1318 	}
1319 	seg_addr = (uintptr_t)svd.seg;
1320 	if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1321 		mdb_warn("segvn_anon walk: unable to read seg %p for "
1322 		    "segvn_data %p", seg_addr, svd_addr);
1323 		return (WALK_ERR);
1324 	}
1325 	if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1326 		mdb_warn("anon map %p is too small for segment %p\n",
1327 		    amp_addr, seg_addr);
1328 		return (WALK_ERR);
1329 	}
1330 
1331 	wsp->walk_addr = amp_addr;
1332 	return (anon_walk_init_common(wsp,
1333 	    svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1334 }
1335 
1336 
/*
 * One in-core page found while pre-scanning a vnode's pages for the
 * segvn_pages walker: the page's vnode offset and the address of its page_t.
 */
typedef struct {
	u_offset_t		svs_offset;
	uintptr_t		svs_page;
} segvn_sparse_t;
/* Number of segvn_sparse_t entries that fit in a 128K buffer. */
#define	SEGVN_MAX_SPARSE	((128 * 1024) / sizeof (segvn_sparse_t))
1342 
/*
 * State for the segvn_pages walker.
 */
typedef struct {
	/* target address of the segvn_data being walked */
	uintptr_t		svw_svdp;
	/* local copy of the segvn_data */
	struct segvn_data	svw_svd;
	/* local copy of the segvn_data's owning seg */
	struct seg		svw_seg;
	/* byte offset into the segment; advanced by PAGESIZE on each step */
	size_t			svw_walkoff;
	/* initialised to zero; not otherwise used by the code visible here */
	ulong_t			svw_anonskip;
	/* array of in-core pages sorted by offset, or NULL if not in use */
	segvn_sparse_t		*svw_sparse;
	/* index of the next svw_sparse entry to report */
	size_t			svw_sparse_idx;
	/* number of valid entries in svw_sparse */
	size_t			svw_sparse_count;
	/* capacity of svw_sparse, in entries */
	size_t			svw_sparse_size;
	/* set when there were more in-core pages than svw_sparse can hold */
	uint8_t			svw_sparse_overflow;
	/* report every offset, passing (0, NULL) where there is no page */
	uint8_t			svw_all;
} segvn_walk_data_t;
1356 
1357 static int
segvn_sparse_fill(uintptr_t addr,const void * pp_arg,void * arg)1358 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1359 {
1360 	segvn_walk_data_t	*const	svw = arg;
1361 	const page_t		*const	pp = pp_arg;
1362 	const u_offset_t		offset = pp->p_offset;
1363 	segvn_sparse_t		*const	cur =
1364 	    &svw->svw_sparse[svw->svw_sparse_count];
1365 
1366 	/* See if the page is of interest */
1367 	if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1368 		return (WALK_NEXT);
1369 	}
1370 	/* See if we have space for the new entry, then add it. */
1371 	if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1372 		svw->svw_sparse_overflow = 1;
1373 		return (WALK_DONE);
1374 	}
1375 	svw->svw_sparse_count++;
1376 	cur->svs_offset = offset;
1377 	cur->svs_page = addr;
1378 	return (WALK_NEXT);
1379 }
1380 
1381 static int
segvn_sparse_cmp(const void * lp,const void * rp)1382 segvn_sparse_cmp(const void *lp, const void *rp)
1383 {
1384 	const segvn_sparse_t *const	l = lp;
1385 	const segvn_sparse_t *const	r = rp;
1386 
1387 	if (l->svs_offset < r->svs_offset) {
1388 		return (-1);
1389 	}
1390 	if (l->svs_offset > r->svs_offset) {
1391 		return (1);
1392 	}
1393 	return (0);
1394 }
1395 
1396 /*
1397  * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
1398  * structure.  For segvn_datas without an anon structure, it just looks up
1399  * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
1400  * pass through to the vnode, and non-null slots are checked for residency.
1401  */
1402 int
segvn_pages_walk_init(mdb_walk_state_t * wsp)1403 segvn_pages_walk_init(mdb_walk_state_t *wsp)
1404 {
1405 	segvn_walk_data_t	*svw;
1406 	struct segvn_data	*svd;
1407 
1408 	if (wsp->walk_addr == 0) {
1409 		mdb_warn("segvn walk doesn't support global walks\n");
1410 		return (WALK_ERR);
1411 	}
1412 
1413 	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
1414 	svw->svw_svdp = wsp->walk_addr;
1415 	svw->svw_anonskip = 0;
1416 	svw->svw_sparse_idx = 0;
1417 	svw->svw_walkoff = 0;
1418 	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);
1419 
1420 	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
1421 	    -1) {
1422 		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
1423 		mdb_free(svw, sizeof (*svw));
1424 		return (WALK_ERR);
1425 	}
1426 
1427 	svd = &svw->svw_svd;
1428 	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
1429 	    (uintptr_t)svd->seg) == -1) {
1430 		mdb_warn("failed to read seg at %p (from %p)",
1431 		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
1432 		mdb_free(svw, sizeof (*svw));
1433 		return (WALK_ERR);
1434 	}
1435 
1436 	if (svd->amp == NULL && svd->vp == NULL) {
1437 		/* make the walk terminate immediately;  no pages */
1438 		svw->svw_walkoff = svw->svw_seg.s_size;
1439 
1440 	} else if (svd->amp == NULL &&
1441 	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
1442 		/*
1443 		 * If we don't have an anon pointer, and the segment is large,
1444 		 * we try to load the in-memory pages into a fixed-size array,
1445 		 * which is then sorted and reported directly.  This is much
1446 		 * faster than doing a mdb_page_lookup() for each possible
1447 		 * offset.
1448 		 *
1449 		 * If the allocation fails, or there are too many pages
1450 		 * in-core, we fall back to looking up the pages individually.
1451 		 */
1452 		svw->svw_sparse = mdb_alloc(
1453 		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
1454 		if (svw->svw_sparse != NULL) {
1455 			svw->svw_sparse_size = SEGVN_MAX_SPARSE;
1456 
1457 			if (mdb_pwalk("page", segvn_sparse_fill, svw,
1458 			    (uintptr_t)svd->vp) == -1 ||
1459 			    svw->svw_sparse_overflow) {
1460 				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1461 				    sizeof (*svw->svw_sparse));
1462 				svw->svw_sparse = NULL;
1463 			} else {
1464 				qsort(svw->svw_sparse, svw->svw_sparse_count,
1465 				    sizeof (*svw->svw_sparse),
1466 				    segvn_sparse_cmp);
1467 			}
1468 		}
1469 
1470 	} else if (svd->amp != NULL) {
1471 		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
1472 		    "segvn_anon" : "segvn_anon_all";
1473 		/*
1474 		 * If we're not printing all offsets, and the segvn_data has
1475 		 * no backing VP, we can use the "segvn_anon" walker, which
1476 		 * efficiently skips NULL slots.
1477 		 *
1478 		 * Otherwise, we layer over the "segvn_anon_all" walker
1479 		 * (which reports all anon slots, even NULL ones), so that
1480 		 * segvn_pages_walk_step() knows the precise offset for each
1481 		 * element.  It uses that offset information to look up the
1482 		 * backing pages for NULL anon slots.
1483 		 */
1484 		if (mdb_layered_walk(layer, wsp) == -1) {
1485 			mdb_warn("segvn_pages: failed to layer \"%s\" "
1486 			    "for segvn_data %p", layer, svw->svw_svdp);
1487 			mdb_free(svw, sizeof (*svw));
1488 			return (WALK_ERR);
1489 		}
1490 	}
1491 
1492 	wsp->walk_data = svw;
1493 	return (WALK_NEXT);
1494 }
1495 
1496 int
segvn_pages_walk_step(mdb_walk_state_t * wsp)1497 segvn_pages_walk_step(mdb_walk_state_t *wsp)
1498 {
1499 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1500 	struct seg		*const	seg = &svw->svw_seg;
1501 	struct segvn_data	*const	svd = &svw->svw_svd;
1502 	uintptr_t		pp;
1503 	page_t			page;
1504 
1505 	/* If we've walked off the end of the segment, we're done. */
1506 	if (svw->svw_walkoff >= seg->s_size) {
1507 		return (WALK_DONE);
1508 	}
1509 
1510 	/*
1511 	 * If we've got a sparse page array, just send it directly.
1512 	 */
1513 	if (svw->svw_sparse != NULL) {
1514 		u_offset_t off;
1515 
1516 		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
1517 			pp = 0;
1518 			if (!svw->svw_all) {
1519 				return (WALK_DONE);
1520 			}
1521 		} else {
1522 			segvn_sparse_t	*const svs =
1523 			    &svw->svw_sparse[svw->svw_sparse_idx];
1524 			off = svs->svs_offset - svd->offset;
1525 			if (svw->svw_all && svw->svw_walkoff != off) {
1526 				pp = 0;
1527 			} else {
1528 				pp = svs->svs_page;
1529 				svw->svw_sparse_idx++;
1530 			}
1531 		}
1532 
1533 	} else if (svd->amp == NULL || wsp->walk_addr == 0) {
1534 		/*
1535 		 * If there's no anon, or the anon slot is NULL, look up
1536 		 * <vp, offset>.
1537 		 */
1538 		if (svd->vp != NULL) {
1539 			pp = mdb_page_lookup((uintptr_t)svd->vp,
1540 			    svd->offset + svw->svw_walkoff);
1541 		} else {
1542 			pp = 0;
1543 		}
1544 
1545 	} else {
1546 		const struct anon	*const	anon = wsp->walk_layer;
1547 
1548 		/*
1549 		 * We have a "struct anon"; if it's not swapped out,
1550 		 * look up the page.
1551 		 */
1552 		if (anon->an_vp != NULL || anon->an_off != 0) {
1553 			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
1554 			    anon->an_off);
1555 			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
1556 				mdb_warn("walk segvn_pages: segvn_data %p "
1557 				    "offset %ld, anon page <%p, %llx> not "
1558 				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
1559 				    anon->an_vp, anon->an_off);
1560 			}
1561 		} else {
1562 			if (anon->an_pvp == NULL) {
1563 				mdb_warn("walk segvn_pages: useless struct "
1564 				    "anon at %p\n", wsp->walk_addr);
1565 			}
1566 			pp = 0;	/* nothing at this offset */
1567 		}
1568 	}
1569 
1570 	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
1571 	if (pp != 0) {
1572 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
1573 			mdb_warn("unable to read page_t at %#lx", pp);
1574 			return (WALK_ERR);
1575 		}
1576 		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
1577 	}
1578 	if (svw->svw_all) {
1579 		return (wsp->walk_callback(0, NULL, wsp->walk_cbdata));
1580 	}
1581 	return (WALK_NEXT);
1582 }
1583 
1584 void
segvn_pages_walk_fini(mdb_walk_state_t * wsp)1585 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1586 {
1587 	segvn_walk_data_t	*const	svw = wsp->walk_data;
1588 
1589 	if (svw->svw_sparse != NULL) {
1590 		mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1591 		    sizeof (*svw->svw_sparse));
1592 	}
1593 	mdb_free(svw, sizeof (*svw));
1594 }
1595 
1596 /*
1597  * Grumble, grumble.
1598  */
1599 #define	SMAP_HASHFUNC(vp, off)	\
1600 	((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
1601 	((off) >> MAXBSHIFT)) & smd_hashmsk)
1602 
1603 int
vnode2smap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1604 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1605 {
1606 	long smd_hashmsk;
1607 	int hash;
1608 	uintptr_t offset = 0;
1609 	struct smap smp;
1610 	uintptr_t saddr, kaddr;
1611 	uintptr_t smd_hash, smd_smap;
1612 	struct seg seg;
1613 
1614 	if (!(flags & DCMD_ADDRSPEC))
1615 		return (DCMD_USAGE);
1616 
1617 	if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1618 		mdb_warn("failed to read smd_hashmsk");
1619 		return (DCMD_ERR);
1620 	}
1621 
1622 	if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1623 		mdb_warn("failed to read smd_hash");
1624 		return (DCMD_ERR);
1625 	}
1626 
1627 	if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1628 		mdb_warn("failed to read smd_hash");
1629 		return (DCMD_ERR);
1630 	}
1631 
1632 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1633 		mdb_warn("failed to read segkmap");
1634 		return (DCMD_ERR);
1635 	}
1636 
1637 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1638 		mdb_warn("failed to read segkmap at %p", kaddr);
1639 		return (DCMD_ERR);
1640 	}
1641 
1642 	if (argc != 0) {
1643 		const mdb_arg_t *arg = &argv[0];
1644 
1645 		offset = (uintptr_t)mdb_argtoull(arg);
1646 	}
1647 
1648 	hash = SMAP_HASHFUNC(addr, offset);
1649 
1650 	if (mdb_vread(&saddr, sizeof (saddr),
1651 	    smd_hash + hash * sizeof (uintptr_t)) == -1) {
1652 		mdb_warn("couldn't read smap at %p",
1653 		    smd_hash + hash * sizeof (uintptr_t));
1654 		return (DCMD_ERR);
1655 	}
1656 
1657 	do {
1658 		if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1659 			mdb_warn("couldn't read smap at %p", saddr);
1660 			return (DCMD_ERR);
1661 		}
1662 
1663 		if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1664 			mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1665 			    addr, offset, saddr, ((saddr - smd_smap) /
1666 			    sizeof (smp)) * MAXBSIZE + seg.s_base);
1667 			return (DCMD_OK);
1668 		}
1669 
1670 		saddr = (uintptr_t)smp.sm_hash;
1671 	} while (saddr != 0);
1672 
1673 	mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1674 	return (DCMD_OK);
1675 }
1676 
1677 /*ARGSUSED*/
1678 int
addr2smap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1679 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1680 {
1681 	uintptr_t kaddr;
1682 	struct seg seg;
1683 	struct segmap_data sd;
1684 
1685 	if (!(flags & DCMD_ADDRSPEC))
1686 		return (DCMD_USAGE);
1687 
1688 	if (mdb_readvar(&kaddr, "segkmap") == -1) {
1689 		mdb_warn("failed to read segkmap");
1690 		return (DCMD_ERR);
1691 	}
1692 
1693 	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1694 		mdb_warn("failed to read segkmap at %p", kaddr);
1695 		return (DCMD_ERR);
1696 	}
1697 
1698 	if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1699 		mdb_warn("failed to read segmap_data at %p", seg.s_data);
1700 		return (DCMD_ERR);
1701 	}
1702 
1703 	mdb_printf("%p is smap %p\n", addr,
1704 	    ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1705 	    sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1706 
1707 	return (DCMD_OK);
1708 }
1709