xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision 3cf6f95f0e20ed31de99608fdb0a120190d5438f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <mdb/mdb_modapi.h>
29 #include <sys/types.h>
30 #include <vm/page.h>
31 #include <sys/thread.h>
32 #include <sys/swap.h>
33 #include <sys/memlist.h>
34 #if defined(__i386) || defined(__amd64)
35 #include <sys/balloon_impl.h>
36 #endif
37 
38 /*
39  * Page walker.
40  * By default, this will walk all pages in the system.  If given an
41  * address, it will walk all pages belonging to the vnode at that
42  * address.
43  */
44 
45 /*
46  * page_walk_data
47  *
48  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
49  * number of hash locations remaining in the page hash table when
50  * walking all pages.
51  *
52  * The astute reader will notice that pw_hashloc is only used when
53  * reading all pages (to hold a pointer to our location in the page
54  * hash table), and that pw_first is only used when reading the pages
55  * belonging to a particular vnode (to hold a pointer to the first
56  * page).  While these could be combined to be a single pointer, they
57  * are left separate for clarity.
58  */
typedef struct page_walk_data {
	/* Hash slots still to scan; -1 marks a single-vnode walk. */
	long		pw_hashleft;
	/* Next page hash slot to examine (all-pages walk only). */
	void		**pw_hashloc;
	/* First page visited (vnode walk only); used to detect wrap. */
	uintptr_t	pw_first;
} page_walk_data_t;
64 
65 int
66 page_walk_init(mdb_walk_state_t *wsp)
67 {
68 	page_walk_data_t	*pwd;
69 	void	**ptr;
70 	size_t	hashsz;
71 	vnode_t	vn;
72 
73 	if (wsp->walk_addr == NULL) {
74 
75 		/*
76 		 * Walk all pages
77 		 */
78 
79 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
80 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
81 		    (ptr == NULL) || (hashsz == 0)) {
82 			mdb_warn("page_hash, page_hashsz not found or invalid");
83 			return (WALK_ERR);
84 		}
85 
86 		/*
87 		 * Since we are walking all pages, initialize hashleft
88 		 * to be the remaining number of entries in the page
89 		 * hash.  hashloc is set the start of the page hash
90 		 * table.  Setting the walk address to 0 indicates that
91 		 * we aren't currently following a hash chain, and that
92 		 * we need to scan the page hash table for a page.
93 		 */
94 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
95 		pwd->pw_hashleft = hashsz;
96 		pwd->pw_hashloc = ptr;
97 		wsp->walk_addr = 0;
98 	} else {
99 
100 		/*
101 		 * Walk just this vnode
102 		 */
103 
104 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
105 			mdb_warn("unable to read vnode_t at %#lx",
106 			    wsp->walk_addr);
107 			return (WALK_ERR);
108 		}
109 
110 		/*
111 		 * We set hashleft to -1 to indicate that we are
112 		 * walking a vnode, and initialize first to 0 (it is
113 		 * used to terminate the walk, so it must not be set
114 		 * until after we have walked the first page).  The
115 		 * walk address is set to the first page.
116 		 */
117 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
118 		pwd->pw_hashleft = -1;
119 		pwd->pw_first = 0;
120 
121 		wsp->walk_addr = (uintptr_t)vn.v_pages;
122 	}
123 
124 	wsp->walk_data = pwd;
125 
126 	return (WALK_NEXT);
127 }
128 
129 int
130 page_walk_step(mdb_walk_state_t *wsp)
131 {
132 	page_walk_data_t	*pwd = wsp->walk_data;
133 	page_t		page;
134 	uintptr_t	pp;
135 
136 	pp = wsp->walk_addr;
137 
138 	if (pwd->pw_hashleft < 0) {
139 
140 		/* We're walking a vnode's pages */
141 
142 		/*
143 		 * If we don't have any pages to walk, we have come
144 		 * back around to the first one (we finished), or we
145 		 * can't read the page we're looking at, we are done.
146 		 */
147 		if (pp == NULL || pp == pwd->pw_first)
148 			return (WALK_DONE);
149 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
150 			mdb_warn("unable to read page_t at %#lx", pp);
151 			return (WALK_ERR);
152 		}
153 
154 		/*
155 		 * Set the walk address to the next page, and if the
156 		 * first page hasn't been set yet (i.e. we are on the
157 		 * first page), set it.
158 		 */
159 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
160 		if (pwd->pw_first == NULL)
161 			pwd->pw_first = pp;
162 
163 	} else if (pwd->pw_hashleft > 0) {
164 
165 		/* We're walking all pages */
166 
167 		/*
168 		 * If pp (the walk address) is NULL, we scan through
169 		 * the page hash table until we find a page.
170 		 */
171 		if (pp == NULL) {
172 
173 			/*
174 			 * Iterate through the page hash table until we
175 			 * find a page or reach the end.
176 			 */
177 			do {
178 				if (mdb_vread(&pp, sizeof (uintptr_t),
179 				    (uintptr_t)pwd->pw_hashloc) == -1) {
180 					mdb_warn("unable to read from %#p",
181 					    pwd->pw_hashloc);
182 					return (WALK_ERR);
183 				}
184 				pwd->pw_hashleft--;
185 				pwd->pw_hashloc++;
186 			} while (pwd->pw_hashleft && (pp == NULL));
187 
188 			/*
189 			 * We've reached the end; exit.
190 			 */
191 			if (pp == NULL)
192 				return (WALK_DONE);
193 		}
194 
195 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
196 			mdb_warn("unable to read page_t at %#lx", pp);
197 			return (WALK_ERR);
198 		}
199 
200 		/*
201 		 * Set the walk address to the next page.
202 		 */
203 		wsp->walk_addr = (uintptr_t)page.p_hash;
204 
205 	} else {
206 		/* We've finished walking all pages. */
207 		return (WALK_DONE);
208 	}
209 
210 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
211 }
212 
213 void
214 page_walk_fini(mdb_walk_state_t *wsp)
215 {
216 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
217 }
218 
/*
 * Per-category page counters accumulated while walking all pages, plus
 * the vnode addresses used to recognise kernel pages.
 */
typedef struct memstat {
	struct vnode	*ms_kvp;	/* Cached address of kernel vnode */
	struct vnode	*ms_zvp;	/* Cached address of zio vnode */
	uint64_t	ms_kmem;	/* Pages of kernel memory */
	uint64_t	ms_anon;	/* Pages of anonymous memory */
	uint64_t	ms_vnode;	/* Pages of named (vnode) memory */
	uint64_t	ms_exec;	/* Pages of exec/library memory */
	uint64_t	ms_cachelist;	/* Pages on the cachelist (free) */
	uint64_t	ms_total;	/* Pages on page hash */
} memstat_t;

/*
 * True when the page's vnode is the cached kernel vnode, or the cached
 * zio vnode provided that one was found (ms_zvp is non-NULL).
 */
#define	MS_PP_ISKAS(pp, stats)					\
	((pp)->p_vnode == (stats)->ms_kvp ||			\
	    ((stats)->ms_zvp != NULL &&				\
	    (pp)->p_vnode == (stats)->ms_zvp))
234 
235 /*
236  * Summarize pages by type; called from page walker.
237  */
238 
239 /* ARGSUSED */
240 static int
241 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
242 {
243 	struct vnode vn, *vp;
244 	uintptr_t ptr;
245 
246 	/* read page's vnode pointer */
247 	if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
248 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
249 			mdb_warn("unable to read vnode_t at %#lx",
250 			    ptr);
251 			return (WALK_ERR);
252 		}
253 		vp = &vn;
254 	} else
255 		vp = NULL;
256 
257 	if (PP_ISFREE(pp))
258 		stats->ms_cachelist++;
259 	else if (vp && IS_SWAPFSVP(vp))
260 		stats->ms_anon++;
261 	else if (MS_PP_ISKAS(pp, stats))
262 		stats->ms_kmem++;
263 	else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
264 		stats->ms_exec++;
265 	else
266 		stats->ms_vnode++;
267 
268 	stats->ms_total++;
269 
270 	return (WALK_NEXT);
271 }
272 
273 /* ARGSUSED */
274 int
275 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
276 {
277 	ulong_t pagesize;
278 	pgcnt_t total_pages, physmem;
279 	ulong_t freemem;
280 	memstat_t stats;
281 	memstat_t unused_stats;
282 	GElf_Sym sym;
283 #if defined(__i386) || defined(__amd64)
284 	bln_stats_t bln_stats;
285 	ssize_t bln_size;
286 #endif
287 
288 	bzero(&stats, sizeof (memstat_t));
289 	bzero(&unused_stats, sizeof (memstat_t));
290 
291 	if (argc != 0 || (flags & DCMD_ADDRSPEC))
292 		return (DCMD_USAGE);
293 
294 	/* Grab base page size */
295 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
296 		mdb_warn("unable to read _pagesize");
297 		return (DCMD_ERR);
298 	}
299 
300 	/* Total physical memory */
301 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
302 		mdb_warn("unable to read total_pages");
303 		return (DCMD_ERR);
304 	}
305 
306 	/* Artificially limited memory */
307 	if (mdb_readvar(&physmem, "physmem") == -1) {
308 		mdb_warn("unable to read physmem");
309 		return (DCMD_ERR);
310 	}
311 
312 	/* read kernel vnode pointer */
313 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
314 	    (GElf_Sym *)&sym) == -1) {
315 		mdb_warn("unable to read kvp");
316 		return (DCMD_ERR);
317 	}
318 
319 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
320 
321 	/*
322 	 * Read the zio vnode pointer.  It may not exist on all kernels, so it
323 	 * it isn't found, it's not a fatal error.
324 	 */
325 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
326 	    (GElf_Sym *)&sym) == -1) {
327 		stats.ms_zvp = NULL;
328 	} else {
329 		stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
330 	}
331 
332 	/* Walk page structures, summarizing usage */
333 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
334 	    &stats) == -1) {
335 		mdb_warn("can't walk pages");
336 		return (DCMD_ERR);
337 	}
338 
339 	/* read unused pages vnode */
340 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
341 	    (GElf_Sym *)&sym) == -1) {
342 		mdb_warn("unable to read unused_pages_vp");
343 		return (DCMD_ERR);
344 	}
345 
346 	unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
347 
348 	/* Find unused pages */
349 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
350 	    &unused_stats) == -1) {
351 		mdb_warn("can't walk pages");
352 		return (DCMD_ERR);
353 	}
354 
355 	/*
356 	 * If physmem != total_pages, then the administrator has limited the
357 	 * number of pages available in the system.  In order to account for
358 	 * this, we reduce the amount normally attributed to the page cache.
359 	 */
360 	stats.ms_vnode -= unused_stats.ms_kmem;
361 	stats.ms_total -= unused_stats.ms_kmem;
362 
363 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
364 		((physmem) * 10)))
365 
366 	mdb_printf("Page Summary                Pages                MB"
367 	    "  %%Tot\n");
368 	mdb_printf("------------     ----------------  ----------------"
369 	    "  ----\n");
370 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
371 	    stats.ms_kmem,
372 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
373 	    MS_PCT_TOTAL(stats.ms_kmem));
374 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
375 	    stats.ms_anon,
376 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
377 	    MS_PCT_TOTAL(stats.ms_anon));
378 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
379 	    stats.ms_exec,
380 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
381 	    MS_PCT_TOTAL(stats.ms_exec));
382 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
383 	    stats.ms_vnode,
384 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
385 	    MS_PCT_TOTAL(stats.ms_vnode));
386 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
387 	    stats.ms_cachelist,
388 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
389 	    MS_PCT_TOTAL(stats.ms_cachelist));
390 
391 	/*
392 	 * occasionally, we double count pages above.  To avoid printing
393 	 * absurdly large values for freemem, we clamp it at zero.
394 	 */
395 	if (physmem > stats.ms_total)
396 		freemem = physmem - stats.ms_total;
397 	else
398 		freemem = 0;
399 
400 #if defined(__i386) || defined(__amd64)
401 	/* Are we running under Xen?  If so, get balloon memory usage. */
402 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
403 		if (freemem > bln_stats.bln_hv_pages)
404 			freemem -= bln_stats.bln_hv_pages;
405 		else
406 			freemem = 0;
407 	}
408 #endif
409 
410 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
411 	    (uint64_t)freemem * pagesize / (1024 * 1024),
412 	    MS_PCT_TOTAL(freemem));
413 
414 #if defined(__i386) || defined(__amd64)
415 	if (bln_size != -1) {
416 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
417 		    bln_stats.bln_hv_pages,
418 		    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
419 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
420 	}
421 #endif
422 
423 	mdb_printf("\nTotal            %16lu  %16lu\n",
424 	    physmem,
425 	    (uint64_t)physmem * pagesize / (1024 * 1024));
426 
427 	if (physmem != total_pages) {
428 		mdb_printf("Physical         %16lu  %16lu\n",
429 		    total_pages,
430 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
431 	}
432 
433 #undef MS_PCT_TOTAL
434 
435 	return (DCMD_OK);
436 }
437 
438 int
439 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
440 {
441 	page_t	p;
442 
443 	if (!(flags & DCMD_ADDRSPEC)) {
444 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
445 			mdb_warn("can't walk pages");
446 			return (DCMD_ERR);
447 		}
448 		return (DCMD_OK);
449 	}
450 
451 	if (DCMD_HDRSPEC(flags)) {
452 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
453 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
454 		    "LCT", "COW", "IO", "FS", "ST");
455 	}
456 
457 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
458 		mdb_warn("can't read page_t at %#lx", addr);
459 		return (DCMD_ERR);
460 	}
461 
462 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
463 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
464 	    p.p_iolock_state, p.p_fsdata, p.p_state);
465 
466 	return (DCMD_OK);
467 }
468 
469 int
470 swap_walk_init(mdb_walk_state_t *wsp)
471 {
472 	void	*ptr;
473 
474 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
475 		mdb_warn("swapinfo not found or invalid");
476 		return (WALK_ERR);
477 	}
478 
479 	wsp->walk_addr = (uintptr_t)ptr;
480 
481 	return (WALK_NEXT);
482 }
483 
484 int
485 swap_walk_step(mdb_walk_state_t *wsp)
486 {
487 	uintptr_t	sip;
488 	struct swapinfo	si;
489 
490 	sip = wsp->walk_addr;
491 
492 	if (sip == NULL)
493 		return (WALK_DONE);
494 
495 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
496 		mdb_warn("unable to read swapinfo at %#lx", sip);
497 		return (WALK_ERR);
498 	}
499 
500 	wsp->walk_addr = (uintptr_t)si.si_next;
501 
502 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
503 }
504 
505 int
506 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
507 {
508 	struct swapinfo	si;
509 	char		*name;
510 
511 	if (!(flags & DCMD_ADDRSPEC)) {
512 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
513 			mdb_warn("can't walk swapinfo");
514 			return (DCMD_ERR);
515 		}
516 		return (DCMD_OK);
517 	}
518 
519 	if (DCMD_HDRSPEC(flags)) {
520 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
521 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
522 	}
523 
524 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
525 		mdb_warn("can't read swapinfo at %#lx", addr);
526 		return (DCMD_ERR);
527 	}
528 
529 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
530 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
531 		name = "*error*";
532 
533 	mdb_printf("%0?lx %?p %9d %9d %s\n",
534 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
535 
536 	return (DCMD_OK);
537 }
538 
539 int
540 memlist_walk_step(mdb_walk_state_t *wsp)
541 {
542 	uintptr_t	mlp;
543 	struct memlist	ml;
544 
545 	mlp = wsp->walk_addr;
546 
547 	if (mlp == NULL)
548 		return (WALK_DONE);
549 
550 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
551 		mdb_warn("unable to read memlist at %#lx", mlp);
552 		return (WALK_ERR);
553 	}
554 
555 	wsp->walk_addr = (uintptr_t)ml.next;
556 
557 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
558 }
559 
560 int
561 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
562 {
563 	struct memlist	ml;
564 
565 	if (!(flags & DCMD_ADDRSPEC)) {
566 		uintptr_t ptr;
567 		uint_t list = 0;
568 		int i;
569 		static const char *lists[] = {
570 			"phys_install",
571 			"phys_avail",
572 			"virt_avail"
573 		};
574 
575 		if (mdb_getopts(argc, argv,
576 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
577 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
578 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
579 			return (DCMD_USAGE);
580 
581 		if (!list)
582 			list = 1;
583 
584 		for (i = 0; list; i++, list >>= 1) {
585 			if (!(list & 1))
586 				continue;
587 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
588 			    (ptr == NULL)) {
589 				mdb_warn("%s not found or invalid", lists[i]);
590 				return (DCMD_ERR);
591 			}
592 
593 			mdb_printf("%s:\n", lists[i]);
594 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
595 			    ptr) == -1) {
596 				mdb_warn("can't walk memlist");
597 				return (DCMD_ERR);
598 			}
599 		}
600 		return (DCMD_OK);
601 	}
602 
603 	if (DCMD_HDRSPEC(flags))
604 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
605 
606 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
607 		mdb_warn("can't read memlist at %#lx", addr);
608 		return (DCMD_ERR);
609 	}
610 
611 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
612 
613 	return (DCMD_OK);
614 }
615