xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/memory.c (revision 62c8caf3fac65817982e780c1efa988846153bf0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <sys/types.h>
28 #include <vm/page.h>
29 #include <sys/thread.h>
30 #include <sys/swap.h>
31 #include <sys/memlist.h>
32 #if defined(__i386) || defined(__amd64)
33 #include <sys/balloon_impl.h>
34 #endif
35 
36 /*
37  * Page walker.
38  * By default, this will walk all pages in the system.  If given an
39  * address, it will walk all pages belonging to the vnode at that
40  * address.
41  */
42 
/*
 * page_walk_data: private state carried between steps of the page walker.
 *
 * pw_hashleft doubles as the mode flag.  It is -1 while the walker is
 * visiting the pages of a single vnode; while walking every page it is
 * the number of page hash table slots that have not been examined yet.
 *
 * pw_hashloc is only used by the all-pages walk, where it points at the
 * next page hash table slot to read.  pw_first is only used by the
 * per-vnode walk, where it records the first page visited so that the
 * walk can stop once it comes back around to it.  The two fields could
 * share storage, but they are kept apart for readability.
 */
typedef struct page_walk_data {
	long		pw_hashleft;	/* -1, or hash slots left to scan */
	void		**pw_hashloc;	/* next hash slot (all-pages walk) */
	uintptr_t	pw_first;	/* first page seen (vnode walk) */
} page_walk_data_t;
62 
63 int
64 page_walk_init(mdb_walk_state_t *wsp)
65 {
66 	page_walk_data_t	*pwd;
67 	void	**ptr;
68 	size_t	hashsz;
69 	vnode_t	vn;
70 
71 	if (wsp->walk_addr == NULL) {
72 
73 		/*
74 		 * Walk all pages
75 		 */
76 
77 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
78 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
79 		    (ptr == NULL) || (hashsz == 0)) {
80 			mdb_warn("page_hash, page_hashsz not found or invalid");
81 			return (WALK_ERR);
82 		}
83 
84 		/*
85 		 * Since we are walking all pages, initialize hashleft
86 		 * to be the remaining number of entries in the page
87 		 * hash.  hashloc is set the start of the page hash
88 		 * table.  Setting the walk address to 0 indicates that
89 		 * we aren't currently following a hash chain, and that
90 		 * we need to scan the page hash table for a page.
91 		 */
92 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
93 		pwd->pw_hashleft = hashsz;
94 		pwd->pw_hashloc = ptr;
95 		wsp->walk_addr = 0;
96 	} else {
97 
98 		/*
99 		 * Walk just this vnode
100 		 */
101 
102 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
103 			mdb_warn("unable to read vnode_t at %#lx",
104 			    wsp->walk_addr);
105 			return (WALK_ERR);
106 		}
107 
108 		/*
109 		 * We set hashleft to -1 to indicate that we are
110 		 * walking a vnode, and initialize first to 0 (it is
111 		 * used to terminate the walk, so it must not be set
112 		 * until after we have walked the first page).  The
113 		 * walk address is set to the first page.
114 		 */
115 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
116 		pwd->pw_hashleft = -1;
117 		pwd->pw_first = 0;
118 
119 		wsp->walk_addr = (uintptr_t)vn.v_pages;
120 	}
121 
122 	wsp->walk_data = pwd;
123 
124 	return (WALK_NEXT);
125 }
126 
127 int
128 page_walk_step(mdb_walk_state_t *wsp)
129 {
130 	page_walk_data_t	*pwd = wsp->walk_data;
131 	page_t		page;
132 	uintptr_t	pp;
133 
134 	pp = wsp->walk_addr;
135 
136 	if (pwd->pw_hashleft < 0) {
137 
138 		/* We're walking a vnode's pages */
139 
140 		/*
141 		 * If we don't have any pages to walk, we have come
142 		 * back around to the first one (we finished), or we
143 		 * can't read the page we're looking at, we are done.
144 		 */
145 		if (pp == NULL || pp == pwd->pw_first)
146 			return (WALK_DONE);
147 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
148 			mdb_warn("unable to read page_t at %#lx", pp);
149 			return (WALK_ERR);
150 		}
151 
152 		/*
153 		 * Set the walk address to the next page, and if the
154 		 * first page hasn't been set yet (i.e. we are on the
155 		 * first page), set it.
156 		 */
157 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
158 		if (pwd->pw_first == NULL)
159 			pwd->pw_first = pp;
160 
161 	} else if (pwd->pw_hashleft > 0) {
162 
163 		/* We're walking all pages */
164 
165 		/*
166 		 * If pp (the walk address) is NULL, we scan through
167 		 * the page hash table until we find a page.
168 		 */
169 		if (pp == NULL) {
170 
171 			/*
172 			 * Iterate through the page hash table until we
173 			 * find a page or reach the end.
174 			 */
175 			do {
176 				if (mdb_vread(&pp, sizeof (uintptr_t),
177 				    (uintptr_t)pwd->pw_hashloc) == -1) {
178 					mdb_warn("unable to read from %#p",
179 					    pwd->pw_hashloc);
180 					return (WALK_ERR);
181 				}
182 				pwd->pw_hashleft--;
183 				pwd->pw_hashloc++;
184 			} while (pwd->pw_hashleft && (pp == NULL));
185 
186 			/*
187 			 * We've reached the end; exit.
188 			 */
189 			if (pp == NULL)
190 				return (WALK_DONE);
191 		}
192 
193 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
194 			mdb_warn("unable to read page_t at %#lx", pp);
195 			return (WALK_ERR);
196 		}
197 
198 		/*
199 		 * Set the walk address to the next page.
200 		 */
201 		wsp->walk_addr = (uintptr_t)page.p_hash;
202 
203 	} else {
204 		/* We've finished walking all pages. */
205 		return (WALK_DONE);
206 	}
207 
208 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
209 }
210 
211 void
212 page_walk_fini(mdb_walk_state_t *wsp)
213 {
214 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
215 }
216 
/*
 * Per-category page counts accumulated by memstat_callback(), together
 * with the vnode addresses the callback compares each page against.
 */
typedef struct memstat {
	struct vnode	*ms_kvp;	/* address of the kernel vnode */
	struct vnode	*ms_zvp;	/* address of the zio vnode, or NULL */
	uint64_t	ms_kmem;	/* pages of kernel memory */
	uint64_t	ms_zfs_data;	/* pages of zfs data */
	uint64_t	ms_anon;	/* pages of anonymous memory */
	uint64_t	ms_vnode;	/* pages of named (vnode) memory */
	uint64_t	ms_exec;	/* pages of exec/library memory */
	uint64_t	ms_cachelist;	/* pages on the cachelist (free) */
	uint64_t	ms_total;	/* pages on the page hash */
} memstat_t;
229 
/* True when the page's vnode is the kernel vnode recorded in stats. */
#define	MS_PP_ISKAS(pp, stats)	((stats)->ms_kvp == (pp)->p_vnode)

/*
 * True when a zio vnode was recorded in stats (ms_zvp is not NULL) and
 * the page's vnode is that vnode.
 */
#define	MS_PP_ISZFS_DATA(pp, stats)	\
	((NULL != (stats)->ms_zvp) && ((stats)->ms_zvp == (pp)->p_vnode))
235 
236 /*
237  * Summarize pages by type; called from page walker.
238  */
239 
240 /* ARGSUSED */
241 static int
242 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
243 {
244 	struct vnode vn, *vp;
245 	uintptr_t ptr;
246 
247 	/* read page's vnode pointer */
248 	if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
249 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
250 			mdb_warn("unable to read vnode_t at %#lx",
251 			    ptr);
252 			return (WALK_ERR);
253 		}
254 		vp = &vn;
255 	} else
256 		vp = NULL;
257 
258 	if (PP_ISFREE(pp))
259 		stats->ms_cachelist++;
260 	else if (vp && IS_SWAPFSVP(vp))
261 		stats->ms_anon++;
262 	else if (MS_PP_ISZFS_DATA(pp, stats))
263 		stats->ms_zfs_data++;
264 	else if (MS_PP_ISKAS(pp, stats))
265 		stats->ms_kmem++;
266 	else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
267 		stats->ms_exec++;
268 	else
269 		stats->ms_vnode++;
270 
271 	stats->ms_total++;
272 
273 	return (WALK_NEXT);
274 }
275 
276 /* ARGSUSED */
277 int
278 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
279 {
280 	ulong_t pagesize;
281 	pgcnt_t total_pages, physmem;
282 	ulong_t freemem;
283 	memstat_t stats;
284 	memstat_t unused_stats;
285 	GElf_Sym sym;
286 #if defined(__i386) || defined(__amd64)
287 	bln_stats_t bln_stats;
288 	ssize_t bln_size;
289 #endif
290 
291 	bzero(&stats, sizeof (memstat_t));
292 	bzero(&unused_stats, sizeof (memstat_t));
293 
294 	if (argc != 0 || (flags & DCMD_ADDRSPEC))
295 		return (DCMD_USAGE);
296 
297 	/* Grab base page size */
298 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
299 		mdb_warn("unable to read _pagesize");
300 		return (DCMD_ERR);
301 	}
302 
303 	/* Total physical memory */
304 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
305 		mdb_warn("unable to read total_pages");
306 		return (DCMD_ERR);
307 	}
308 
309 	/* Artificially limited memory */
310 	if (mdb_readvar(&physmem, "physmem") == -1) {
311 		mdb_warn("unable to read physmem");
312 		return (DCMD_ERR);
313 	}
314 
315 	/* read kernel vnode pointer */
316 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
317 	    (GElf_Sym *)&sym) == -1) {
318 		mdb_warn("unable to read kvp");
319 		return (DCMD_ERR);
320 	}
321 
322 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
323 
324 	/*
325 	 * Read the zio vnode pointer.  It may not exist on all kernels, so it
326 	 * it isn't found, it's not a fatal error.
327 	 */
328 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
329 	    (GElf_Sym *)&sym) == -1) {
330 		stats.ms_zvp = NULL;
331 	} else {
332 		stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
333 	}
334 
335 	/* Walk page structures, summarizing usage */
336 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
337 	    &stats) == -1) {
338 		mdb_warn("can't walk pages");
339 		return (DCMD_ERR);
340 	}
341 
342 	/* read unused pages vnode */
343 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
344 	    (GElf_Sym *)&sym) == -1) {
345 		mdb_warn("unable to read unused_pages_vp");
346 		return (DCMD_ERR);
347 	}
348 
349 	unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
350 
351 	/* Find unused pages */
352 	if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
353 	    &unused_stats) == -1) {
354 		mdb_warn("can't walk pages");
355 		return (DCMD_ERR);
356 	}
357 
358 	/*
359 	 * If physmem != total_pages, then the administrator has limited the
360 	 * number of pages available in the system.  In order to account for
361 	 * this, we reduce the amount normally attributed to the page cache.
362 	 */
363 	stats.ms_vnode -= unused_stats.ms_kmem;
364 	stats.ms_total -= unused_stats.ms_kmem;
365 
366 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
367 		((physmem) * 10)))
368 
369 	mdb_printf("Page Summary                Pages                MB"
370 	    "  %%Tot\n");
371 	mdb_printf("------------     ----------------  ----------------"
372 	    "  ----\n");
373 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
374 	    stats.ms_kmem,
375 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
376 	    MS_PCT_TOTAL(stats.ms_kmem));
377 
378 	if (stats.ms_zfs_data != 0)
379 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
380 		    stats.ms_zfs_data,
381 		    (uint64_t)stats.ms_zfs_data * pagesize / (1024 * 1024),
382 		    MS_PCT_TOTAL(stats.ms_zfs_data));
383 
384 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
385 	    stats.ms_anon,
386 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
387 	    MS_PCT_TOTAL(stats.ms_anon));
388 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
389 	    stats.ms_exec,
390 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
391 	    MS_PCT_TOTAL(stats.ms_exec));
392 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
393 	    stats.ms_vnode,
394 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
395 	    MS_PCT_TOTAL(stats.ms_vnode));
396 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
397 	    stats.ms_cachelist,
398 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
399 	    MS_PCT_TOTAL(stats.ms_cachelist));
400 
401 	/*
402 	 * occasionally, we double count pages above.  To avoid printing
403 	 * absurdly large values for freemem, we clamp it at zero.
404 	 */
405 	if (physmem > stats.ms_total)
406 		freemem = physmem - stats.ms_total;
407 	else
408 		freemem = 0;
409 
410 #if defined(__i386) || defined(__amd64)
411 	/* Are we running under Xen?  If so, get balloon memory usage. */
412 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
413 		if (freemem > bln_stats.bln_hv_pages)
414 			freemem -= bln_stats.bln_hv_pages;
415 		else
416 			freemem = 0;
417 	}
418 #endif
419 
420 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
421 	    (uint64_t)freemem * pagesize / (1024 * 1024),
422 	    MS_PCT_TOTAL(freemem));
423 
424 #if defined(__i386) || defined(__amd64)
425 	if (bln_size != -1) {
426 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
427 		    bln_stats.bln_hv_pages,
428 		    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
429 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
430 	}
431 #endif
432 
433 	mdb_printf("\nTotal            %16lu  %16lu\n",
434 	    physmem,
435 	    (uint64_t)physmem * pagesize / (1024 * 1024));
436 
437 	if (physmem != total_pages) {
438 		mdb_printf("Physical         %16lu  %16lu\n",
439 		    total_pages,
440 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
441 	}
442 
443 #undef MS_PCT_TOTAL
444 
445 	return (DCMD_OK);
446 }
447 
448 int
449 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
450 {
451 	page_t	p;
452 
453 	if (!(flags & DCMD_ADDRSPEC)) {
454 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
455 			mdb_warn("can't walk pages");
456 			return (DCMD_ERR);
457 		}
458 		return (DCMD_OK);
459 	}
460 
461 	if (DCMD_HDRSPEC(flags)) {
462 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
463 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
464 		    "LCT", "COW", "IO", "FS", "ST");
465 	}
466 
467 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
468 		mdb_warn("can't read page_t at %#lx", addr);
469 		return (DCMD_ERR);
470 	}
471 
472 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
473 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
474 	    p.p_iolock_state, p.p_fsdata, p.p_state);
475 
476 	return (DCMD_OK);
477 }
478 
479 int
480 swap_walk_init(mdb_walk_state_t *wsp)
481 {
482 	void	*ptr;
483 
484 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
485 		mdb_warn("swapinfo not found or invalid");
486 		return (WALK_ERR);
487 	}
488 
489 	wsp->walk_addr = (uintptr_t)ptr;
490 
491 	return (WALK_NEXT);
492 }
493 
494 int
495 swap_walk_step(mdb_walk_state_t *wsp)
496 {
497 	uintptr_t	sip;
498 	struct swapinfo	si;
499 
500 	sip = wsp->walk_addr;
501 
502 	if (sip == NULL)
503 		return (WALK_DONE);
504 
505 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
506 		mdb_warn("unable to read swapinfo at %#lx", sip);
507 		return (WALK_ERR);
508 	}
509 
510 	wsp->walk_addr = (uintptr_t)si.si_next;
511 
512 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
513 }
514 
515 int
516 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
517 {
518 	struct swapinfo	si;
519 	char		*name;
520 
521 	if (!(flags & DCMD_ADDRSPEC)) {
522 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
523 			mdb_warn("can't walk swapinfo");
524 			return (DCMD_ERR);
525 		}
526 		return (DCMD_OK);
527 	}
528 
529 	if (DCMD_HDRSPEC(flags)) {
530 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
531 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
532 	}
533 
534 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
535 		mdb_warn("can't read swapinfo at %#lx", addr);
536 		return (DCMD_ERR);
537 	}
538 
539 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
540 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
541 		name = "*error*";
542 
543 	mdb_printf("%0?lx %?p %9d %9d %s\n",
544 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
545 
546 	return (DCMD_OK);
547 }
548 
549 int
550 memlist_walk_step(mdb_walk_state_t *wsp)
551 {
552 	uintptr_t	mlp;
553 	struct memlist	ml;
554 
555 	mlp = wsp->walk_addr;
556 
557 	if (mlp == NULL)
558 		return (WALK_DONE);
559 
560 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
561 		mdb_warn("unable to read memlist at %#lx", mlp);
562 		return (WALK_ERR);
563 	}
564 
565 	wsp->walk_addr = (uintptr_t)ml.next;
566 
567 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
568 }
569 
570 int
571 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
572 {
573 	struct memlist	ml;
574 
575 	if (!(flags & DCMD_ADDRSPEC)) {
576 		uintptr_t ptr;
577 		uint_t list = 0;
578 		int i;
579 		static const char *lists[] = {
580 			"phys_install",
581 			"phys_avail",
582 			"virt_avail"
583 		};
584 
585 		if (mdb_getopts(argc, argv,
586 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
587 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
588 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
589 			return (DCMD_USAGE);
590 
591 		if (!list)
592 			list = 1;
593 
594 		for (i = 0; list; i++, list >>= 1) {
595 			if (!(list & 1))
596 				continue;
597 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
598 			    (ptr == NULL)) {
599 				mdb_warn("%s not found or invalid", lists[i]);
600 				return (DCMD_ERR);
601 			}
602 
603 			mdb_printf("%s:\n", lists[i]);
604 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
605 			    ptr) == -1) {
606 				mdb_warn("can't walk memlist");
607 				return (DCMD_ERR);
608 			}
609 		}
610 		return (DCMD_OK);
611 	}
612 
613 	if (DCMD_HDRSPEC(flags))
614 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
615 
616 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
617 		mdb_warn("can't read memlist at %#lx", addr);
618 		return (DCMD_ERR);
619 	}
620 
621 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
622 
623 	return (DCMD_OK);
624 }
625