1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <mdb/mdb_modapi.h> 29 #include <sys/types.h> 30 #include <vm/page.h> 31 #include <sys/thread.h> 32 #include <sys/swap.h> 33 #include <sys/memlist.h> 34 #if defined(__i386) || defined(__amd64) 35 #include <sys/balloon_impl.h> 36 #endif 37 38 /* 39 * Page walker. 40 * By default, this will walk all pages in the system. If given an 41 * address, it will walk all pages belonging to the vnode at that 42 * address. 43 */ 44 45 /* 46 * page_walk_data 47 * 48 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the 49 * number of hash locations remaining in the page hash table when 50 * walking all pages. 51 * 52 * The astute reader will notice that pw_hashloc is only used when 53 * reading all pages (to hold a pointer to our location in the page 54 * hash table), and that pw_first is only used when reading the pages 55 * belonging to a particular vnode (to hold a pointer to the first 56 * page). While these could be combined to be a single pointer, they 57 * are left separate for clarity. 58 */ 59 typedef struct page_walk_data { 60 long pw_hashleft; 61 void **pw_hashloc; 62 uintptr_t pw_first; 63 } page_walk_data_t; 64 65 int 66 page_walk_init(mdb_walk_state_t *wsp) 67 { 68 page_walk_data_t *pwd; 69 void **ptr; 70 size_t hashsz; 71 vnode_t vn; 72 73 if (wsp->walk_addr == NULL) { 74 75 /* 76 * Walk all pages 77 */ 78 79 if ((mdb_readvar(&ptr, "page_hash") == -1) || 80 (mdb_readvar(&hashsz, "page_hashsz") == -1) || 81 (ptr == NULL) || (hashsz == 0)) { 82 mdb_warn("page_hash, page_hashsz not found or invalid"); 83 return (WALK_ERR); 84 } 85 86 /* 87 * Since we are walking all pages, initialize hashleft 88 * to be the remaining number of entries in the page 89 * hash. hashloc is set the start of the page hash 90 * table. Setting the walk address to 0 indicates that 91 * we aren't currently following a hash chain, and that 92 * we need to scan the page hash table for a page. 93 */ 94 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 95 pwd->pw_hashleft = hashsz; 96 pwd->pw_hashloc = ptr; 97 wsp->walk_addr = 0; 98 } else { 99 100 /* 101 * Walk just this vnode 102 */ 103 104 if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) { 105 mdb_warn("unable to read vnode_t at %#lx", 106 wsp->walk_addr); 107 return (WALK_ERR); 108 } 109 110 /* 111 * We set hashleft to -1 to indicate that we are 112 * walking a vnode, and initialize first to 0 (it is 113 * used to terminate the walk, so it must not be set 114 * until after we have walked the first page). The 115 * walk address is set to the first page. 116 */ 117 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 118 pwd->pw_hashleft = -1; 119 pwd->pw_first = 0; 120 121 wsp->walk_addr = (uintptr_t)vn.v_pages; 122 } 123 124 wsp->walk_data = pwd; 125 126 return (WALK_NEXT); 127 } 128 129 int 130 page_walk_step(mdb_walk_state_t *wsp) 131 { 132 page_walk_data_t *pwd = wsp->walk_data; 133 page_t page; 134 uintptr_t pp; 135 136 pp = wsp->walk_addr; 137 138 if (pwd->pw_hashleft < 0) { 139 140 /* We're walking a vnode's pages */ 141 142 /* 143 * If we don't have any pages to walk, we have come 144 * back around to the first one (we finished), or we 145 * can't read the page we're looking at, we are done. 146 */ 147 if (pp == NULL || pp == pwd->pw_first) 148 return (WALK_DONE); 149 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 150 mdb_warn("unable to read page_t at %#lx", pp); 151 return (WALK_ERR); 152 } 153 154 /* 155 * Set the walk address to the next page, and if the 156 * first page hasn't been set yet (i.e. we are on the 157 * first page), set it. 158 */ 159 wsp->walk_addr = (uintptr_t)page.p_vpnext; 160 if (pwd->pw_first == NULL) 161 pwd->pw_first = pp; 162 163 } else if (pwd->pw_hashleft > 0) { 164 165 /* We're walking all pages */ 166 167 /* 168 * If pp (the walk address) is NULL, we scan through 169 * the page hash table until we find a page. 170 */ 171 if (pp == NULL) { 172 173 /* 174 * Iterate through the page hash table until we 175 * find a page or reach the end. 176 */ 177 do { 178 if (mdb_vread(&pp, sizeof (uintptr_t), 179 (uintptr_t)pwd->pw_hashloc) == -1) { 180 mdb_warn("unable to read from %#p", 181 pwd->pw_hashloc); 182 return (WALK_ERR); 183 } 184 pwd->pw_hashleft--; 185 pwd->pw_hashloc++; 186 } while (pwd->pw_hashleft && (pp == NULL)); 187 188 /* 189 * We've reached the end; exit. 190 */ 191 if (pp == NULL) 192 return (WALK_DONE); 193 } 194 195 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 196 mdb_warn("unable to read page_t at %#lx", pp); 197 return (WALK_ERR); 198 } 199 200 /* 201 * Set the walk address to the next page. 202 */ 203 wsp->walk_addr = (uintptr_t)page.p_hash; 204 205 } else { 206 /* We've finished walking all pages. */ 207 return (WALK_DONE); 208 } 209 210 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata)); 211 } 212 213 void 214 page_walk_fini(mdb_walk_state_t *wsp) 215 { 216 mdb_free(wsp->walk_data, sizeof (page_walk_data_t)); 217 } 218 219 /* Summary statistics of pages */ 220 typedef struct memstat { 221 struct vnode *ms_kvp; /* Cached address of kernel vnode */ 222 struct vnode *ms_zvp; /* Cached address of zio vnode */ 223 uint64_t ms_kmem; /* Pages of kernel memory */ 224 uint64_t ms_anon; /* Pages of anonymous memory */ 225 uint64_t ms_vnode; /* Pages of named (vnode) memory */ 226 uint64_t ms_exec; /* Pages of exec/library memory */ 227 uint64_t ms_cachelist; /* Pages on the cachelist (free) */ 228 uint64_t ms_total; /* Pages on page hash */ 229 } memstat_t; 230 231 #define MS_PP_ISKAS(pp, stats) \ 232 (((pp)->p_vnode == (stats)->ms_kvp) || \ 233 (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))) 234 235 /* 236 * Summarize pages by type; called from page walker. 237 */ 238 239 /* ARGSUSED */ 240 static int 241 memstat_callback(page_t *page, page_t *pp, memstat_t *stats) 242 { 243 struct vnode vn, *vp; 244 uintptr_t ptr; 245 246 /* read page's vnode pointer */ 247 if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) { 248 if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) { 249 mdb_warn("unable to read vnode_t at %#lx", 250 ptr); 251 return (WALK_ERR); 252 } 253 vp = &vn; 254 } else 255 vp = NULL; 256 257 if (PP_ISFREE(pp)) 258 stats->ms_cachelist++; 259 else if (vp && IS_SWAPFSVP(vp)) 260 stats->ms_anon++; 261 else if (MS_PP_ISKAS(pp, stats)) 262 stats->ms_kmem++; 263 else if (vp && (((vp)->v_flag & VVMEXEC)) != 0) 264 stats->ms_exec++; 265 else 266 stats->ms_vnode++; 267 268 stats->ms_total++; 269 270 return (WALK_NEXT); 271 } 272 273 /* ARGSUSED */ 274 int 275 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 276 { 277 ulong_t pagesize; 278 pgcnt_t total_pages, physmem; 279 ulong_t freemem; 280 memstat_t stats; 281 memstat_t unused_stats; 282 GElf_Sym sym; 283 #if defined(__i386) || defined(__amd64) 284 bln_stats_t bln_stats; 285 ssize_t bln_size; 286 #endif 287 288 bzero(&stats, sizeof (memstat_t)); 289 bzero(&unused_stats, sizeof (memstat_t)); 290 291 if (argc != 0 || (flags & DCMD_ADDRSPEC)) 292 return (DCMD_USAGE); 293 294 /* Grab base page size */ 295 if (mdb_readvar(&pagesize, "_pagesize") == -1) { 296 mdb_warn("unable to read _pagesize"); 297 return (DCMD_ERR); 298 } 299 300 /* Total physical memory */ 301 if (mdb_readvar(&total_pages, "total_pages") == -1) { 302 mdb_warn("unable to read total_pages"); 303 return (DCMD_ERR); 304 } 305 306 /* Artificially limited memory */ 307 if (mdb_readvar(&physmem, "physmem") == -1) { 308 mdb_warn("unable to read physmem"); 309 return (DCMD_ERR); 310 } 311 312 /* read kernel vnode pointer */ 313 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp", 314 (GElf_Sym *)&sym) == -1) { 315 mdb_warn("unable to read kvp"); 316 return (DCMD_ERR); 317 } 318 319 stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value; 320 321 /* 322 * Read the zio vnode pointer. It may not exist on all kernels, so it 323 * it isn't found, it's not a fatal error. 324 */ 325 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp", 326 (GElf_Sym *)&sym) == -1) { 327 stats.ms_zvp = NULL; 328 } else { 329 stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value; 330 } 331 332 /* Walk page structures, summarizing usage */ 333 if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback, 334 &stats) == -1) { 335 mdb_warn("can't walk pages"); 336 return (DCMD_ERR); 337 } 338 339 /* read unused pages vnode */ 340 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp", 341 (GElf_Sym *)&sym) == -1) { 342 mdb_warn("unable to read unused_pages_vp"); 343 return (DCMD_ERR); 344 } 345 346 unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value; 347 348 /* Find unused pages */ 349 if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback, 350 &unused_stats) == -1) { 351 mdb_warn("can't walk pages"); 352 return (DCMD_ERR); 353 } 354 355 /* 356 * If physmem != total_pages, then the administrator has limited the 357 * number of pages available in the system. In order to account for 358 * this, we reduce the amount normally attributed to the page cache. 359 */ 360 stats.ms_vnode -= unused_stats.ms_kmem; 361 stats.ms_total -= unused_stats.ms_kmem; 362 363 #define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \ 364 ((physmem) * 10))) 365 366 mdb_printf("Page Summary Pages MB" 367 " %%Tot\n"); 368 mdb_printf("------------ ---------------- ----------------" 369 " ----\n"); 370 mdb_printf("Kernel %16llu %16llu %3lu%%\n", 371 stats.ms_kmem, 372 (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024), 373 MS_PCT_TOTAL(stats.ms_kmem)); 374 mdb_printf("Anon %16llu %16llu %3lu%%\n", 375 stats.ms_anon, 376 (uint64_t)stats.ms_anon * pagesize / (1024 * 1024), 377 MS_PCT_TOTAL(stats.ms_anon)); 378 mdb_printf("Exec and libs %16llu %16llu %3lu%%\n", 379 stats.ms_exec, 380 (uint64_t)stats.ms_exec * pagesize / (1024 * 1024), 381 MS_PCT_TOTAL(stats.ms_exec)); 382 mdb_printf("Page cache %16llu %16llu %3lu%%\n", 383 stats.ms_vnode, 384 (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024), 385 MS_PCT_TOTAL(stats.ms_vnode)); 386 mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n", 387 stats.ms_cachelist, 388 (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024), 389 MS_PCT_TOTAL(stats.ms_cachelist)); 390 391 /* 392 * occasionally, we double count pages above. To avoid printing 393 * absurdly large values for freemem, we clamp it at zero. 394 */ 395 if (physmem > stats.ms_total) 396 freemem = physmem - stats.ms_total; 397 else 398 freemem = 0; 399 400 #if defined(__i386) || defined(__amd64) 401 /* Are we running under Xen? If so, get balloon memory usage. */ 402 if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) { 403 if (freemem > bln_stats.bln_hv_pages) 404 freemem -= bln_stats.bln_hv_pages; 405 else 406 freemem = 0; 407 } 408 #endif 409 410 mdb_printf("Free (freelist) %16lu %16llu %3lu%%\n", freemem, 411 (uint64_t)freemem * pagesize / (1024 * 1024), 412 MS_PCT_TOTAL(freemem)); 413 414 #if defined(__i386) || defined(__amd64) 415 if (bln_size != -1) { 416 mdb_printf("Balloon %16lu %16llu %3lu%%\n", 417 bln_stats.bln_hv_pages, 418 (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024), 419 MS_PCT_TOTAL(bln_stats.bln_hv_pages)); 420 } 421 #endif 422 423 mdb_printf("\nTotal %16lu %16lu\n", 424 physmem, 425 (uint64_t)physmem * pagesize / (1024 * 1024)); 426 427 if (physmem != total_pages) { 428 mdb_printf("Physical %16lu %16lu\n", 429 total_pages, 430 (uint64_t)total_pages * pagesize / (1024 * 1024)); 431 } 432 433 #undef MS_PCT_TOTAL 434 435 return (DCMD_OK); 436 } 437 438 int 439 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 440 { 441 page_t p; 442 443 if (!(flags & DCMD_ADDRSPEC)) { 444 if (mdb_walk_dcmd("page", "page", argc, argv) == -1) { 445 mdb_warn("can't walk pages"); 446 return (DCMD_ERR); 447 } 448 return (DCMD_OK); 449 } 450 451 if (DCMD_HDRSPEC(flags)) { 452 mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n", 453 "PAGE", "VNODE", "OFFSET", "SELOCK", 454 "LCT", "COW", "IO", "FS", "ST"); 455 } 456 457 if (mdb_vread(&p, sizeof (page_t), addr) == -1) { 458 mdb_warn("can't read page_t at %#lx", addr); 459 return (DCMD_ERR); 460 } 461 462 mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n", 463 addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt, 464 p.p_iolock_state, p.p_fsdata, p.p_state); 465 466 return (DCMD_OK); 467 } 468 469 int 470 swap_walk_init(mdb_walk_state_t *wsp) 471 { 472 void *ptr; 473 474 if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) { 475 mdb_warn("swapinfo not found or invalid"); 476 return (WALK_ERR); 477 } 478 479 wsp->walk_addr = (uintptr_t)ptr; 480 481 return (WALK_NEXT); 482 } 483 484 int 485 swap_walk_step(mdb_walk_state_t *wsp) 486 { 487 uintptr_t sip; 488 struct swapinfo si; 489 490 sip = wsp->walk_addr; 491 492 if (sip == NULL) 493 return (WALK_DONE); 494 495 if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) { 496 mdb_warn("unable to read swapinfo at %#lx", sip); 497 return (WALK_ERR); 498 } 499 500 wsp->walk_addr = (uintptr_t)si.si_next; 501 502 return (wsp->walk_callback(sip, &si, wsp->walk_cbdata)); 503 } 504 505 int 506 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 507 { 508 struct swapinfo si; 509 char *name; 510 511 if (!(flags & DCMD_ADDRSPEC)) { 512 if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) { 513 mdb_warn("can't walk swapinfo"); 514 return (DCMD_ERR); 515 } 516 return (DCMD_OK); 517 } 518 519 if (DCMD_HDRSPEC(flags)) { 520 mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n", 521 "ADDR", "VNODE", "PAGES", "FREE", "NAME"); 522 } 523 524 if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) { 525 mdb_warn("can't read swapinfo at %#lx", addr); 526 return (DCMD_ERR); 527 } 528 529 name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC); 530 if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1) 531 name = "*error*"; 532 533 mdb_printf("%0?lx %?p %9d %9d %s\n", 534 addr, si.si_vp, si.si_npgs, si.si_nfpgs, name); 535 536 return (DCMD_OK); 537 } 538 539 int 540 memlist_walk_step(mdb_walk_state_t *wsp) 541 { 542 uintptr_t mlp; 543 struct memlist ml; 544 545 mlp = wsp->walk_addr; 546 547 if (mlp == NULL) 548 return (WALK_DONE); 549 550 if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) { 551 mdb_warn("unable to read memlist at %#lx", mlp); 552 return (WALK_ERR); 553 } 554 555 wsp->walk_addr = (uintptr_t)ml.next; 556 557 return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata)); 558 } 559 560 int 561 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 562 { 563 struct memlist ml; 564 565 if (!(flags & DCMD_ADDRSPEC)) { 566 uintptr_t ptr; 567 uint_t list = 0; 568 int i; 569 static const char *lists[] = { 570 "phys_install", 571 "phys_avail", 572 "virt_avail" 573 }; 574 575 if (mdb_getopts(argc, argv, 576 'i', MDB_OPT_SETBITS, (1 << 0), &list, 577 'a', MDB_OPT_SETBITS, (1 << 1), &list, 578 'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc) 579 return (DCMD_USAGE); 580 581 if (!list) 582 list = 1; 583 584 for (i = 0; list; i++, list >>= 1) { 585 if (!(list & 1)) 586 continue; 587 if ((mdb_readvar(&ptr, lists[i]) == -1) || 588 (ptr == NULL)) { 589 mdb_warn("%s not found or invalid", lists[i]); 590 return (DCMD_ERR); 591 } 592 593 mdb_printf("%s:\n", lists[i]); 594 if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL, 595 ptr) == -1) { 596 mdb_warn("can't walk memlist"); 597 return (DCMD_ERR); 598 } 599 } 600 return (DCMD_OK); 601 } 602 603 if (DCMD_HDRSPEC(flags)) 604 mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE"); 605 606 if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) { 607 mdb_warn("can't read memlist at %#lx", addr); 608 return (DCMD_ERR); 609 } 610 611 mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size); 612 613 return (DCMD_OK); 614 } 615