/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <mdb/mdb_modapi.h>
#include <sys/types.h>
#include <vm/page.h>
#include <sys/thread.h>
#include <sys/swap.h>
#include <sys/memlist.h>
#if defined(__i386) || defined(__amd64)
#include <sys/balloon_impl.h>
#endif

/*
 * Page walker.
 * By default, this will walk all pages in the system.  If given an
 * address, it will walk all pages belonging to the vnode at that
 * address.
 */

/*
 * page_walk_data
 *
 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
 * number of hash locations remaining in the page hash table when
 * walking all pages.
 *
 * The astute reader will notice that pw_hashloc is only used when
 * reading all pages (to hold a pointer to our location in the page
 * hash table), and that pw_first is only used when reading the pages
 * belonging to a particular vnode (to hold a pointer to the first
 * page).  While these could be combined into a single pointer, they
 * are left separate for clarity.
 */
typedef struct page_walk_data {
        long            pw_hashleft;
        void            **pw_hashloc;
        uintptr_t       pw_first;
} page_walk_data_t;

int
page_walk_init(mdb_walk_state_t *wsp)
{
        page_walk_data_t *pwd;
        void    **ptr;
        size_t  hashsz;
        vnode_t vn;

        if (wsp->walk_addr == NULL) {

                /*
                 * Walk all pages
                 */

                if ((mdb_readvar(&ptr, "page_hash") == -1) ||
                    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
                    (ptr == NULL) || (hashsz == 0)) {
                        mdb_warn("page_hash, page_hashsz not found or invalid");
                        return (WALK_ERR);
                }

                /*
                 * Since we are walking all pages, initialize hashleft
                 * to be the remaining number of entries in the page
                 * hash.  hashloc is set to the start of the page hash
                 * table.  Setting the walk address to 0 indicates that
                 * we aren't currently following a hash chain, and that
                 * we need to scan the page hash table for a page.
                 */
                pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
                pwd->pw_hashleft = hashsz;
                pwd->pw_hashloc = ptr;
                wsp->walk_addr = 0;
        } else {

                /*
                 * Walk just this vnode
                 */

                if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
                        mdb_warn("unable to read vnode_t at %#lx",
                            wsp->walk_addr);
                        return (WALK_ERR);
                }

                /*
                 * We set hashleft to -1 to indicate that we are
                 * walking a vnode, and initialize first to 0 (it is
                 * used to terminate the walk, so it must not be set
                 * until after we have walked the first page).  The
                 * walk address is set to the first page.
                 */
                pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
                pwd->pw_hashleft = -1;
                pwd->pw_first = 0;

                wsp->walk_addr = (uintptr_t)vn.v_pages;
        }

        wsp->walk_data = pwd;

        return (WALK_NEXT);
}
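
/*
 * Illustrative usage of the walker above, from within mdb (assuming it is
 * registered under the name "page", as sketched at the end of this file):
 *
 *      > ::walk page                   walk every page in the system
 *      > <vnode address>::walk page    walk the pages of one vnode
 *      > ::walk page | ::page          summarize each page as it is found
 */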

int
page_walk_step(mdb_walk_state_t *wsp)
{
        page_walk_data_t *pwd = wsp->walk_data;
        page_t          page;
        uintptr_t       pp;

        pp = wsp->walk_addr;

        if (pwd->pw_hashleft < 0) {

                /* We're walking a vnode's pages */

                /*
                 * If we don't have any pages to walk, we have come
                 * back around to the first one (we finished), or we
                 * can't read the page we're looking at, we are done.
                 */
                if (pp == NULL || pp == pwd->pw_first)
                        return (WALK_DONE);
                if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
                        mdb_warn("unable to read page_t at %#lx", pp);
                        return (WALK_ERR);
                }

                /*
                 * Set the walk address to the next page, and if the
                 * first page hasn't been set yet (i.e. we are on the
                 * first page), set it.
                 */
                wsp->walk_addr = (uintptr_t)page.p_vpnext;
                if (pwd->pw_first == NULL)
                        pwd->pw_first = pp;

        } else if (pwd->pw_hashleft > 0) {

                /* We're walking all pages */

                /*
                 * If pp (the walk address) is NULL, we scan through
                 * the page hash table until we find a page.
                 */
                if (pp == NULL) {

                        /*
                         * Iterate through the page hash table until we
                         * find a page or reach the end.
                         */
                        do {
                                if (mdb_vread(&pp, sizeof (uintptr_t),
                                    (uintptr_t)pwd->pw_hashloc) == -1) {
                                        mdb_warn("unable to read from %#p",
                                            pwd->pw_hashloc);
                                        return (WALK_ERR);
                                }
                                pwd->pw_hashleft--;
                                pwd->pw_hashloc++;
                        } while (pwd->pw_hashleft && (pp == NULL));

                        /*
                         * We've reached the end; exit.
                         */
                        if (pp == NULL)
                                return (WALK_DONE);
                }

                if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
                        mdb_warn("unable to read page_t at %#lx", pp);
                        return (WALK_ERR);
                }

                /*
                 * Set the walk address to the next page.
                 */
                wsp->walk_addr = (uintptr_t)page.p_hash;

        } else {
                /* We've finished walking all pages. */
                return (WALK_DONE);
        }

        return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
}

void
page_walk_fini(mdb_walk_state_t *wsp)
{
        mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
}

/* Summary statistics of pages */
typedef struct memstat {
        struct vnode    *ms_kvp;        /* Cached address of kernel vnode */
        struct vnode    *ms_zvp;        /* Cached address of zio vnode */
        uint64_t        ms_kmem;        /* Pages of kernel memory */
        uint64_t        ms_zfs_data;    /* Pages of zfs data */
        uint64_t        ms_anon;        /* Pages of anonymous memory */
        uint64_t        ms_vnode;       /* Pages of named (vnode) memory */
        uint64_t        ms_exec;        /* Pages of exec/library memory */
        uint64_t        ms_cachelist;   /* Pages on the cachelist (free) */
        uint64_t        ms_total;       /* Pages on page hash */
} memstat_t;

#define MS_PP_ISKAS(pp, stats)                          \
        ((pp)->p_vnode == (stats)->ms_kvp)

#define MS_PP_ISZFS_DATA(pp, stats)                     \
        (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))

/*
 * Summarize pages by type; called from page walker.
 */

/* ARGSUSED */
static int
memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
{
        struct vnode    vn, *vp;
        uintptr_t       ptr;

        /* read page's vnode pointer */
        if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
                if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
                        mdb_warn("unable to read vnode_t at %#lx",
                            ptr);
                        return (WALK_ERR);
                }
                vp = &vn;
        } else
                vp = NULL;

        if (PP_ISFREE(pp))
                stats->ms_cachelist++;
        else if (vp && IS_SWAPFSVP(vp))
                stats->ms_anon++;
        else if (MS_PP_ISZFS_DATA(pp, stats))
                stats->ms_zfs_data++;
        else if (MS_PP_ISKAS(pp, stats))
                stats->ms_kmem++;
        else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
                stats->ms_exec++;
        else
                stats->ms_vnode++;

        stats->ms_total++;

        return (WALK_NEXT);
}

/* ARGSUSED */
int
memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        ulong_t pagesize;
        pgcnt_t total_pages, physmem;
        ulong_t freemem;
        memstat_t stats;
        memstat_t unused_stats;
        GElf_Sym sym;
#if defined(__i386) || defined(__amd64)
        bln_stats_t bln_stats;
        ssize_t bln_size;
#endif

        bzero(&stats, sizeof (memstat_t));
        bzero(&unused_stats, sizeof (memstat_t));

        if (argc != 0 || (flags & DCMD_ADDRSPEC))
                return (DCMD_USAGE);

        /* Grab base page size */
        if (mdb_readvar(&pagesize, "_pagesize") == -1) {
                mdb_warn("unable to read _pagesize");
                return (DCMD_ERR);
        }

        /* Total physical memory */
        if (mdb_readvar(&total_pages, "total_pages") == -1) {
                mdb_warn("unable to read total_pages");
                return (DCMD_ERR);
        }

        /* Artificially limited memory */
        if (mdb_readvar(&physmem, "physmem") == -1) {
                mdb_warn("unable to read physmem");
                return (DCMD_ERR);
        }

        /* read kernel vnode pointer */
        if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
            (GElf_Sym *)&sym) == -1) {
                mdb_warn("unable to read kvp");
                return (DCMD_ERR);
        }

        stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;

        /*
         * Read the zio vnode pointer.  It may not exist on all kernels, so if
         * it isn't found, it's not a fatal error.
         */
        if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
            (GElf_Sym *)&sym) == -1) {
                stats.ms_zvp = NULL;
        } else {
                stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
        }

        /* Walk page structures, summarizing usage */
        if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
            &stats) == -1) {
                mdb_warn("can't walk pages");
                return (DCMD_ERR);
        }

        /* read unused pages vnode */
        if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
            (GElf_Sym *)&sym) == -1) {
                mdb_warn("unable to read unused_pages_vp");
                return (DCMD_ERR);
        }

        unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;

        /* Find unused pages */
        if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
            &unused_stats) == -1) {
                mdb_warn("can't walk pages");
                return (DCMD_ERR);
        }

        /*
         * If physmem != total_pages, then the administrator has limited the
         * number of pages available in the system.  In order to account for
         * this, we reduce the amount normally attributed to the page cache.
         */
        stats.ms_vnode -= unused_stats.ms_kmem;
        stats.ms_total -= unused_stats.ms_kmem;

#define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
                ((physmem) * 10)))

        mdb_printf("Page Summary                Pages                MB"
            "  %%Tot\n");
        mdb_printf("------------     ----------------  ----------------"
            "  ----\n");
        mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
            stats.ms_kmem,
            (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_kmem));

        if (stats.ms_zfs_data != 0)
                mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
                    stats.ms_zfs_data,
                    (uint64_t)stats.ms_zfs_data * pagesize / (1024 * 1024),
                    MS_PCT_TOTAL(stats.ms_zfs_data));

        mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
            stats.ms_anon,
            (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_anon));
        mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
            stats.ms_exec,
            (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_exec));
        mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
            stats.ms_vnode,
            (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_vnode));
        mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
            stats.ms_cachelist,
            (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(stats.ms_cachelist));

        /*
         * Occasionally, we double count pages above.  To avoid printing
         * absurdly large values for freemem, we clamp it at zero.
         */
        if (physmem > stats.ms_total)
                freemem = physmem - stats.ms_total;
        else
                freemem = 0;

#if defined(__i386) || defined(__amd64)
        /* Are we running under Xen?  If so, get balloon memory usage. */
        if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
                if (freemem > bln_stats.bln_hv_pages)
                        freemem -= bln_stats.bln_hv_pages;
                else
                        freemem = 0;
        }
#endif

        mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
            (uint64_t)freemem * pagesize / (1024 * 1024),
            MS_PCT_TOTAL(freemem));

#if defined(__i386) || defined(__amd64)
        if (bln_size != -1) {
                mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
                    bln_stats.bln_hv_pages,
                    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
                    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
        }
#endif

        mdb_printf("\nTotal            %16lu  %16lu\n",
            physmem,
            (uint64_t)physmem * pagesize / (1024 * 1024));

        if (physmem != total_pages) {
                mdb_printf("Physical         %16lu  %16lu\n",
                    total_pages,
                    (uint64_t)total_pages * pagesize / (1024 * 1024));
        }

#undef MS_PCT_TOTAL

        return (DCMD_OK);
}

int
page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        page_t  p;

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
                        mdb_warn("can't walk pages");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags)) {
                mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
                    "PAGE", "VNODE", "OFFSET", "SELOCK",
                    "LCT", "COW", "IO", "FS", "ST");
        }

        if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
                mdb_warn("can't read page_t at %#lx", addr);
                return (DCMD_ERR);
        }

        mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
            addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
            p.p_iolock_state, p.p_fsdata, p.p_state);

        return (DCMD_OK);
}
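
/*
 * Illustrative usage of the dcmds above, from within mdb -k or against a
 * crash dump (output elided; command names assume the registration sketched
 * at the end of this file):
 *
 *      > ::memstat                     system-wide page usage summary
 *      > ::page                        summarize every page_t in the system
 *      > <page_t address>::page        summarize a single page_t
 */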

int
swap_walk_init(mdb_walk_state_t *wsp)
{
        void    *ptr;

        if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
                mdb_warn("swapinfo not found or invalid");
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)ptr;

        return (WALK_NEXT);
}

int
swap_walk_step(mdb_walk_state_t *wsp)
{
        uintptr_t       sip;
        struct swapinfo si;

        sip = wsp->walk_addr;

        if (sip == NULL)
                return (WALK_DONE);

        if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
                mdb_warn("unable to read swapinfo at %#lx", sip);
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)si.si_next;

        return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
}

int
swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        struct swapinfo si;
        char            *name;

        if (!(flags & DCMD_ADDRSPEC)) {
                if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
                        mdb_warn("can't walk swapinfo");
                        return (DCMD_ERR);
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags)) {
                mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
                    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
        }

        if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
                mdb_warn("can't read swapinfo at %#lx", addr);
                return (DCMD_ERR);
        }

        name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
        if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
                name = "*error*";

        mdb_printf("%0?lx %?p %9d %9d %s\n",
            addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);

        return (DCMD_OK);
}

int
memlist_walk_step(mdb_walk_state_t *wsp)
{
        uintptr_t       mlp;
        struct memlist  ml;

        mlp = wsp->walk_addr;

        if (mlp == NULL)
                return (WALK_DONE);

        if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
                mdb_warn("unable to read memlist at %#lx", mlp);
                return (WALK_ERR);
        }

        wsp->walk_addr = (uintptr_t)ml.next;

        return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
}

int
memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        struct memlist  ml;

        if (!(flags & DCMD_ADDRSPEC)) {
                uintptr_t ptr;
                uint_t list = 0;
                int i;
                static const char *lists[] = {
                        "phys_install",
                        "phys_avail",
                        "virt_avail"
                };

                if (mdb_getopts(argc, argv,
                    'i', MDB_OPT_SETBITS, (1 << 0), &list,
                    'a', MDB_OPT_SETBITS, (1 << 1), &list,
                    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
                        return (DCMD_USAGE);

                if (!list)
                        list = 1;

                for (i = 0; list; i++, list >>= 1) {
                        if (!(list & 1))
                                continue;
                        if ((mdb_readvar(&ptr, lists[i]) == -1) ||
                            (ptr == NULL)) {
                                mdb_warn("%s not found or invalid", lists[i]);
                                return (DCMD_ERR);
                        }

                        mdb_printf("%s:\n", lists[i]);
                        if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
                            ptr) == -1) {
                                mdb_warn("can't walk memlist");
                                return (DCMD_ERR);
                        }
                }
                return (DCMD_OK);
        }

        if (DCMD_HDRSPEC(flags))
                mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");

        if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
                mdb_warn("can't read memlist at %#lx", addr);
                return (DCMD_ERR);
        }

        mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);

        return (DCMD_OK);
}
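
/*
 * Illustrative sketch only: the walkers and dcmds above are normally
 * registered with MDB through an mdb_modinfo_t table returned from the
 * module's _mdb_init() entry point.  In the genunix module that table lives
 * in a separate source file, so the guarded block below is hypothetical and
 * is shown purely to document how these entry points plug into the MDB
 * framework; the walker and dcmd descriptions and usage strings here are
 * assumptions, not the module's actual registration.
 */
#ifdef MEMORY_MDB_REGISTRATION_EXAMPLE  /* never defined in a real build */
static const mdb_dcmd_t example_dcmds[] = {
        { "memstat", NULL, "memory usage summary", memstat },
        { "page", "?", "display summarized page_t", page },
        { "swapinfo", ":", "display a struct swapinfo", swapinfof },
        { "memlist", "[-iav]", "display a struct memlist", memlist },
        { NULL }
};

static const mdb_walker_t example_walkers[] = {
        { "page", "walk all pages, or pages of a vnode", page_walk_init,
            page_walk_step, page_walk_fini },
        { "swapinfo", "walk swapinfo structures", swap_walk_init,
            swap_walk_step, NULL },
        { "memlist", "walk specified memlist", NULL, memlist_walk_step, NULL },
        { NULL }
};

static const mdb_modinfo_t example_modinfo =
        { MDB_API_VERSION, example_dcmds, example_walkers };

const mdb_modinfo_t *
_mdb_init(void)
{
        return (&example_modinfo);
}
#endif  /* MEMORY_MDB_REGISTRATION_EXAMPLE */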