1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015 Joyent, Inc. 24 */ 25 26 #include <mdb/mdb_param.h> 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ks.h> 29 #include <sys/types.h> 30 #include <sys/memlist.h> 31 #include <sys/swap.h> 32 #include <sys/systm.h> 33 #include <sys/thread.h> 34 #include <vm/anon.h> 35 #include <vm/as.h> 36 #include <vm/page.h> 37 #include <sys/thread.h> 38 #include <sys/swap.h> 39 #include <sys/memlist.h> 40 #include <sys/vnode.h> 41 #include <vm/seg_map.h> 42 #include <vm/seg_vn.h> 43 #if defined(__i386) || defined(__amd64) 44 #include <sys/balloon_impl.h> 45 #endif 46 47 #include "avl.h" 48 #include "memory.h" 49 50 /* 51 * Page walker. 52 * By default, this will walk all pages in the system. If given an 53 * address, it will walk all pages belonging to the vnode at that 54 * address. 55 */ 56 57 /* 58 * page_walk_data 59 * 60 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the 61 * number of hash locations remaining in the page hash table when 62 * walking all pages. 
63 * 64 * The astute reader will notice that pw_hashloc is only used when 65 * reading all pages (to hold a pointer to our location in the page 66 * hash table), and that pw_first is only used when reading the pages 67 * belonging to a particular vnode (to hold a pointer to the first 68 * page). While these could be combined to be a single pointer, they 69 * are left separate for clarity. 70 */ 71 typedef struct page_walk_data { 72 long pw_hashleft; 73 void **pw_hashloc; 74 uintptr_t pw_first; 75 } page_walk_data_t; 76 77 int 78 page_walk_init(mdb_walk_state_t *wsp) 79 { 80 page_walk_data_t *pwd; 81 void **ptr; 82 size_t hashsz; 83 vnode_t vn; 84 85 if (wsp->walk_addr == NULL) { 86 87 /* 88 * Walk all pages 89 */ 90 91 if ((mdb_readvar(&ptr, "page_hash") == -1) || 92 (mdb_readvar(&hashsz, "page_hashsz") == -1) || 93 (ptr == NULL) || (hashsz == 0)) { 94 mdb_warn("page_hash, page_hashsz not found or invalid"); 95 return (WALK_ERR); 96 } 97 98 /* 99 * Since we are walking all pages, initialize hashleft 100 * to be the remaining number of entries in the page 101 * hash. hashloc is set the start of the page hash 102 * table. Setting the walk address to 0 indicates that 103 * we aren't currently following a hash chain, and that 104 * we need to scan the page hash table for a page. 105 */ 106 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 107 pwd->pw_hashleft = hashsz; 108 pwd->pw_hashloc = ptr; 109 wsp->walk_addr = 0; 110 } else { 111 112 /* 113 * Walk just this vnode 114 */ 115 116 if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) { 117 mdb_warn("unable to read vnode_t at %#lx", 118 wsp->walk_addr); 119 return (WALK_ERR); 120 } 121 122 /* 123 * We set hashleft to -1 to indicate that we are 124 * walking a vnode, and initialize first to 0 (it is 125 * used to terminate the walk, so it must not be set 126 * until after we have walked the first page). The 127 * walk address is set to the first page. 
128 */ 129 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP); 130 pwd->pw_hashleft = -1; 131 pwd->pw_first = 0; 132 133 wsp->walk_addr = (uintptr_t)vn.v_pages; 134 } 135 136 wsp->walk_data = pwd; 137 138 return (WALK_NEXT); 139 } 140 141 int 142 page_walk_step(mdb_walk_state_t *wsp) 143 { 144 page_walk_data_t *pwd = wsp->walk_data; 145 page_t page; 146 uintptr_t pp; 147 148 pp = wsp->walk_addr; 149 150 if (pwd->pw_hashleft < 0) { 151 152 /* We're walking a vnode's pages */ 153 154 /* 155 * If we don't have any pages to walk, we have come 156 * back around to the first one (we finished), or we 157 * can't read the page we're looking at, we are done. 158 */ 159 if (pp == NULL || pp == pwd->pw_first) 160 return (WALK_DONE); 161 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 162 mdb_warn("unable to read page_t at %#lx", pp); 163 return (WALK_ERR); 164 } 165 166 /* 167 * Set the walk address to the next page, and if the 168 * first page hasn't been set yet (i.e. we are on the 169 * first page), set it. 170 */ 171 wsp->walk_addr = (uintptr_t)page.p_vpnext; 172 if (pwd->pw_first == NULL) 173 pwd->pw_first = pp; 174 175 } else if (pwd->pw_hashleft > 0) { 176 177 /* We're walking all pages */ 178 179 /* 180 * If pp (the walk address) is NULL, we scan through 181 * the page hash table until we find a page. 182 */ 183 if (pp == NULL) { 184 185 /* 186 * Iterate through the page hash table until we 187 * find a page or reach the end. 188 */ 189 do { 190 if (mdb_vread(&pp, sizeof (uintptr_t), 191 (uintptr_t)pwd->pw_hashloc) == -1) { 192 mdb_warn("unable to read from %#p", 193 pwd->pw_hashloc); 194 return (WALK_ERR); 195 } 196 pwd->pw_hashleft--; 197 pwd->pw_hashloc++; 198 } while (pwd->pw_hashleft && (pp == NULL)); 199 200 /* 201 * We've reached the end; exit. 
202 */ 203 if (pp == NULL) 204 return (WALK_DONE); 205 } 206 207 if (mdb_vread(&page, sizeof (page_t), pp) == -1) { 208 mdb_warn("unable to read page_t at %#lx", pp); 209 return (WALK_ERR); 210 } 211 212 /* 213 * Set the walk address to the next page. 214 */ 215 wsp->walk_addr = (uintptr_t)page.p_hash; 216 217 } else { 218 /* We've finished walking all pages. */ 219 return (WALK_DONE); 220 } 221 222 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata)); 223 } 224 225 void 226 page_walk_fini(mdb_walk_state_t *wsp) 227 { 228 mdb_free(wsp->walk_data, sizeof (page_walk_data_t)); 229 } 230 231 /* 232 * allpages walks all pages in the system in order they appear in 233 * the memseg structure 234 */ 235 236 #define PAGE_BUFFER 128 237 238 int 239 allpages_walk_init(mdb_walk_state_t *wsp) 240 { 241 if (wsp->walk_addr != 0) { 242 mdb_warn("allpages only supports global walks.\n"); 243 return (WALK_ERR); 244 } 245 246 if (mdb_layered_walk("memseg", wsp) == -1) { 247 mdb_warn("couldn't walk 'memseg'"); 248 return (WALK_ERR); 249 } 250 251 wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP); 252 return (WALK_NEXT); 253 } 254 255 int 256 allpages_walk_step(mdb_walk_state_t *wsp) 257 { 258 const struct memseg *msp = wsp->walk_layer; 259 page_t *buf = wsp->walk_data; 260 size_t pg_read, i; 261 size_t pg_num = msp->pages_end - msp->pages_base; 262 const page_t *pg_addr = msp->pages; 263 264 while (pg_num > 0) { 265 pg_read = MIN(pg_num, PAGE_BUFFER); 266 267 if (mdb_vread(buf, pg_read * sizeof (page_t), 268 (uintptr_t)pg_addr) == -1) { 269 mdb_warn("can't read page_t's at %#lx", pg_addr); 270 return (WALK_ERR); 271 } 272 for (i = 0; i < pg_read; i++) { 273 int ret = wsp->walk_callback((uintptr_t)&pg_addr[i], 274 &buf[i], wsp->walk_cbdata); 275 276 if (ret != WALK_NEXT) 277 return (ret); 278 } 279 pg_num -= pg_read; 280 pg_addr += pg_read; 281 } 282 283 return (WALK_NEXT); 284 } 285 286 void 287 allpages_walk_fini(mdb_walk_state_t *wsp) 288 { 289 
	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
}

/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds the memstat command on live, large CPU count systems.
 */

#define	VN_SMALL	401
#define	VN_LARGE	10007
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))

struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode */
	uintptr_t vn_ptr;			/* pointer to vnode */
	struct vn_htable_list *vn_q_next;	/* queue next pointer */
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer */
	struct vn_htable_list *vn_h_next;	/* hash table pointer */
};

/*
 * vn_q_first  -> points to the head of queue: the vnode that was most
 *		  recently used
 * vn_q_last   -> points to the oldest used vnode, and is freed once a new
 *		  vnode is read.
 * vn_htable   -> hash table
 * vn_htable_buf -> contains htable objects
 * vn_htable_size -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
	struct vn_htable_list  *vn_q_first;
	struct vn_htable_list  *vn_q_last;
	struct vn_htable_list **vn_htable;
	struct vn_htable_list  *vn_htable_buf;
	int vn_htable_size;
	int vn_htable_buckets;
} vn_htable_t;


/*
 * Allocate memory, initialize the hash table and LRU queue.  Tries the
 * large table first (no-sleep so a failed allocation can fall back to
 * VN_SMALL); all memory is UM_GC, so there is no explicit free path.
 */
static void
vn_htable_init(vn_htable_t *hp, size_t vn_size)
{
	int i;
	int htable_size = MAX(vn_size, VN_LARGE);

	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
		/* fall back to the small table if the large one won't fit */
		htable_size = VN_SMALL;
		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
		    * htable_size, UM_SLEEP|UM_GC);
	}

	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
	    * htable_size, UM_SLEEP|UM_GC);

	/* thread every buffer entry onto the LRU queue, first to last */
	hp->vn_q_first = &hp->vn_htable_buf[0];
	hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1];
	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];

	for (i = 1; i < (htable_size-1); i++) {
		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
	}

	hp->vn_htable_size = htable_size;
	hp->vn_htable_buckets = htable_size;
}


/*
 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
 * The function tries to find needed information in the following order:
 *
 * 1. check if ptr is the first in queue
 * 2. check if ptr is in hash table (if so move it to the top of queue)
 * 3. do mdb_vread, remove last queue item from queue and hash table.
 *    Insert new information to freed object, and put this object in to the
 *    top of the queue.
 *
 * Returns 0 on success, -1 if the vnode could not be read.
 */
static int
vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
{
	int hkey;
	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
	struct vn_htable_list *q_first = hp->vn_q_first;

	/* 1. vnode ptr is the first in queue, just get v_flag and return */
	if (q_first->vn_ptr == ptr) {
		vp->v_flag = q_first->vn_flag;

		return (0);
	}

	/* 2. search the hash table for this ptr */
	hkey = VN_HTABLE_KEY(ptr, hp);
	hent = hp->vn_htable[hkey];
	while (hent && (hent->vn_ptr != ptr))
		hent = hent->vn_h_next;

	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
	if (hent == NULL) {
		struct vnode vn;

		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx", ptr);
			return (-1);
		}

		/* we will insert read data into the last element in queue */
		hent = hp->vn_q_last;

		/* remove last hp->vn_q_last object from hash table */
		if (hent->vn_ptr) {
			/* unlink it from whatever bucket it currently lives */
			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
			while (*htmp != hent)
				htmp = &(*htmp)->vn_h_next;
			*htmp = hent->vn_h_next;
		}

		/* insert data into new free object */
		hent->vn_ptr = ptr;
		hent->vn_flag = vn.v_flag;

		/* insert new object into hash table */
		hent->vn_h_next = hp->vn_htable[hkey];
		hp->vn_htable[hkey] = hent;
	}

	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
	q_next = hent->vn_q_next;
	q_prev = hent->vn_q_prev;
	if (q_next == NULL)
		hp->vn_q_last = q_prev;
	else
		q_next->vn_q_prev = q_prev;
	q_prev->vn_q_next = q_next;

	/* Add to the front of queue */
	hent->vn_q_prev = NULL;
	hent->vn_q_next = q_first;
	q_first->vn_q_prev = hent;
	hp->vn_q_first = hent;

	/* Set v_flag in vnode pointer from hent */
	vp->v_flag = hent->vn_flag;

	return (0);
}

/* Summary statistics of pages */
typedef struct memstat {
	struct vnode *ms_kvp;		/* Cached address of kernel vnode */
	struct vnode *ms_unused_vp;	/* Unused pages vnode pointer	  */
	struct vnode *ms_zvp;		/* Cached address of zio vnode    */
	uint64_t ms_kmem;		/* Pages of kernel memory	  */
	uint64_t ms_zfs_data;		/* Pages of zfs data		  */
	uint64_t ms_anon;		/* Pages of anonymous memory	  */
	uint64_t ms_vnode;		/* Pages of named (vnode) memory  */
	uint64_t ms_exec;		/* Pages of exec/library memory	  */
	uint64_t ms_cachelist;		/* Pages on the cachelist (free)  */
	uint64_t ms_bootpages;		/* Pages on the bootpages list    */
uint64_t ms_total; /* Pages on page hash */ 456 vn_htable_t *ms_vn_htable; /* Pointer to hash table */ 457 struct vnode ms_vn; /* vnode buffer */ 458 } memstat_t; 459 460 #define MS_PP_ISKAS(pp, stats) \ 461 ((pp)->p_vnode == (stats)->ms_kvp) 462 463 #define MS_PP_ISZFS_DATA(pp, stats) \ 464 (((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp)) 465 466 /* 467 * Summarize pages by type and update stat information 468 */ 469 470 /* ARGSUSED */ 471 static int 472 memstat_callback(page_t *page, page_t *pp, memstat_t *stats) 473 { 474 struct vnode *vp = &stats->ms_vn; 475 476 if (PP_ISBOOTPAGES(pp)) 477 stats->ms_bootpages++; 478 else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp) 479 return (WALK_NEXT); 480 else if (MS_PP_ISKAS(pp, stats)) 481 stats->ms_kmem++; 482 else if (MS_PP_ISZFS_DATA(pp, stats)) 483 stats->ms_zfs_data++; 484 else if (PP_ISFREE(pp)) 485 stats->ms_cachelist++; 486 else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode)) 487 return (WALK_ERR); 488 else if (IS_SWAPFSVP(vp)) 489 stats->ms_anon++; 490 else if ((vp->v_flag & VVMEXEC) != 0) 491 stats->ms_exec++; 492 else 493 stats->ms_vnode++; 494 495 stats->ms_total++; 496 497 return (WALK_NEXT); 498 } 499 500 /* ARGSUSED */ 501 int 502 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 503 { 504 pgcnt_t total_pages, physmem; 505 ulong_t freemem; 506 memstat_t stats; 507 GElf_Sym sym; 508 vn_htable_t ht; 509 struct vnode *kvps; 510 uintptr_t vn_size = 0; 511 #if defined(__i386) || defined(__amd64) 512 bln_stats_t bln_stats; 513 ssize_t bln_size; 514 #endif 515 516 bzero(&stats, sizeof (memstat_t)); 517 518 /* 519 * -s size, is an internal option. It specifies the size of vn_htable. 520 * Hash table size is set in the following order: 521 * If user has specified the size that is larger than VN_LARGE: try it, 522 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if 523 * failed to allocate default to VN_SMALL. 
524 * For a better efficiency of hash table it is highly recommended to 525 * set size to a prime number. 526 */ 527 if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv, 528 's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc) 529 return (DCMD_USAGE); 530 531 /* Initialize vnode hash list and queue */ 532 vn_htable_init(&ht, vn_size); 533 stats.ms_vn_htable = &ht; 534 535 /* Total physical memory */ 536 if (mdb_readvar(&total_pages, "total_pages") == -1) { 537 mdb_warn("unable to read total_pages"); 538 return (DCMD_ERR); 539 } 540 541 /* Artificially limited memory */ 542 if (mdb_readvar(&physmem, "physmem") == -1) { 543 mdb_warn("unable to read physmem"); 544 return (DCMD_ERR); 545 } 546 547 /* read kernel vnode array pointer */ 548 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps", 549 (GElf_Sym *)&sym) == -1) { 550 mdb_warn("unable to read kvps"); 551 return (DCMD_ERR); 552 } 553 kvps = (struct vnode *)(uintptr_t)sym.st_value; 554 stats.ms_kvp = &kvps[KV_KVP]; 555 556 /* 557 * Read the zio vnode pointer. 558 */ 559 stats.ms_zvp = &kvps[KV_ZVP]; 560 561 /* 562 * If physmem != total_pages, then the administrator has limited the 563 * number of pages available in the system. Excluded pages are 564 * associated with the unused pages vnode. Read this vnode so the 565 * pages can be excluded in the page accounting. 
566 */ 567 if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp", 568 (GElf_Sym *)&sym) == -1) { 569 mdb_warn("unable to read unused_pages_vp"); 570 return (DCMD_ERR); 571 } 572 stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value; 573 574 /* walk all pages, collect statistics */ 575 if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback, 576 &stats) == -1) { 577 mdb_warn("can't walk memseg"); 578 return (DCMD_ERR); 579 } 580 581 #define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \ 582 ((physmem) * 10))) 583 584 mdb_printf("Page Summary Pages MB" 585 " %%Tot\n"); 586 mdb_printf("------------ ---------------- ----------------" 587 " ----\n"); 588 mdb_printf("Kernel %16llu %16llu %3lu%%\n", 589 stats.ms_kmem, 590 (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024), 591 MS_PCT_TOTAL(stats.ms_kmem)); 592 593 if (stats.ms_bootpages != 0) { 594 mdb_printf("Boot pages %16llu %16llu %3lu%%\n", 595 stats.ms_bootpages, 596 (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024), 597 MS_PCT_TOTAL(stats.ms_bootpages)); 598 } 599 600 if (stats.ms_zfs_data != 0) { 601 mdb_printf("ZFS File Data %16llu %16llu %3lu%%\n", 602 stats.ms_zfs_data, 603 (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024), 604 MS_PCT_TOTAL(stats.ms_zfs_data)); 605 } 606 607 mdb_printf("Anon %16llu %16llu %3lu%%\n", 608 stats.ms_anon, 609 (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024), 610 MS_PCT_TOTAL(stats.ms_anon)); 611 mdb_printf("Exec and libs %16llu %16llu %3lu%%\n", 612 stats.ms_exec, 613 (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024), 614 MS_PCT_TOTAL(stats.ms_exec)); 615 mdb_printf("Page cache %16llu %16llu %3lu%%\n", 616 stats.ms_vnode, 617 (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024), 618 MS_PCT_TOTAL(stats.ms_vnode)); 619 mdb_printf("Free (cachelist) %16llu %16llu %3lu%%\n", 620 stats.ms_cachelist, 621 (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024), 622 MS_PCT_TOTAL(stats.ms_cachelist)); 623 624 /* 625 * occasionally, we double count 
pages above. To avoid printing 626 * absurdly large values for freemem, we clamp it at zero. 627 */ 628 if (physmem > stats.ms_total) 629 freemem = physmem - stats.ms_total; 630 else 631 freemem = 0; 632 633 #if defined(__i386) || defined(__amd64) 634 /* Are we running under Xen? If so, get balloon memory usage. */ 635 if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) { 636 if (freemem > bln_stats.bln_hv_pages) 637 freemem -= bln_stats.bln_hv_pages; 638 else 639 freemem = 0; 640 } 641 #endif 642 643 mdb_printf("Free (freelist) %16lu %16llu %3lu%%\n", freemem, 644 (uint64_t)freemem * PAGESIZE / (1024 * 1024), 645 MS_PCT_TOTAL(freemem)); 646 647 #if defined(__i386) || defined(__amd64) 648 if (bln_size != -1) { 649 mdb_printf("Balloon %16lu %16llu %3lu%%\n", 650 bln_stats.bln_hv_pages, 651 (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024), 652 MS_PCT_TOTAL(bln_stats.bln_hv_pages)); 653 } 654 #endif 655 656 mdb_printf("\nTotal %16lu %16lu\n", 657 physmem, 658 (uint64_t)physmem * PAGESIZE / (1024 * 1024)); 659 660 if (physmem != total_pages) { 661 mdb_printf("Physical %16lu %16lu\n", 662 total_pages, 663 (uint64_t)total_pages * PAGESIZE / (1024 * 1024)); 664 } 665 666 #undef MS_PCT_TOTAL 667 668 return (DCMD_OK); 669 } 670 671 void 672 pagelookup_help(void) 673 { 674 mdb_printf( 675 "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n" 676 "\n" 677 "Can be invoked three different ways:\n\n" 678 " ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n" 679 " %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n" 680 " %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n" 681 "\n" 682 "The latter two forms are useful in pipelines.\n"); 683 } 684 685 int 686 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 687 { 688 uintptr_t vp = -(uintptr_t)1; 689 uint64_t offset = -(uint64_t)1; 690 691 uintptr_t pageaddr; 692 int hasaddr = (flags & DCMD_ADDRSPEC); 693 int usedaddr = 0; 694 695 if (mdb_getopts(argc, argv, 696 'v', MDB_OPT_UINTPTR, &vp, 697 
'o', MDB_OPT_UINT64, &offset, 698 0) != argc) { 699 return (DCMD_USAGE); 700 } 701 702 if (vp == -(uintptr_t)1) { 703 if (offset == -(uint64_t)1) { 704 mdb_warn( 705 "pagelookup: at least one of -v vp or -o offset " 706 "required.\n"); 707 return (DCMD_USAGE); 708 } 709 vp = addr; 710 usedaddr = 1; 711 } else if (offset == -(uint64_t)1) { 712 offset = mdb_get_dot(); 713 usedaddr = 1; 714 } 715 if (usedaddr && !hasaddr) { 716 mdb_warn("pagelookup: address required\n"); 717 return (DCMD_USAGE); 718 } 719 if (!usedaddr && hasaddr) { 720 mdb_warn( 721 "pagelookup: address specified when both -v and -o were " 722 "passed"); 723 return (DCMD_USAGE); 724 } 725 726 pageaddr = mdb_page_lookup(vp, offset); 727 if (pageaddr == 0) { 728 mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n", 729 vp, offset); 730 return (DCMD_OK); 731 } 732 mdb_printf("%#lr\n", pageaddr); /* this is PIPE_OUT friendly */ 733 return (DCMD_OK); 734 } 735 736 /*ARGSUSED*/ 737 int 738 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 739 { 740 uintptr_t pp; 741 742 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) { 743 return (DCMD_USAGE); 744 } 745 746 pp = mdb_pfn2page((pfn_t)addr); 747 if (pp == 0) { 748 return (DCMD_ERR); 749 } 750 751 if (flags & DCMD_PIPE_OUT) { 752 mdb_printf("%#lr\n", pp); 753 } else { 754 mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp); 755 } 756 757 return (DCMD_OK); 758 } 759 760 int 761 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 762 { 763 page_t p; 764 765 if (!(flags & DCMD_ADDRSPEC)) { 766 if (mdb_walk_dcmd("page", "page", argc, argv) == -1) { 767 mdb_warn("can't walk pages"); 768 return (DCMD_ERR); 769 } 770 return (DCMD_OK); 771 } 772 773 if (DCMD_HDRSPEC(flags)) { 774 mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n", 775 "PAGE", "VNODE", "OFFSET", "SELOCK", 776 "LCT", "COW", "IO", "FS", "ST"); 777 } 778 779 if (mdb_vread(&p, sizeof (page_t), addr) == -1) { 780 mdb_warn("can't read page_t at 
%#lx", addr); 781 return (DCMD_ERR); 782 } 783 784 mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n", 785 addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt, 786 p.p_iolock_state, p.p_fsdata, p.p_state); 787 788 return (DCMD_OK); 789 } 790 791 int 792 swap_walk_init(mdb_walk_state_t *wsp) 793 { 794 void *ptr; 795 796 if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) { 797 mdb_warn("swapinfo not found or invalid"); 798 return (WALK_ERR); 799 } 800 801 wsp->walk_addr = (uintptr_t)ptr; 802 803 return (WALK_NEXT); 804 } 805 806 int 807 swap_walk_step(mdb_walk_state_t *wsp) 808 { 809 uintptr_t sip; 810 struct swapinfo si; 811 812 sip = wsp->walk_addr; 813 814 if (sip == NULL) 815 return (WALK_DONE); 816 817 if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) { 818 mdb_warn("unable to read swapinfo at %#lx", sip); 819 return (WALK_ERR); 820 } 821 822 wsp->walk_addr = (uintptr_t)si.si_next; 823 824 return (wsp->walk_callback(sip, &si, wsp->walk_cbdata)); 825 } 826 827 int 828 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 829 { 830 struct swapinfo si; 831 char *name; 832 833 if (!(flags & DCMD_ADDRSPEC)) { 834 if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) { 835 mdb_warn("can't walk swapinfo"); 836 return (DCMD_ERR); 837 } 838 return (DCMD_OK); 839 } 840 841 if (DCMD_HDRSPEC(flags)) { 842 mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n", 843 "ADDR", "VNODE", "PAGES", "FREE", "NAME"); 844 } 845 846 if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) { 847 mdb_warn("can't read swapinfo at %#lx", addr); 848 return (DCMD_ERR); 849 } 850 851 name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC); 852 if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1) 853 name = "*error*"; 854 855 mdb_printf("%0?lx %?p %9d %9d %s\n", 856 addr, si.si_vp, si.si_npgs, si.si_nfpgs, name); 857 858 return (DCMD_OK); 859 } 860 861 int 862 memlist_walk_step(mdb_walk_state_t *wsp) 863 { 864 uintptr_t mlp; 865 struct 
	memlist ml;

	mlp = wsp->walk_addr;

	/* end of the singly-linked ml_next chain */
	if (mlp == NULL)
		return (WALK_DONE);

	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
		mdb_warn("unable to read memlist at %#lx", mlp);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)ml.ml_next;

	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
}

/*
 * ::memlist dcmd - print a memlist entry.  With no address, -i/-a/-v
 * select the phys_install/phys_avail/virt_avail lists (default: the
 * first); each selected list is walked via the memlist walker.
 */
int
memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct memlist	ml;

	if (!(flags & DCMD_ADDRSPEC)) {
		uintptr_t ptr;
		uint_t list = 0;
		int i;
		static const char *lists[] = {
			"phys_install",
			"phys_avail",
			"virt_avail"
		};

		/* each option sets the bit matching its lists[] index */
		if (mdb_getopts(argc, argv,
		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
			return (DCMD_USAGE);

		if (!list)
			list = 1;

		for (i = 0; list; i++, list >>= 1) {
			if (!(list & 1))
				continue;
			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
			    (ptr == NULL)) {
				mdb_warn("%s not found or invalid", lists[i]);
				return (DCMD_ERR);
			}

			mdb_printf("%s:\n", lists[i]);
			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
			    ptr) == -1) {
				mdb_warn("can't walk memlist");
				return (DCMD_ERR);
			}
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");

	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
		mdb_warn("can't read memlist at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);

	return (DCMD_OK);
}

int
seg_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("seg walk must begin at struct as *\n");
		return (WALK_ERR);
	}

	/*
	 * this is really just a wrapper to AVL tree walk
	 */
	wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
	return (avl_walk_init(wsp));
}

/*
 * ::seg dcmd - print a single struct seg.
 */
/*ARGSUSED*/
int
seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct seg s;

	if (argc != 0)
		return (DCMD_USAGE);

	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
		mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
		    "SEG", "BASE", "SIZE", "DATA", "OPS");
	}

	if (mdb_vread(&s, sizeof (s), addr) == -1) {
		mdb_warn("failed to read seg at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%?p %?p %?lx %?p %a\n",
	    addr, s.s_base, s.s_size, s.s_data, s.s_ops);

	return (DCMD_OK);
}

/* segvn_pages walk callback: just count the pages handed to us. */
/*ARGSUSED*/
static int
pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
{
	pgcnt_t *nres = out;

	(*nres)++;

	return (WALK_NEXT);
}

/*
 * ::pmap seg callback (verbose form): print the segment plus, for
 * segvn segments, the resident page count and the backing vnode path.
 */
static int
pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
{

	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
		struct segvn_data svn;
		pgcnt_t nres = 0;

		/* best effort: on a failed read, svn.vp stays NULL */
		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		/*
		 * Use the segvn_pages walker to find all of the in-core pages
		 * for this mapping.
		 */
		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
		    (uintptr_t)seg->s_data) == -1) {
			mdb_warn("failed to walk segvn_pages (s_data=%p)",
			    seg->s_data);
		}
		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);

		if (svn.vp != NULL) {
			char buf[29];

			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
			mdb_printf(" %s", buf);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else {
		/* non-segvn segment: identify it by its ops vector */
		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}

/*
 * ::pmap -q seg callback: like pmap_walk_seg but prints only the vnode
 * pointer, skipping the expensive segvn_pages walk.
 */
static int
pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
{
	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
		struct segvn_data svn;

		/* best effort: on a failed read, svn.vp stays NULL */
		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		if (svn.vp != NULL) {
			mdb_printf(" %0?p", svn.vp);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else {
		mdb_printf(" [ &%a ]", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}

/*
 * ::pmap dcmd - print the address space mappings of the process at
 * addr, one line per segment; -q prints the quick form.
 */
/*ARGSUSED*/
int
pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uintptr_t segvn;
	proc_t proc;
	uint_t quick = FALSE;
	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;

	GElf_Sym sym;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
		mdb_warn("failed to read proc at %p", addr);
		return (DCMD_ERR);
	}

	/* segvn_ops address lets the callbacks recognize segvn segments */
	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
		segvn = (uintptr_t)sym.st_value;
	else
		segvn = NULL;

	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");

	if (quick) {
		mdb_printf("VNODE\n");
		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
	} else {
		mdb_printf("%8s %s\n", "RES", "PATH");
	}

	if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) {
		mdb_warn("failed to walk segments of as %p", proc.p_as);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

/*
 * State for the anon walk.  The anon_hdr is either a flat array of anon
 * pointers (level one only) or a two-level array of ANON_CHUNK_SIZE
 * chunks; aw_levtwo is NULL in the flat case.
 */
typedef struct anon_walk_data {
	uintptr_t *aw_levone;		/* level-one array (or flat array) */
	uintptr_t *aw_levtwo;		/* current level-two chunk buffer */
	size_t aw_minslot;		/* first slot to report */
	size_t aw_maxslot;		/* one past the last slot to report */
	pgcnt_t aw_nlevone;		/* number of level-one entries */
	pgcnt_t aw_levone_ndx;		/* current level-one index */
	size_t aw_levtwo_ndx;		/* current index within aw_levtwo */
	struct anon_map	*aw_ampp;	/* target anon_map address */
	struct anon_map	aw_amp;		/* copy of the anon_map */
	struct anon_hdr	aw_ahp;		/* copy of the anon_hdr */
	int		aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;

/*
 * Shared init for the anon walks: read the anon_map and anon_hdr at
 * walk_addr and set up iteration over slots [minslot, maxslot).
 */
int
anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
{
	anon_walk_data_t *aw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("anon walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
	aw->aw_ampp = (struct anon_map *)wsp->walk_addr;

	if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
		mdb_warn("failed to read anon map at %p", wsp->walk_addr);
		mdb_free(aw, sizeof (anon_walk_data_t));
		return (WALK_ERR);
	}

	if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
	    (uintptr_t)(aw->aw_amp.ahp)) == -1) {
		mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
		mdb_free(aw, sizeof (anon_walk_data_t));
		return (WALK_ERR);
	}

	/* update min and maxslot with the given constraints */
	maxslot = MIN(maxslot, aw->aw_ahp.size);
	minslot = MIN(minslot, maxslot);

	if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
	    (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
		/* flat array: level one holds the anon pointers directly */
		aw->aw_nlevone = maxslot;
		aw->aw_levone_ndx = minslot;
		aw->aw_levtwo = NULL;
	} else {
		/* two-level: level one holds pointers to chunks */
		aw->aw_nlevone =
		    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
		aw->aw_levone_ndx = 0;
		aw->aw_levtwo =
		    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
	}

	aw->aw_levone =
	    mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
	aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);

	/*
	 * NOTE(review): this read is unchecked; a failure leaves
	 * aw_levone uninitialized -- confirm this is acceptable.
	 */
	mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
	    (uintptr_t)aw->aw_ahp.array_chunk);

	aw->aw_levtwo_ndx = 0;
	aw->aw_minslot = minslot;
	aw->aw_maxslot = maxslot;

/* NOTE(review): vestigial label -- no goto targets it */
out:
	wsp->walk_data = aw;
	return (0);
}

int
anon_walk_step(mdb_walk_state_t *wsp)
{
	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
	struct anon anon;
	uintptr_t anonptr;
	ulong_t slot;

	/*
	 * Once we've walked through level one, we're done.
	 */
	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
		return (WALK_DONE);
	}

	if (aw->aw_levtwo == NULL) {
		/* flat array: level one entries are the anon pointers */
		anonptr = aw->aw_levone[aw->aw_levone_ndx];
		aw->aw_levone_ndx++;
	} else {
		if (aw->aw_levtwo_ndx == 0) {
			uintptr_t levtwoptr;

			/* The first time through, skip to our first index.
*/ 1194 if (aw->aw_levone_ndx == 0) { 1195 aw->aw_levone_ndx = 1196 aw->aw_minslot / ANON_CHUNK_SIZE; 1197 aw->aw_levtwo_ndx = 1198 aw->aw_minslot % ANON_CHUNK_SIZE; 1199 } 1200 1201 levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx]; 1202 1203 if (levtwoptr == NULL) { 1204 if (!aw->aw_all) { 1205 aw->aw_levtwo_ndx = 0; 1206 aw->aw_levone_ndx++; 1207 return (WALK_NEXT); 1208 } 1209 bzero(aw->aw_levtwo, 1210 ANON_CHUNK_SIZE * sizeof (uintptr_t)); 1211 1212 } else if (mdb_vread(aw->aw_levtwo, 1213 ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) == 1214 -1) { 1215 mdb_warn("unable to read anon_map %p's " 1216 "second-level map %d at %p", 1217 aw->aw_ampp, aw->aw_levone_ndx, 1218 levtwoptr); 1219 return (WALK_ERR); 1220 } 1221 } 1222 slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx; 1223 anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx]; 1224 1225 /* update the indices for next time */ 1226 aw->aw_levtwo_ndx++; 1227 if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) { 1228 aw->aw_levtwo_ndx = 0; 1229 aw->aw_levone_ndx++; 1230 } 1231 1232 /* make sure the slot # is in the requested range */ 1233 if (slot >= aw->aw_maxslot) { 1234 return (WALK_DONE); 1235 } 1236 } 1237 1238 if (anonptr != NULL) { 1239 mdb_vread(&anon, sizeof (anon), anonptr); 1240 return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata)); 1241 } 1242 if (aw->aw_all) { 1243 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata)); 1244 } 1245 return (WALK_NEXT); 1246 } 1247 1248 void 1249 anon_walk_fini(mdb_walk_state_t *wsp) 1250 { 1251 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data; 1252 1253 if (aw->aw_levtwo != NULL) 1254 mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t)); 1255 1256 mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t)); 1257 mdb_free(aw, sizeof (anon_walk_data_t)); 1258 } 1259 1260 int 1261 anon_walk_init(mdb_walk_state_t *wsp) 1262 { 1263 return (anon_walk_init_common(wsp, 0, ULONG_MAX)); 1264 } 1265 1266 int 1267 
segvn_anon_walk_init(mdb_walk_state_t *wsp) 1268 { 1269 const uintptr_t svd_addr = wsp->walk_addr; 1270 uintptr_t amp_addr; 1271 uintptr_t seg_addr; 1272 struct segvn_data svd; 1273 struct anon_map amp; 1274 struct seg seg; 1275 1276 if (svd_addr == NULL) { 1277 mdb_warn("segvn_anon walk doesn't support global walks\n"); 1278 return (WALK_ERR); 1279 } 1280 if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) { 1281 mdb_warn("segvn_anon walk: unable to read segvn_data at %p", 1282 svd_addr); 1283 return (WALK_ERR); 1284 } 1285 if (svd.amp == NULL) { 1286 mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n", 1287 svd_addr); 1288 return (WALK_ERR); 1289 } 1290 amp_addr = (uintptr_t)svd.amp; 1291 if (mdb_vread(&, sizeof (amp), amp_addr) == -1) { 1292 mdb_warn("segvn_anon walk: unable to read amp %p for " 1293 "segvn_data %p", amp_addr, svd_addr); 1294 return (WALK_ERR); 1295 } 1296 seg_addr = (uintptr_t)svd.seg; 1297 if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) { 1298 mdb_warn("segvn_anon walk: unable to read seg %p for " 1299 "segvn_data %p", seg_addr, svd_addr); 1300 return (WALK_ERR); 1301 } 1302 if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) { 1303 mdb_warn("anon map %p is too small for segment %p\n", 1304 amp_addr, seg_addr); 1305 return (WALK_ERR); 1306 } 1307 1308 wsp->walk_addr = amp_addr; 1309 return (anon_walk_init_common(wsp, 1310 svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT))); 1311 } 1312 1313 1314 typedef struct { 1315 u_offset_t svs_offset; 1316 uintptr_t svs_page; 1317 } segvn_sparse_t; 1318 #define SEGVN_MAX_SPARSE ((128 * 1024) / sizeof (segvn_sparse_t)) 1319 1320 typedef struct { 1321 uintptr_t svw_svdp; 1322 struct segvn_data svw_svd; 1323 struct seg svw_seg; 1324 size_t svw_walkoff; 1325 ulong_t svw_anonskip; 1326 segvn_sparse_t *svw_sparse; 1327 size_t svw_sparse_idx; 1328 size_t svw_sparse_count; 1329 size_t svw_sparse_size; 1330 uint8_t svw_sparse_overflow; 1331 uint8_t svw_all; 1332 } segvn_walk_data_t; 

/*
 * "page" walk callback used by segvn_pages_walk_init(): record each of the
 * vnode's in-core pages that falls within the segment, so that a large,
 * sparsely-resident mapping can be reported without a per-offset page
 * lookup.  Sets svw_sparse_overflow and stops if the fixed-size array
 * fills up.
 */
static int
segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
{
	segvn_walk_data_t *const svw = arg;
	const page_t *const pp = pp_arg;
	const u_offset_t offset = pp->p_offset;
	segvn_sparse_t *const cur =
	    &svw->svw_sparse[svw->svw_sparse_count];

	/*
	 * See if the page is of interest.  The unsigned subtraction also
	 * rejects pages below svd.offset (the difference wraps to a huge
	 * value).
	 */
	if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
		return (WALK_NEXT);
	}
	/* See if we have space for the new entry, then add it. */
	if (svw->svw_sparse_count >= svw->svw_sparse_size) {
		svw->svw_sparse_overflow = 1;
		return (WALK_DONE);
	}
	svw->svw_sparse_count++;
	cur->svs_offset = offset;
	cur->svs_page = addr;
	return (WALK_NEXT);
}

/*
 * qsort comparator: order segvn_sparse_t entries by vnode offset.
 */
static int
segvn_sparse_cmp(const void *lp, const void *rp)
{
	const segvn_sparse_t *const l = lp;
	const segvn_sparse_t *const r = rp;

	if (l->svs_offset < r->svs_offset) {
		return (-1);
	}
	if (l->svs_offset > r->svs_offset) {
		return (1);
	}
	return (0);
}

/*
 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
 * structure.  For segvn_datas without an anon structure, it just looks up
 * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
 * pass through to the vnode, and non-null slots are checked for residency.
 */
int
segvn_pages_walk_init(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t	*svw;
	struct segvn_data	*svd;

	if (wsp->walk_addr == NULL) {
		mdb_warn("segvn walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
	svw->svw_svdp = wsp->walk_addr;
	svw->svw_anonskip = 0;
	svw->svw_sparse_idx = 0;
	svw->svw_walkoff = 0;
	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);

	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
	    -1) {
		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	svd = &svw->svw_svd;
	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
	    (uintptr_t)svd->seg) == -1) {
		mdb_warn("failed to read seg at %p (from %p)",
		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	if (svd->amp == NULL && svd->vp == NULL) {
		/* make the walk terminate immediately; no pages */
		svw->svw_walkoff = svw->svw_seg.s_size;

	} else if (svd->amp == NULL &&
	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
		/*
		 * If we don't have an anon pointer, and the segment is large,
		 * we try to load the in-memory pages into a fixed-size array,
		 * which is then sorted and reported directly.  This is much
		 * faster than doing a mdb_page_lookup() for each possible
		 * offset.
		 *
		 * If the allocation fails, or there are too many pages
		 * in-core, we fall back to looking up the pages individually.
		 */
		svw->svw_sparse = mdb_alloc(
		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
		if (svw->svw_sparse != NULL) {
			svw->svw_sparse_size = SEGVN_MAX_SPARSE;

			if (mdb_pwalk("page", segvn_sparse_fill, svw,
			    (uintptr_t)svd->vp) == -1 ||
			    svw->svw_sparse_overflow) {
				/* fall back to the per-offset lookup path */
				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
				    sizeof (*svw->svw_sparse));
				svw->svw_sparse = NULL;
			} else {
				qsort(svw->svw_sparse, svw->svw_sparse_count,
				    sizeof (*svw->svw_sparse),
				    segvn_sparse_cmp);
			}
		}

	} else if (svd->amp != NULL) {
		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
		    "segvn_anon" : "segvn_anon_all";
		/*
		 * If we're not printing all offsets, and the segvn_data has
		 * no backing VP, we can use the "segvn_anon" walker, which
		 * efficiently skips NULL slots.
		 *
		 * Otherwise, we layer over the "segvn_anon_all" walker
		 * (which reports all anon slots, even NULL ones), so that
		 * segvn_pages_walk_step() knows the precise offset for each
		 * element.  It uses that offset information to look up the
		 * backing pages for NULL anon slots.
		 */
		if (mdb_layered_walk(layer, wsp) == -1) {
			mdb_warn("segvn_pages: failed to layer \"%s\" "
			    "for segvn_data %p", layer, svw->svw_svdp);
			mdb_free(svw, sizeof (*svw));
			return (WALK_ERR);
		}
	}

	wsp->walk_data = svw;
	return (WALK_NEXT);
}

/*
 * Step function for "segvn_pages": report the page_t backing the current
 * segment offset (or NULL, when svw_all is set and no page backs it).
 * Three sources are consulted, in order of preference: the pre-sorted
 * sparse array, the vnode (when there is no anon slot), or the layered
 * anon walker's current slot.
 */
int
segvn_pages_walk_step(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t	*const	svw = wsp->walk_data;
	struct seg		*const	seg = &svw->svw_seg;
	struct segvn_data	*const	svd = &svw->svw_svd;
	uintptr_t		pp;
	page_t			page;

	/* If we've walked off the end of the segment, we're done. */
	if (svw->svw_walkoff >= seg->s_size) {
		return (WALK_DONE);
	}

	/*
	 * If we've got a sparse page array, just send it directly.
	 */
	if (svw->svw_sparse != NULL) {
		u_offset_t off;

		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
			/* no more resident pages */
			pp = NULL;
			if (!svw->svw_all) {
				return (WALK_DONE);
			}
		} else {
			segvn_sparse_t	*const	svs =
			    &svw->svw_sparse[svw->svw_sparse_idx];
			off = svs->svs_offset - svd->offset;
			if (svw->svw_all && svw->svw_walkoff != off) {
				/* hole before the next resident page */
				pp = NULL;
			} else {
				pp = svs->svs_page;
				svw->svw_sparse_idx++;
			}
		}

	} else if (svd->amp == NULL || wsp->walk_addr == NULL) {
		/*
		 * If there's no anon, or the anon slot is NULL, look up
		 * <vp, offset>.
		 */
		if (svd->vp != NULL) {
			pp = mdb_page_lookup((uintptr_t)svd->vp,
			    svd->offset + svw->svw_walkoff);
		} else {
			pp = NULL;
		}

	} else {
		const struct anon	*const	anon = wsp->walk_layer;

		/*
		 * We have a "struct anon"; if it's not swapped out,
		 * look up the page.
		 */
		if (anon->an_vp != NULL || anon->an_off != 0) {
			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
			    anon->an_off);
			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
				mdb_warn("walk segvn_pages: segvn_data %p "
				    "offset %ld, anon page <%p, %llx> not "
				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
				    anon->an_vp, anon->an_off);
			}
		} else {
			if (anon->an_pvp == NULL) {
				mdb_warn("walk segvn_pages: useless struct "
				    "anon at %p\n", wsp->walk_addr);
			}
			pp = NULL;	/* nothing at this offset */
		}
	}

	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
	if (pp != NULL) {
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}
		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
	}
	if (svw->svw_all) {
		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}

void
segvn_pages_walk_fini(mdb_walk_state_t *wsp) 1563 { 1564 segvn_walk_data_t *const svw = wsp->walk_data; 1565 1566 if (svw->svw_sparse != NULL) { 1567 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE * 1568 sizeof (*svw->svw_sparse)); 1569 } 1570 mdb_free(svw, sizeof (*svw)); 1571 } 1572 1573 /* 1574 * Grumble, grumble. 1575 */ 1576 #define SMAP_HASHFUNC(vp, off) \ 1577 ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 1578 ((off) >> MAXBSHIFT)) & smd_hashmsk) 1579 1580 int 1581 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1582 { 1583 long smd_hashmsk; 1584 int hash; 1585 uintptr_t offset = 0; 1586 struct smap smp; 1587 uintptr_t saddr, kaddr; 1588 uintptr_t smd_hash, smd_smap; 1589 struct seg seg; 1590 1591 if (!(flags & DCMD_ADDRSPEC)) 1592 return (DCMD_USAGE); 1593 1594 if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) { 1595 mdb_warn("failed to read smd_hashmsk"); 1596 return (DCMD_ERR); 1597 } 1598 1599 if (mdb_readvar(&smd_hash, "smd_hash") == -1) { 1600 mdb_warn("failed to read smd_hash"); 1601 return (DCMD_ERR); 1602 } 1603 1604 if (mdb_readvar(&smd_smap, "smd_smap") == -1) { 1605 mdb_warn("failed to read smd_hash"); 1606 return (DCMD_ERR); 1607 } 1608 1609 if (mdb_readvar(&kaddr, "segkmap") == -1) { 1610 mdb_warn("failed to read segkmap"); 1611 return (DCMD_ERR); 1612 } 1613 1614 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) { 1615 mdb_warn("failed to read segkmap at %p", kaddr); 1616 return (DCMD_ERR); 1617 } 1618 1619 if (argc != 0) { 1620 const mdb_arg_t *arg = &argv[0]; 1621 1622 if (arg->a_type == MDB_TYPE_IMMEDIATE) 1623 offset = arg->a_un.a_val; 1624 else 1625 offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str); 1626 } 1627 1628 hash = SMAP_HASHFUNC(addr, offset); 1629 1630 if (mdb_vread(&saddr, sizeof (saddr), 1631 smd_hash + hash * sizeof (uintptr_t)) == -1) { 1632 mdb_warn("couldn't read smap at %p", 1633 smd_hash + hash * sizeof (uintptr_t)); 1634 return (DCMD_ERR); 1635 } 1636 1637 do { 1638 if (mdb_vread(&smp, 
sizeof (smp), saddr) == -1) { 1639 mdb_warn("couldn't read smap at %p", saddr); 1640 return (DCMD_ERR); 1641 } 1642 1643 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) { 1644 mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n", 1645 addr, offset, saddr, ((saddr - smd_smap) / 1646 sizeof (smp)) * MAXBSIZE + seg.s_base); 1647 return (DCMD_OK); 1648 } 1649 1650 saddr = (uintptr_t)smp.sm_hash; 1651 } while (saddr != NULL); 1652 1653 mdb_printf("no smap for vnode %p, offs %p\n", addr, offset); 1654 return (DCMD_OK); 1655 } 1656 1657 /*ARGSUSED*/ 1658 int 1659 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1660 { 1661 uintptr_t kaddr; 1662 struct seg seg; 1663 struct segmap_data sd; 1664 1665 if (!(flags & DCMD_ADDRSPEC)) 1666 return (DCMD_USAGE); 1667 1668 if (mdb_readvar(&kaddr, "segkmap") == -1) { 1669 mdb_warn("failed to read segkmap"); 1670 return (DCMD_ERR); 1671 } 1672 1673 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) { 1674 mdb_warn("failed to read segkmap at %p", kaddr); 1675 return (DCMD_ERR); 1676 } 1677 1678 if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) { 1679 mdb_warn("failed to read segmap_data at %p", seg.s_data); 1680 return (DCMD_ERR); 1681 } 1682 1683 mdb_printf("%p is smap %p\n", addr, 1684 ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) * 1685 sizeof (struct smap) + (uintptr_t)sd.smd_sm); 1686 1687 return (DCMD_OK); 1688 } 1689