1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <mdb/mdb_param.h>
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ks.h>
28 #include <sys/types.h>
29 #include <sys/memlist.h>
30 #include <sys/swap.h>
31 #include <sys/systm.h>
32 #include <sys/thread.h>
33 #include <vm/anon.h>
34 #include <vm/as.h>
35 #include <vm/page.h>
36 #include <sys/thread.h>
37 #include <sys/swap.h>
38 #include <sys/memlist.h>
39 #include <sys/vnode.h>
40 #include <vm/seg_map.h>
41 #include <vm/seg_vn.h>
42 #if defined(__i386) || defined(__amd64)
43 #include <sys/balloon_impl.h>
44 #endif
45
46 #include "avl.h"
47 #include "memory.h"
48
49 /*
50 * Page walker.
51 * By default, this will walk all pages in the system. If given an
52 * address, it will walk all pages belonging to the vnode at that
53 * address.
54 */
55
56 /*
57 * page_walk_data
58 *
59 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
60 * number of hash locations remaining in the page hash table when
61 * walking all pages.
62 *
63 * The astute reader will notice that pw_hashloc is only used when
64 * reading all pages (to hold a pointer to our location in the page
65 * hash table), and that pw_first is only used when reading the pages
66 * belonging to a particular vnode (to hold a pointer to the first
67 * page). While these could be combined to be a single pointer, they
68 * are left separate for clarity.
69 */
/*
 * Per-walk state for the page walker; see the comment above for how the
 * two walk modes (global hash scan vs. single vnode) use these fields.
 */
typedef struct page_walk_data {
	long pw_hashleft;	/* -1: vnode walk; else hash buckets left */
	void **pw_hashloc;	/* current location in page_hash (global walk) */
	uintptr_t pw_first;	/* first page seen; terminates a vnode walk */
} page_walk_data_t;
75
76 int
page_walk_init(mdb_walk_state_t * wsp)77 page_walk_init(mdb_walk_state_t *wsp)
78 {
79 page_walk_data_t *pwd;
80 void **ptr;
81 size_t hashsz;
82 vnode_t vn;
83
84 if (wsp->walk_addr == NULL) {
85
86 /*
87 * Walk all pages
88 */
89
90 if ((mdb_readvar(&ptr, "page_hash") == -1) ||
91 (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
92 (ptr == NULL) || (hashsz == 0)) {
93 mdb_warn("page_hash, page_hashsz not found or invalid");
94 return (WALK_ERR);
95 }
96
97 /*
98 * Since we are walking all pages, initialize hashleft
99 * to be the remaining number of entries in the page
100 * hash. hashloc is set the start of the page hash
101 * table. Setting the walk address to 0 indicates that
102 * we aren't currently following a hash chain, and that
103 * we need to scan the page hash table for a page.
104 */
105 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
106 pwd->pw_hashleft = hashsz;
107 pwd->pw_hashloc = ptr;
108 wsp->walk_addr = 0;
109 } else {
110
111 /*
112 * Walk just this vnode
113 */
114
115 if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
116 mdb_warn("unable to read vnode_t at %#lx",
117 wsp->walk_addr);
118 return (WALK_ERR);
119 }
120
121 /*
122 * We set hashleft to -1 to indicate that we are
123 * walking a vnode, and initialize first to 0 (it is
124 * used to terminate the walk, so it must not be set
125 * until after we have walked the first page). The
126 * walk address is set to the first page.
127 */
128 pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
129 pwd->pw_hashleft = -1;
130 pwd->pw_first = 0;
131
132 wsp->walk_addr = (uintptr_t)vn.v_pages;
133 }
134
135 wsp->walk_data = pwd;
136
137 return (WALK_NEXT);
138 }
139
140 int
page_walk_step(mdb_walk_state_t * wsp)141 page_walk_step(mdb_walk_state_t *wsp)
142 {
143 page_walk_data_t *pwd = wsp->walk_data;
144 page_t page;
145 uintptr_t pp;
146
147 pp = wsp->walk_addr;
148
149 if (pwd->pw_hashleft < 0) {
150
151 /* We're walking a vnode's pages */
152
153 /*
154 * If we don't have any pages to walk, we have come
155 * back around to the first one (we finished), or we
156 * can't read the page we're looking at, we are done.
157 */
158 if (pp == NULL || pp == pwd->pw_first)
159 return (WALK_DONE);
160 if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
161 mdb_warn("unable to read page_t at %#lx", pp);
162 return (WALK_ERR);
163 }
164
165 /*
166 * Set the walk address to the next page, and if the
167 * first page hasn't been set yet (i.e. we are on the
168 * first page), set it.
169 */
170 wsp->walk_addr = (uintptr_t)page.p_vpnext;
171 if (pwd->pw_first == NULL)
172 pwd->pw_first = pp;
173
174 } else if (pwd->pw_hashleft > 0) {
175
176 /* We're walking all pages */
177
178 /*
179 * If pp (the walk address) is NULL, we scan through
180 * the page hash table until we find a page.
181 */
182 if (pp == NULL) {
183
184 /*
185 * Iterate through the page hash table until we
186 * find a page or reach the end.
187 */
188 do {
189 if (mdb_vread(&pp, sizeof (uintptr_t),
190 (uintptr_t)pwd->pw_hashloc) == -1) {
191 mdb_warn("unable to read from %#p",
192 pwd->pw_hashloc);
193 return (WALK_ERR);
194 }
195 pwd->pw_hashleft--;
196 pwd->pw_hashloc++;
197 } while (pwd->pw_hashleft && (pp == NULL));
198
199 /*
200 * We've reached the end; exit.
201 */
202 if (pp == NULL)
203 return (WALK_DONE);
204 }
205
206 if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
207 mdb_warn("unable to read page_t at %#lx", pp);
208 return (WALK_ERR);
209 }
210
211 /*
212 * Set the walk address to the next page.
213 */
214 wsp->walk_addr = (uintptr_t)page.p_hash;
215
216 } else {
217 /* We've finished walking all pages. */
218 return (WALK_DONE);
219 }
220
221 return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
222 }
223
224 void
page_walk_fini(mdb_walk_state_t * wsp)225 page_walk_fini(mdb_walk_state_t *wsp)
226 {
227 mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
228 }
229
230 /*
231 * allpages walks all pages in the system in order they appear in
232 * the memseg structure
233 */
234
235 #define PAGE_BUFFER 128
236
237 int
allpages_walk_init(mdb_walk_state_t * wsp)238 allpages_walk_init(mdb_walk_state_t *wsp)
239 {
240 if (wsp->walk_addr != 0) {
241 mdb_warn("allpages only supports global walks.\n");
242 return (WALK_ERR);
243 }
244
245 if (mdb_layered_walk("memseg", wsp) == -1) {
246 mdb_warn("couldn't walk 'memseg'");
247 return (WALK_ERR);
248 }
249
250 wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
251 return (WALK_NEXT);
252 }
253
254 int
allpages_walk_step(mdb_walk_state_t * wsp)255 allpages_walk_step(mdb_walk_state_t *wsp)
256 {
257 const struct memseg *msp = wsp->walk_layer;
258 page_t *buf = wsp->walk_data;
259 size_t pg_read, i;
260 size_t pg_num = msp->pages_end - msp->pages_base;
261 const page_t *pg_addr = msp->pages;
262
263 while (pg_num > 0) {
264 pg_read = MIN(pg_num, PAGE_BUFFER);
265
266 if (mdb_vread(buf, pg_read * sizeof (page_t),
267 (uintptr_t)pg_addr) == -1) {
268 mdb_warn("can't read page_t's at %#lx", pg_addr);
269 return (WALK_ERR);
270 }
271 for (i = 0; i < pg_read; i++) {
272 int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
273 &buf[i], wsp->walk_cbdata);
274
275 if (ret != WALK_NEXT)
276 return (ret);
277 }
278 pg_num -= pg_read;
279 pg_addr += pg_read;
280 }
281
282 return (WALK_NEXT);
283 }
284
285 void
allpages_walk_fini(mdb_walk_state_t * wsp)286 allpages_walk_fini(mdb_walk_state_t *wsp)
287 {
288 mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
289 }
290
291 /*
292 * Hash table + LRU queue.
293 * This table is used to cache recently read vnodes for the memstat
294 * command, to reduce the number of mdb_vread calls. This greatly
 * speeds the memstat command on live, large CPU count systems.
296 */
297
/* Fallback and preferred cache sizes; both are prime for better hashing. */
#define	VN_SMALL	401
#define	VN_LARGE	10007
/* Map a vnode address to a hash bucket index. */
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))

/* One cached vnode: its flags plus hash-chain and LRU-queue linkage. */
struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode	*/
	uintptr_t vn_ptr;			/* pointer to vnode	*/
	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
};
309
310 /*
 * vn_q_first -> points to the head of the queue: the vnode that was most
312 * recently used
313 * vn_q_last -> points to the oldest used vnode, and is freed once a new
314 * vnode is read.
315 * vn_htable -> hash table
316 * vn_htable_buf -> contains htable objects
317 * vn_htable_size -> total number of items in the hash table
318 * vn_htable_buckets -> number of buckets in the hash table
319 */
/* Vnode cache: hash table plus LRU queue, described by the comment above. */
typedef struct vn_htable {
	struct vn_htable_list *vn_q_first;	/* MRU end of the queue */
	struct vn_htable_list *vn_q_last;	/* LRU end; recycled first */
	struct vn_htable_list **vn_htable;	/* hash bucket array */
	struct vn_htable_list *vn_htable_buf;	/* backing entry storage */
	int vn_htable_size;			/* total number of entries */
	int vn_htable_buckets;			/* number of hash buckets */
} vn_htable_t;
328
329
/* Allocate memory, initialize the hash table and the LRU queue. */
static void
vn_htable_init(vn_htable_t *hp, size_t vn_size)
{
	int i;
	int htable_size = MAX(vn_size, VN_LARGE);

	/*
	 * Try the large size first without sleeping; on failure, fall
	 * back to VN_SMALL (this allocation may sleep).  UM_GC memory
	 * is reclaimed by mdb when the dcmd completes.
	 */
	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
		htable_size = VN_SMALL;
		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
		    * htable_size, UM_SLEEP|UM_GC);
	}

	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
	    * htable_size, UM_SLEEP|UM_GC);

	/*
	 * String every entry of vn_htable_buf into one doubly-linked
	 * queue.  The first entry's vn_q_prev and the last entry's
	 * vn_q_next stay NULL courtesy of mdb_zalloc().
	 */
	hp->vn_q_first = &hp->vn_htable_buf[0];
	hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1];
	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];

	for (i = 1; i < (htable_size-1); i++) {
		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
	}

	hp->vn_htable_size = htable_size;
	hp->vn_htable_buckets = htable_size;
}
360
361
362 /*
363 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
364 * The function tries to find needed information in the following order:
365 *
366 * 1. check if ptr is the first in queue
367 * 2. check if ptr is in hash table (if so move it to the top of queue)
368 * 3. do mdb_vread, remove last queue item from queue and hash table.
369 * Insert new information to freed object, and put this object in to the
370 * top of the queue.
371 */
/*
 * Look up the vnode at 'ptr', preferring the cache over mdb_vread, and
 * return its v_flag through vp->v_flag.  Returns 0 on success, -1 if
 * the vnode could not be read from the target.
 */
static int
vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
{
	int hkey;
	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
	struct vn_htable_list *q_first = hp->vn_q_first;

	/* 1. vnode ptr is the first in queue, just get v_flag and return */
	if (q_first->vn_ptr == ptr) {
		vp->v_flag = q_first->vn_flag;

		return (0);
	}

	/* 2. search the hash table for this ptr */
	hkey = VN_HTABLE_KEY(ptr, hp);
	hent = hp->vn_htable[hkey];
	while (hent && (hent->vn_ptr != ptr))
		hent = hent->vn_h_next;

	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
	if (hent == NULL) {
		struct vnode vn;

		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx", ptr);
			return (-1);
		}

		/* we will insert read data into the last element in queue */
		hent = hp->vn_q_last;

		/* remove last hp->vn_q_last object from hash table */
		if (hent->vn_ptr) {
			/* walk its old bucket to find and unlink it */
			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
			while (*htmp != hent)
				htmp = &(*htmp)->vn_h_next;
			*htmp = hent->vn_h_next;
		}

		/* insert data into new free object */
		hent->vn_ptr = ptr;
		hent->vn_flag = vn.v_flag;

		/* insert new object into hash table */
		hent->vn_h_next = hp->vn_htable[hkey];
		hp->vn_htable[hkey] = hent;
	}

	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
	q_next = hent->vn_q_next;
	q_prev = hent->vn_q_prev;
	if (q_next == NULL)
		hp->vn_q_last = q_prev;
	else
		q_next->vn_q_prev = q_prev;
	q_prev->vn_q_next = q_next;

	/* Add to the front of queue */
	hent->vn_q_prev = NULL;
	hent->vn_q_next = q_first;
	q_first->vn_q_prev = hent;
	hp->vn_q_first = hent;

	/* Set v_flag in vnode pointer from hent */
	vp->v_flag = hent->vn_flag;

	return (0);
}
441
/* Summary statistics of pages, accumulated by memstat_callback(). */
typedef struct memstat {
	struct vnode *ms_kvp;		/* Cached address of kernel vnode */
	struct vnode *ms_unused_vp;	/* Unused pages vnode pointer */
	struct vnode *ms_zvp;		/* Cached address of zio vnode */
	uint64_t ms_kmem;		/* Pages of kernel memory */
	uint64_t ms_zfs_data;		/* Pages of zfs data */
	uint64_t ms_anon;		/* Pages of anonymous memory */
	uint64_t ms_vnode;		/* Pages of named (vnode) memory */
	uint64_t ms_exec;		/* Pages of exec/library memory */
	uint64_t ms_cachelist;		/* Pages on the cachelist (free) */
	uint64_t ms_total;		/* Pages on page hash */
	vn_htable_t *ms_vn_htable;	/* Pointer to hash table */
	struct vnode ms_vn;		/* vnode buffer */
} memstat_t;
457
/* True when the page belongs to the cached kernel vnode (kernel memory). */
#define	MS_PP_ISKAS(pp, stats)				\
	((pp)->p_vnode == (stats)->ms_kvp)

/* True when a zio vnode is known and the page belongs to it (ZFS data). */
#define	MS_PP_ISZFS_DATA(pp, stats)			\
	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
463
464 /*
465 * Summarize pages by type and update stat information
466 */
467
/*
 * Classify one page and bump the matching counter in *stats.  The
 * if/else chain is ordered, so a page is counted under the first test
 * that matches; pages with no vnode, or belonging to the unused-pages
 * vnode, are skipped entirely and do not contribute to ms_total.
 */
/* ARGSUSED */
static int
memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
{
	struct vnode *vp = &stats->ms_vn;

	if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
		return (WALK_NEXT);
	else if (MS_PP_ISKAS(pp, stats))
		stats->ms_kmem++;
	else if (MS_PP_ISZFS_DATA(pp, stats))
		stats->ms_zfs_data++;
	else if (PP_ISFREE(pp))
		stats->ms_cachelist++;
	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
		return (WALK_ERR);	/* couldn't read the page's vnode */
	else if (IS_SWAPFSVP(vp))
		stats->ms_anon++;
	else if ((vp->v_flag & VVMEXEC) != 0)
		stats->ms_exec++;
	else
		stats->ms_vnode++;

	stats->ms_total++;

	return (WALK_NEXT);
}
495
/*
 * ::memstat dcmd -- walk every page in the system, classify each one,
 * and print a summary table of memory usage by category.
 */
/* ARGSUSED */
int
memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	pgcnt_t total_pages, physmem;
	ulong_t freemem;
	memstat_t stats;
	GElf_Sym sym;
	vn_htable_t ht;
	struct vnode *kvps;
	uintptr_t vn_size = 0;
#if defined(__i386) || defined(__amd64)
	bln_stats_t bln_stats;
	ssize_t bln_size;
#endif

	bzero(&stats, sizeof (memstat_t));

	/*
	 * -s size, is an internal option. It specifies the size of vn_htable.
	 * Hash table size is set in the following order:
	 * If user has specified the size that is larger than VN_LARGE: try it,
	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
	 * failed to allocate default to VN_SMALL.
	 * For a better efficiency of hash table it is highly recommended to
	 * set size to a prime number.
	 */
	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
		return (DCMD_USAGE);

	/* Initialize vnode hash list and queue */
	vn_htable_init(&ht, vn_size);
	stats.ms_vn_htable = &ht;

	/* Total physical memory */
	if (mdb_readvar(&total_pages, "total_pages") == -1) {
		mdb_warn("unable to read total_pages");
		return (DCMD_ERR);
	}

	/* Artificially limited memory */
	if (mdb_readvar(&physmem, "physmem") == -1) {
		mdb_warn("unable to read physmem");
		return (DCMD_ERR);
	}

	/* read kernel vnode array pointer */
	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
	    (GElf_Sym *)&sym) == -1) {
		mdb_warn("unable to read kvps");
		return (DCMD_ERR);
	}
	kvps = (struct vnode *)(uintptr_t)sym.st_value;
	stats.ms_kvp = &kvps[KV_KVP];

	/*
	 * Read the zio vnode pointer.
	 */
	stats.ms_zvp = &kvps[KV_ZVP];

	/*
	 * If physmem != total_pages, then the administrator has limited the
	 * number of pages available in the system.  Excluded pages are
	 * associated with the unused pages vnode.  Read this vnode so the
	 * pages can be excluded in the page accounting.
	 */
	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
	    (GElf_Sym *)&sym) == -1) {
		mdb_warn("unable to read unused_pages_vp");
		return (DCMD_ERR);
	}
	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;

	/* walk all pages, collect statistics */
	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
	    &stats) == -1) {
		mdb_warn("can't walk memseg");
		return (DCMD_ERR);
	}

	/*
	 * Rounded percent-of-physmem: x * 100 / physmem computed as
	 * x * 1000 / (physmem * 10), with half a divisor added in.
	 * NOTE(review): the rounding term uses total_pages rather than
	 * physmem -- harmless when the two are equal; confirm intent.
	 */
#define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
		((physmem) * 10)))

	mdb_printf("Page Summary                Pages                MB"
	    "  %%Tot\n");
	mdb_printf("------------     ----------------  ----------------"
	    "  ----\n");
	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
	    stats.ms_kmem,
	    (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_kmem));

	/* only shown when ZFS data pages were actually seen */
	if (stats.ms_zfs_data != 0)
		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
		    stats.ms_zfs_data,
		    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(stats.ms_zfs_data));

	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
	    stats.ms_anon,
	    (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_anon));
	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
	    stats.ms_exec,
	    (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_exec));
	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
	    stats.ms_vnode,
	    (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_vnode));
	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
	    stats.ms_cachelist,
	    (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_cachelist));

	/*
	 * occasionally, we double count pages above.  To avoid printing
	 * absurdly large values for freemem, we clamp it at zero.
	 */
	if (physmem > stats.ms_total)
		freemem = physmem - stats.ms_total;
	else
		freemem = 0;

#if defined(__i386) || defined(__amd64)
	/* Are we running under Xen?  If so, get balloon memory usage. */
	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
		if (freemem > bln_stats.bln_hv_pages)
			freemem -= bln_stats.bln_hv_pages;
		else
			freemem = 0;
	}
#endif

	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
	    (uint64_t)freemem * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(freemem));

#if defined(__i386) || defined(__amd64)
	/* bln_size was set under the same #if above, so this is safe */
	if (bln_size != -1) {
		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
		    bln_stats.bln_hv_pages,
		    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
	}
#endif

	mdb_printf("\nTotal            %16lu  %16lu\n",
	    physmem,
	    (uint64_t)physmem * PAGESIZE / (1024 * 1024));

	if (physmem != total_pages) {
		mdb_printf("Physical         %16lu  %16lu\n",
		    total_pages,
		    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
	}

#undef MS_PCT_TOTAL

	return (DCMD_OK);
}
658
/*
 * Print usage help for the ::pagelookup dcmd.
 */
void
pagelookup_help(void)
{
	mdb_printf(
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n"
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n"
	    "The latter two forms are useful in pipelines.\n");
}
672
673 int
pagelookup(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)674 pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
675 {
676 uintptr_t vp = -(uintptr_t)1;
677 uint64_t offset = -(uint64_t)1;
678
679 uintptr_t pageaddr;
680 int hasaddr = (flags & DCMD_ADDRSPEC);
681 int usedaddr = 0;
682
683 if (mdb_getopts(argc, argv,
684 'v', MDB_OPT_UINTPTR, &vp,
685 'o', MDB_OPT_UINT64, &offset,
686 0) != argc) {
687 return (DCMD_USAGE);
688 }
689
690 if (vp == -(uintptr_t)1) {
691 if (offset == -(uint64_t)1) {
692 mdb_warn(
693 "pagelookup: at least one of -v vp or -o offset "
694 "required.\n");
695 return (DCMD_USAGE);
696 }
697 vp = addr;
698 usedaddr = 1;
699 } else if (offset == -(uint64_t)1) {
700 offset = mdb_get_dot();
701 usedaddr = 1;
702 }
703 if (usedaddr && !hasaddr) {
704 mdb_warn("pagelookup: address required\n");
705 return (DCMD_USAGE);
706 }
707 if (!usedaddr && hasaddr) {
708 mdb_warn(
709 "pagelookup: address specified when both -v and -o were "
710 "passed");
711 return (DCMD_USAGE);
712 }
713
714 pageaddr = mdb_page_lookup(vp, offset);
715 if (pageaddr == 0) {
716 mdb_warn("pagelookup: no page for {vp = %p, offset = %llp)\n",
717 vp, offset);
718 return (DCMD_OK);
719 }
720 mdb_printf("%#lr\n", pageaddr); /* this is PIPE_OUT friendly */
721 return (DCMD_OK);
722 }
723
724 /*ARGSUSED*/
725 int
page_num2pp(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)726 page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
727 {
728 uintptr_t pp;
729
730 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
731 return (DCMD_USAGE);
732 }
733
734 pp = mdb_pfn2page((pfn_t)addr);
735 if (pp == 0) {
736 return (DCMD_ERR);
737 }
738
739 if (flags & DCMD_PIPE_OUT) {
740 mdb_printf("%#lr\n", pp);
741 } else {
742 mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
743 }
744
745 return (DCMD_OK);
746 }
747
/*
 * ::page dcmd -- display one page_t; with no address, re-invoke itself
 * over the "page" walker to display every page in the system.
 */
int
page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	page_t	p;

	if (!(flags & DCMD_ADDRSPEC)) {
		/* no address given: apply ::page to each walked page */
		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
			mdb_warn("can't walk pages");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
		    "PAGE", "VNODE", "OFFSET", "SELOCK",
		    "LCT", "COW", "IO", "FS", "ST");
	}

	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
		mdb_warn("can't read page_t at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
	    p.p_iolock_state, p.p_fsdata, p.p_state);

	return (DCMD_OK);
}
778
779 int
swap_walk_init(mdb_walk_state_t * wsp)780 swap_walk_init(mdb_walk_state_t *wsp)
781 {
782 void *ptr;
783
784 if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
785 mdb_warn("swapinfo not found or invalid");
786 return (WALK_ERR);
787 }
788
789 wsp->walk_addr = (uintptr_t)ptr;
790
791 return (WALK_NEXT);
792 }
793
794 int
swap_walk_step(mdb_walk_state_t * wsp)795 swap_walk_step(mdb_walk_state_t *wsp)
796 {
797 uintptr_t sip;
798 struct swapinfo si;
799
800 sip = wsp->walk_addr;
801
802 if (sip == NULL)
803 return (WALK_DONE);
804
805 if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
806 mdb_warn("unable to read swapinfo at %#lx", sip);
807 return (WALK_ERR);
808 }
809
810 wsp->walk_addr = (uintptr_t)si.si_next;
811
812 return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
813 }
814
/*
 * ::swapinfo dcmd -- print one swapinfo entry; with no address,
 * re-invoke itself over the "swapinfo" walker to print all of them.
 */
int
swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct swapinfo si;
	char *name;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
			mdb_warn("can't walk swapinfo");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
	}

	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
		mdb_warn("can't read swapinfo at %#lx", addr);
		return (DCMD_ERR);
	}

	/* UM_GC buffer: mdb reclaims it when the dcmd completes */
	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
		name = "*error*";

	mdb_printf("%0?lx %?p %9d %9d %s\n",
	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);

	return (DCMD_OK);
}
848
849 int
memlist_walk_step(mdb_walk_state_t * wsp)850 memlist_walk_step(mdb_walk_state_t *wsp)
851 {
852 uintptr_t mlp;
853 struct memlist ml;
854
855 mlp = wsp->walk_addr;
856
857 if (mlp == NULL)
858 return (WALK_DONE);
859
860 if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
861 mdb_warn("unable to read memlist at %#lx", mlp);
862 return (WALK_ERR);
863 }
864
865 wsp->walk_addr = (uintptr_t)ml.ml_next;
866
867 return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
868 }
869
870 int
memlist(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)871 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
872 {
873 struct memlist ml;
874
875 if (!(flags & DCMD_ADDRSPEC)) {
876 uintptr_t ptr;
877 uint_t list = 0;
878 int i;
879 static const char *lists[] = {
880 "phys_install",
881 "phys_avail",
882 "virt_avail"
883 };
884
885 if (mdb_getopts(argc, argv,
886 'i', MDB_OPT_SETBITS, (1 << 0), &list,
887 'a', MDB_OPT_SETBITS, (1 << 1), &list,
888 'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
889 return (DCMD_USAGE);
890
891 if (!list)
892 list = 1;
893
894 for (i = 0; list; i++, list >>= 1) {
895 if (!(list & 1))
896 continue;
897 if ((mdb_readvar(&ptr, lists[i]) == -1) ||
898 (ptr == NULL)) {
899 mdb_warn("%s not found or invalid", lists[i]);
900 return (DCMD_ERR);
901 }
902
903 mdb_printf("%s:\n", lists[i]);
904 if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
905 ptr) == -1) {
906 mdb_warn("can't walk memlist");
907 return (DCMD_ERR);
908 }
909 }
910 return (DCMD_OK);
911 }
912
913 if (DCMD_HDRSPEC(flags))
914 mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
915
916 if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
917 mdb_warn("can't read memlist at %#lx", addr);
918 return (DCMD_ERR);
919 }
920
921 mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);
922
923 return (DCMD_OK);
924 }
925
926 int
seg_walk_init(mdb_walk_state_t * wsp)927 seg_walk_init(mdb_walk_state_t *wsp)
928 {
929 if (wsp->walk_addr == NULL) {
930 mdb_warn("seg walk must begin at struct as *\n");
931 return (WALK_ERR);
932 }
933
934 /*
935 * this is really just a wrapper to AVL tree walk
936 */
937 wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
938 return (avl_walk_init(wsp));
939 }
940
941 /*ARGSUSED*/
942 int
seg(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)943 seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
944 {
945 struct seg s;
946
947 if (argc != 0)
948 return (DCMD_USAGE);
949
950 if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
951 mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
952 "SEG", "BASE", "SIZE", "DATA", "OPS");
953 }
954
955 if (mdb_vread(&s, sizeof (s), addr) == -1) {
956 mdb_warn("failed to read seg at %p", addr);
957 return (DCMD_ERR);
958 }
959
960 mdb_printf("%?p %?p %?lx %?p %a\n",
961 addr, s.s_base, s.s_size, s.s_data, s.s_ops);
962
963 return (DCMD_OK);
964 }
965
966 /*ARGSUSED*/
967 static int
pmap_walk_count_pages(uintptr_t addr,const void * data,void * out)968 pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
969 {
970 pgcnt_t *nres = out;
971
972 (*nres)++;
973
974 return (WALK_NEXT);
975 }
976
/*
 * Per-segment callback for ::pmap.  Prints the segment's address, base
 * and size; for segvn segments it additionally counts the in-core pages
 * (resident size) and resolves the backing vnode to a path.
 */
static int
pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
{

	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
		struct segvn_data svn;
		pgcnt_t nres = 0;

		/* default vp to NULL in case the read below fails */
		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		/*
		 * Use the segvn_pages walker to find all of the in-core pages
		 * for this mapping.
		 */
		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
		    (uintptr_t)seg->s_data) == -1) {
			mdb_warn("failed to walk segvn_pages (s_data=%p)",
			    seg->s_data);
		}
		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);

		if (svn.vp != NULL) {
			/* 29 presumably sized for the output column width */
			char buf[29];

			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
			mdb_printf(" %s", buf);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else {
		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}
1016
1017 static int
pmap_walk_seg_quick(uintptr_t addr,const struct seg * seg,uintptr_t segvn)1018 pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
1019 {
1020 mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
1021
1022 if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
1023 struct segvn_data svn;
1024
1025 svn.vp = NULL;
1026 (void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);
1027
1028 if (svn.vp != NULL) {
1029 mdb_printf(" %0?p", svn.vp);
1030 } else {
1031 mdb_printf(" [ anon ]");
1032 }
1033 } else {
1034 mdb_printf(" [ &%a ]", seg->s_ops);
1035 }
1036
1037 mdb_printf("\n");
1038 return (WALK_NEXT);
1039 }
1040
1041 /*ARGSUSED*/
1042 int
pmap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1043 pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1044 {
1045 uintptr_t segvn;
1046 proc_t proc;
1047 uint_t quick = FALSE;
1048 mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
1049
1050 GElf_Sym sym;
1051
1052 if (!(flags & DCMD_ADDRSPEC))
1053 return (DCMD_USAGE);
1054
1055 if (mdb_getopts(argc, argv,
1056 'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
1057 return (DCMD_USAGE);
1058
1059 if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
1060 mdb_warn("failed to read proc at %p", addr);
1061 return (DCMD_ERR);
1062 }
1063
1064 if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
1065 segvn = (uintptr_t)sym.st_value;
1066 else
1067 segvn = NULL;
1068
1069 mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
1070
1071 if (quick) {
1072 mdb_printf("VNODE\n");
1073 cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
1074 } else {
1075 mdb_printf("%8s %s\n", "RES", "PATH");
1076 }
1077
1078 if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) {
1079 mdb_warn("failed to walk segments of as %p", proc.p_as);
1080 return (DCMD_ERR);
1081 }
1082
1083 return (DCMD_OK);
1084 }
1085
/*
 * State for the anon walkers: cached copies of the target anon_map and
 * anon_hdr, plus buffers for the level-one slot array and (for large,
 * two-level maps) the current level-two chunk.
 */
typedef struct anon_walk_data {
	uintptr_t *aw_levone;	/* level-one slots (or chunk pointers) */
	uintptr_t *aw_levtwo;	/* current level-two chunk; NULL if one-level */
	size_t aw_minslot;	/* first slot of interest */
	size_t aw_maxslot;	/* upper bound on slots of interest */
	pgcnt_t aw_nlevone;	/* number of level-one entries */
	pgcnt_t aw_levone_ndx;	/* current level-one index */
	size_t aw_levtwo_ndx;	/* current index within the level-two chunk */
	struct anon_map *aw_ampp;	/* target anon_map address */
	struct anon_map aw_amp;		/* local copy of the anon_map */
	struct anon_hdr aw_ahp;		/* local copy of the anon header */
	int aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;
1099
1100 int
anon_walk_init_common(mdb_walk_state_t * wsp,ulong_t minslot,ulong_t maxslot)1101 anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
1102 {
1103 anon_walk_data_t *aw;
1104
1105 if (wsp->walk_addr == NULL) {
1106 mdb_warn("anon walk doesn't support global walks\n");
1107 return (WALK_ERR);
1108 }
1109
1110 aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
1111 aw->aw_ampp = (struct anon_map *)wsp->walk_addr;
1112
1113 if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
1114 mdb_warn("failed to read anon map at %p", wsp->walk_addr);
1115 mdb_free(aw, sizeof (anon_walk_data_t));
1116 return (WALK_ERR);
1117 }
1118
1119 if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
1120 (uintptr_t)(aw->aw_amp.ahp)) == -1) {
1121 mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
1122 mdb_free(aw, sizeof (anon_walk_data_t));
1123 return (WALK_ERR);
1124 }
1125
1126 /* update min and maxslot with the given constraints */
1127 maxslot = MIN(maxslot, aw->aw_ahp.size);
1128 minslot = MIN(minslot, maxslot);
1129
1130 if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
1131 (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
1132 aw->aw_nlevone = maxslot;
1133 aw->aw_levone_ndx = minslot;
1134 aw->aw_levtwo = NULL;
1135 } else {
1136 aw->aw_nlevone =
1137 (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
1138 aw->aw_levone_ndx = 0;
1139 aw->aw_levtwo =
1140 mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
1141 }
1142
1143 aw->aw_levone =
1144 mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
1145 aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);
1146
1147 mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
1148 (uintptr_t)aw->aw_ahp.array_chunk);
1149
1150 aw->aw_levtwo_ndx = 0;
1151 aw->aw_minslot = minslot;
1152 aw->aw_maxslot = maxslot;
1153
1154 out:
1155 wsp->walk_data = aw;
1156 return (0);
1157 }
1158
1159 int
anon_walk_step(mdb_walk_state_t * wsp)1160 anon_walk_step(mdb_walk_state_t *wsp)
1161 {
1162 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1163 struct anon anon;
1164 uintptr_t anonptr;
1165 ulong_t slot;
1166
1167 /*
1168 * Once we've walked through level one, we're done.
1169 */
1170 if (aw->aw_levone_ndx >= aw->aw_nlevone) {
1171 return (WALK_DONE);
1172 }
1173
1174 if (aw->aw_levtwo == NULL) {
1175 anonptr = aw->aw_levone[aw->aw_levone_ndx];
1176 aw->aw_levone_ndx++;
1177 } else {
1178 if (aw->aw_levtwo_ndx == 0) {
1179 uintptr_t levtwoptr;
1180
1181 /* The first time through, skip to our first index. */
1182 if (aw->aw_levone_ndx == 0) {
1183 aw->aw_levone_ndx =
1184 aw->aw_minslot / ANON_CHUNK_SIZE;
1185 aw->aw_levtwo_ndx =
1186 aw->aw_minslot % ANON_CHUNK_SIZE;
1187 }
1188
1189 levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];
1190
1191 if (levtwoptr == NULL) {
1192 if (!aw->aw_all) {
1193 aw->aw_levtwo_ndx = 0;
1194 aw->aw_levone_ndx++;
1195 return (WALK_NEXT);
1196 }
1197 bzero(aw->aw_levtwo,
1198 ANON_CHUNK_SIZE * sizeof (uintptr_t));
1199
1200 } else if (mdb_vread(aw->aw_levtwo,
1201 ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
1202 -1) {
1203 mdb_warn("unable to read anon_map %p's "
1204 "second-level map %d at %p",
1205 aw->aw_ampp, aw->aw_levone_ndx,
1206 levtwoptr);
1207 return (WALK_ERR);
1208 }
1209 }
1210 slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
1211 anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];
1212
1213 /* update the indices for next time */
1214 aw->aw_levtwo_ndx++;
1215 if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
1216 aw->aw_levtwo_ndx = 0;
1217 aw->aw_levone_ndx++;
1218 }
1219
1220 /* make sure the slot # is in the requested range */
1221 if (slot >= aw->aw_maxslot) {
1222 return (WALK_DONE);
1223 }
1224 }
1225
1226 if (anonptr != NULL) {
1227 mdb_vread(&anon, sizeof (anon), anonptr);
1228 return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
1229 }
1230 if (aw->aw_all) {
1231 return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
1232 }
1233 return (WALK_NEXT);
1234 }
1235
1236 void
anon_walk_fini(mdb_walk_state_t * wsp)1237 anon_walk_fini(mdb_walk_state_t *wsp)
1238 {
1239 anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
1240
1241 if (aw->aw_levtwo != NULL)
1242 mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));
1243
1244 mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
1245 mdb_free(aw, sizeof (anon_walk_data_t));
1246 }
1247
/*
 * Walk every slot of the anon_map at wsp->walk_addr (the [0, ULONG_MAX)
 * range is clamped to the map's actual size by anon_walk_init_common()).
 */
int
anon_walk_init(mdb_walk_state_t *wsp)
{
	return (anon_walk_init_common(wsp, 0, ULONG_MAX));
}
1253
1254 int
segvn_anon_walk_init(mdb_walk_state_t * wsp)1255 segvn_anon_walk_init(mdb_walk_state_t *wsp)
1256 {
1257 const uintptr_t svd_addr = wsp->walk_addr;
1258 uintptr_t amp_addr;
1259 uintptr_t seg_addr;
1260 struct segvn_data svd;
1261 struct anon_map amp;
1262 struct seg seg;
1263
1264 if (svd_addr == NULL) {
1265 mdb_warn("segvn_anon walk doesn't support global walks\n");
1266 return (WALK_ERR);
1267 }
1268 if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
1269 mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
1270 svd_addr);
1271 return (WALK_ERR);
1272 }
1273 if (svd.amp == NULL) {
1274 mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
1275 svd_addr);
1276 return (WALK_ERR);
1277 }
1278 amp_addr = (uintptr_t)svd.amp;
1279 if (mdb_vread(&, sizeof (amp), amp_addr) == -1) {
1280 mdb_warn("segvn_anon walk: unable to read amp %p for "
1281 "segvn_data %p", amp_addr, svd_addr);
1282 return (WALK_ERR);
1283 }
1284 seg_addr = (uintptr_t)svd.seg;
1285 if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
1286 mdb_warn("segvn_anon walk: unable to read seg %p for "
1287 "segvn_data %p", seg_addr, svd_addr);
1288 return (WALK_ERR);
1289 }
1290 if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
1291 mdb_warn("anon map %p is too small for segment %p\n",
1292 amp_addr, seg_addr);
1293 return (WALK_ERR);
1294 }
1295
1296 wsp->walk_addr = amp_addr;
1297 return (anon_walk_init_common(wsp,
1298 svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
1299 }
1300
1301
/* One in-core page of a vnode: its file offset and page_t address. */
typedef struct {
	u_offset_t svs_offset;	/* page's offset within its vnode */
	uintptr_t svs_page;	/* address of the corresponding page_t */
} segvn_sparse_t;
/* Cap the sparse-page array at 128K of memory. */
#define	SEGVN_MAX_SPARSE	((128 * 1024) / sizeof (segvn_sparse_t))
1307
/* State for the segvn_pages walker. */
typedef struct {
	uintptr_t svw_svdp;		/* address of target segvn_data */
	struct segvn_data svw_svd;	/* local copy of the segvn_data */
	struct seg svw_seg;		/* local copy of its seg */
	size_t svw_walkoff;		/* current byte offset into the seg */
	ulong_t svw_anonskip;		/* NOTE(review): unreferenced in this
					 * chunk -- confirm use elsewhere */
	segvn_sparse_t *svw_sparse;	/* sorted in-core pages, or NULL */
	size_t svw_sparse_idx;		/* current index into svw_sparse */
	size_t svw_sparse_count;	/* entries filled in svw_sparse */
	size_t svw_sparse_size;		/* allocated entries in svw_sparse */
	uint8_t svw_sparse_overflow;	/* set if svw_sparse filled up */
	uint8_t svw_all;		/* report all offsets, even no-page */
} segvn_walk_data_t;
1321
1322 static int
segvn_sparse_fill(uintptr_t addr,const void * pp_arg,void * arg)1323 segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
1324 {
1325 segvn_walk_data_t *const svw = arg;
1326 const page_t *const pp = pp_arg;
1327 const u_offset_t offset = pp->p_offset;
1328 segvn_sparse_t *const cur =
1329 &svw->svw_sparse[svw->svw_sparse_count];
1330
1331 /* See if the page is of interest */
1332 if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
1333 return (WALK_NEXT);
1334 }
1335 /* See if we have space for the new entry, then add it. */
1336 if (svw->svw_sparse_count >= svw->svw_sparse_size) {
1337 svw->svw_sparse_overflow = 1;
1338 return (WALK_DONE);
1339 }
1340 svw->svw_sparse_count++;
1341 cur->svs_offset = offset;
1342 cur->svs_page = addr;
1343 return (WALK_NEXT);
1344 }
1345
1346 static int
segvn_sparse_cmp(const void * lp,const void * rp)1347 segvn_sparse_cmp(const void *lp, const void *rp)
1348 {
1349 const segvn_sparse_t *const l = lp;
1350 const segvn_sparse_t *const r = rp;
1351
1352 if (l->svs_offset < r->svs_offset) {
1353 return (-1);
1354 }
1355 if (l->svs_offset > r->svs_offset) {
1356 return (1);
1357 }
1358 return (0);
1359 }
1360
1361 /*
1362 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
1363 * structure. For segvn_datas without an anon structure, it just looks up
1364 * pages in the vnode. For segvn_datas with an anon structure, NULL slots
1365 * pass through to the vnode, and non-null slots are checked for residency.
1366 */
int
segvn_pages_walk_init(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t *svw;
	struct segvn_data *svd;

	if (wsp->walk_addr == NULL) {
		mdb_warn("segvn walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	/* Snapshot the segvn_data and its seg into local walk state. */
	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
	svw->svw_svdp = wsp->walk_addr;
	svw->svw_anonskip = 0;
	svw->svw_sparse_idx = 0;
	svw->svw_walkoff = 0;
	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);

	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
	    -1) {
		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	svd = &svw->svw_svd;
	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
	    (uintptr_t)svd->seg) == -1) {
		mdb_warn("failed to read seg at %p (from %p)",
		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	if (svd->amp == NULL && svd->vp == NULL) {
		/* make the walk terminate immediately; no pages */
		svw->svw_walkoff = svw->svw_seg.s_size;

	} else if (svd->amp == NULL &&
	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
		/*
		 * If we don't have an anon pointer, and the segment is large,
		 * we try to load the in-memory pages into a fixed-size array,
		 * which is then sorted and reported directly. This is much
		 * faster than doing a mdb_page_lookup() for each possible
		 * offset.
		 *
		 * If the allocation fails, or there are too many pages
		 * in-core, we fall back to looking up the pages individually.
		 */
		svw->svw_sparse = mdb_alloc(
		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
		if (svw->svw_sparse != NULL) {
			svw->svw_sparse_size = SEGVN_MAX_SPARSE;

			if (mdb_pwalk("page", segvn_sparse_fill, svw,
			    (uintptr_t)svd->vp) == -1 ||
			    svw->svw_sparse_overflow) {
				/* Fall back to per-offset lookups. */
				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
				    sizeof (*svw->svw_sparse));
				svw->svw_sparse = NULL;
			} else {
				qsort(svw->svw_sparse, svw->svw_sparse_count,
				    sizeof (*svw->svw_sparse),
				    segvn_sparse_cmp);
			}
		}

	} else if (svd->amp != NULL) {
		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
		    "segvn_anon" : "segvn_anon_all";
		/*
		 * If we're not printing all offsets, and the segvn_data has
		 * no backing VP, we can use the "segvn_anon" walker, which
		 * efficiently skips NULL slots.
		 *
		 * Otherwise, we layer over the "segvn_anon_all" walker
		 * (which reports all anon slots, even NULL ones), so that
		 * segvn_pages_walk_step() knows the precise offset for each
		 * element. It uses that offset information to look up the
		 * backing pages for NULL anon slots.
		 */
		if (mdb_layered_walk(layer, wsp) == -1) {
			mdb_warn("segvn_pages: failed to layer \"%s\" "
			    "for segvn_data %p", layer, svw->svw_svdp);
			mdb_free(svw, sizeof (*svw));
			return (WALK_ERR);
		}
	}

	wsp->walk_data = svw;
	return (WALK_NEXT);
}
1460
int
segvn_pages_walk_step(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t *const svw = wsp->walk_data;
	struct seg *const seg = &svw->svw_seg;
	struct segvn_data *const svd = &svw->svw_svd;
	uintptr_t pp;
	page_t page;

	/* If we've walked off the end of the segment, we're done. */
	if (svw->svw_walkoff >= seg->s_size) {
		return (WALK_DONE);
	}

	/*
	 * If we've got a sparse page array, just send it directly.
	 */
	if (svw->svw_sparse != NULL) {
		u_offset_t off;

		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
			/* Array exhausted; remaining offsets have no page. */
			pp = NULL;
			if (!svw->svw_all) {
				return (WALK_DONE);
			}
		} else {
			segvn_sparse_t *const svs =
			    &svw->svw_sparse[svw->svw_sparse_idx];
			off = svs->svs_offset - svd->offset;
			if (svw->svw_all && svw->svw_walkoff != off) {
				/*
				 * In "all" mode, report NULL for every
				 * offset before the next in-core page;
				 * don't consume the entry yet.
				 */
				pp = NULL;
			} else {
				pp = svs->svs_page;
				svw->svw_sparse_idx++;
			}
		}

	} else if (svd->amp == NULL || wsp->walk_addr == NULL) {
		/*
		 * If there's no anon, or the anon slot is NULL, look up
		 * <vp, offset>.
		 */
		if (svd->vp != NULL) {
			pp = mdb_page_lookup((uintptr_t)svd->vp,
			    svd->offset + svw->svw_walkoff);
		} else {
			pp = NULL;
		}

	} else {
		const struct anon *const anon = wsp->walk_layer;

		/*
		 * We have a "struct anon"; if it's not swapped out,
		 * look up the page.
		 */
		if (anon->an_vp != NULL || anon->an_off != 0) {
			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
			    anon->an_off);
			/*
			 * Only complain on a static target; presumably on a
			 * live system the page may legitimately have gone
			 * away since the walk began -- confirm.
			 */
			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
				mdb_warn("walk segvn_pages: segvn_data %p "
				    "offset %ld, anon page <%p, %llx> not "
				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
				    anon->an_vp, anon->an_off);
			}
		} else {
			if (anon->an_pvp == NULL) {
				mdb_warn("walk segvn_pages: useless struct "
				    "anon at %p\n", wsp->walk_addr);
			}
			pp = NULL;	/* nothing at this offset */
		}
	}

	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
	if (pp != NULL) {
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}
		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
	}
	if (svw->svw_all) {
		return (wsp->walk_callback(NULL, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}
1548
1549 void
segvn_pages_walk_fini(mdb_walk_state_t * wsp)1550 segvn_pages_walk_fini(mdb_walk_state_t *wsp)
1551 {
1552 segvn_walk_data_t *const svw = wsp->walk_data;
1553
1554 if (svw->svw_sparse != NULL) {
1555 mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
1556 sizeof (*svw->svw_sparse));
1557 }
1558 mdb_free(svw, sizeof (*svw));
1559 }
1560
/*
 * Grumble, grumble: a private copy of segmap's smap hash function.
 * NOTE(review): presumably this must stay in lockstep with the kernel's
 * hash in vm/seg_map.h -- confirm.  `smd_hashmsk' is expected to be a
 * variable in scope at each use site.
 */
#define	SMAP_HASHFUNC(vp, off)	\
	((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	((off) >> MAXBSHIFT)) & smd_hashmsk)
1567
1568 int
vnode2smap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1569 vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1570 {
1571 long smd_hashmsk;
1572 int hash;
1573 uintptr_t offset = 0;
1574 struct smap smp;
1575 uintptr_t saddr, kaddr;
1576 uintptr_t smd_hash, smd_smap;
1577 struct seg seg;
1578
1579 if (!(flags & DCMD_ADDRSPEC))
1580 return (DCMD_USAGE);
1581
1582 if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
1583 mdb_warn("failed to read smd_hashmsk");
1584 return (DCMD_ERR);
1585 }
1586
1587 if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
1588 mdb_warn("failed to read smd_hash");
1589 return (DCMD_ERR);
1590 }
1591
1592 if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
1593 mdb_warn("failed to read smd_hash");
1594 return (DCMD_ERR);
1595 }
1596
1597 if (mdb_readvar(&kaddr, "segkmap") == -1) {
1598 mdb_warn("failed to read segkmap");
1599 return (DCMD_ERR);
1600 }
1601
1602 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1603 mdb_warn("failed to read segkmap at %p", kaddr);
1604 return (DCMD_ERR);
1605 }
1606
1607 if (argc != 0) {
1608 const mdb_arg_t *arg = &argv[0];
1609
1610 if (arg->a_type == MDB_TYPE_IMMEDIATE)
1611 offset = arg->a_un.a_val;
1612 else
1613 offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
1614 }
1615
1616 hash = SMAP_HASHFUNC(addr, offset);
1617
1618 if (mdb_vread(&saddr, sizeof (saddr),
1619 smd_hash + hash * sizeof (uintptr_t)) == -1) {
1620 mdb_warn("couldn't read smap at %p",
1621 smd_hash + hash * sizeof (uintptr_t));
1622 return (DCMD_ERR);
1623 }
1624
1625 do {
1626 if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
1627 mdb_warn("couldn't read smap at %p", saddr);
1628 return (DCMD_ERR);
1629 }
1630
1631 if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
1632 mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
1633 addr, offset, saddr, ((saddr - smd_smap) /
1634 sizeof (smp)) * MAXBSIZE + seg.s_base);
1635 return (DCMD_OK);
1636 }
1637
1638 saddr = (uintptr_t)smp.sm_hash;
1639 } while (saddr != NULL);
1640
1641 mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
1642 return (DCMD_OK);
1643 }
1644
1645 /*ARGSUSED*/
1646 int
addr2smap(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1647 addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1648 {
1649 uintptr_t kaddr;
1650 struct seg seg;
1651 struct segmap_data sd;
1652
1653 if (!(flags & DCMD_ADDRSPEC))
1654 return (DCMD_USAGE);
1655
1656 if (mdb_readvar(&kaddr, "segkmap") == -1) {
1657 mdb_warn("failed to read segkmap");
1658 return (DCMD_ERR);
1659 }
1660
1661 if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
1662 mdb_warn("failed to read segkmap at %p", kaddr);
1663 return (DCMD_ERR);
1664 }
1665
1666 if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
1667 mdb_warn("failed to read segmap_data at %p", seg.s_data);
1668 return (DCMD_ERR);
1669 }
1670
1671 mdb_printf("%p is smap %p\n", addr,
1672 ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
1673 sizeof (struct smap) + (uintptr_t)sd.smd_sm);
1674
1675 return (DCMD_OK);
1676 }
1677