/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ks.h>
#include <sys/types.h>
#include <sys/memlist.h>
#include <sys/swap.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/vnode.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/seg_hole.h>
#if defined(__i386) || defined(__amd64)
#include <sys/balloon_impl.h>
#endif

#include "avl.h"
#include "memory.h"

/*
 * Page walker.
 * By default, this will walk all pages in the system. If given an
 * address, it will walk all pages belonging to the vnode at that
 * address.
 */

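/*
 * Example usage (the vnode address is hypothetical):
 *
 *	> ::walk page
 *		walks every page in the system
 *	> ffffff01d2f5c580::walk page
 *		walks the pages of the vnode at that address
 */
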
/*
 * page_walk_data
 *
 * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
 * number of hash locations remaining in the page hash table when
 * walking all pages.
 *
 * The astute reader will notice that pw_hashloc is only used when
 * reading all pages (to hold a pointer to our location in the page
 * hash table), and that pw_first is only used when reading the pages
 * belonging to a particular vnode (to hold a pointer to the first
 * page). While these could be combined into a single pointer, they
 * are left separate for clarity.
 */
typedef struct page_walk_data {
	long		pw_hashleft;
	void		**pw_hashloc;
	uintptr_t	pw_first;
} page_walk_data_t;

int
page_walk_init(mdb_walk_state_t *wsp)
{
	page_walk_data_t *pwd;
	void	**ptr;
	size_t	hashsz;
	vnode_t	vn;

	if (wsp->walk_addr == 0) {

		/*
		 * Walk all pages
		 */

		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
		    (ptr == NULL) || (hashsz == 0)) {
			mdb_warn("page_hash, page_hashsz not found or invalid");
			return (WALK_ERR);
		}

		/*
		 * Since we are walking all pages, initialize hashleft
		 * to be the remaining number of entries in the page
		 * hash.  hashloc is set to the start of the page hash
		 * table.  Setting the walk address to 0 indicates that
		 * we aren't currently following a hash chain, and that
		 * we need to scan the page hash table for a page.
		 */
		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
		pwd->pw_hashleft = hashsz;
		pwd->pw_hashloc = ptr;
		wsp->walk_addr = 0;
	} else {

		/*
		 * Walk just this vnode
		 */

		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx",
			    wsp->walk_addr);
			return (WALK_ERR);
		}

		/*
		 * We set hashleft to -1 to indicate that we are
		 * walking a vnode, and initialize first to 0 (it is
		 * used to terminate the walk, so it must not be set
		 * until after we have walked the first page).  The
		 * walk address is set to the first page.
		 */
		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
		pwd->pw_hashleft = -1;
		pwd->pw_first = 0;

		wsp->walk_addr = (uintptr_t)vn.v_pages;
	}

	wsp->walk_data = pwd;

	return (WALK_NEXT);
}

int
page_walk_step(mdb_walk_state_t *wsp)
{
	page_walk_data_t *pwd = wsp->walk_data;
	page_t		page;
	uintptr_t	pp;

	pp = wsp->walk_addr;

	if (pwd->pw_hashleft < 0) {

		/* We're walking a vnode's pages */

		/*
		 * If we don't have any pages to walk, we have come
		 * back around to the first one (we finished), or we
		 * can't read the page we're looking at, we are done.
		 */
		if (pp == 0 || pp == pwd->pw_first)
			return (WALK_DONE);
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}

		/*
		 * Set the walk address to the next page, and if the
		 * first page hasn't been set yet (i.e. we are on the
		 * first page), set it.
		 */
		wsp->walk_addr = (uintptr_t)page.p_vpnext;
		if (pwd->pw_first == 0)
			pwd->pw_first = pp;

	} else if (pwd->pw_hashleft > 0) {

		/* We're walking all pages */

		/*
		 * If pp (the walk address) is NULL, we scan through
		 * the page hash table until we find a page.
		 */
		if (pp == 0) {

			/*
			 * Iterate through the page hash table until we
			 * find a page or reach the end.
			 */
			do {
				if (mdb_vread(&pp, sizeof (uintptr_t),
				    (uintptr_t)pwd->pw_hashloc) == -1) {
					mdb_warn("unable to read from %#p",
					    pwd->pw_hashloc);
					return (WALK_ERR);
				}
				pwd->pw_hashleft--;
				pwd->pw_hashloc++;
			} while (pwd->pw_hashleft && (pp == 0));

			/*
			 * We've reached the end; exit.
			 */
			if (pp == 0)
				return (WALK_DONE);
		}

		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}

		/*
		 * Set the walk address to the next page.
		 */
		wsp->walk_addr = (uintptr_t)page.p_hash;

	} else {
		/* We've finished walking all pages. */
		return (WALK_DONE);
	}

	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
}

void
page_walk_fini(mdb_walk_state_t *wsp)
{
	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
}

/*
 * allpages walks all pages in the system in the order they appear in
 * the memseg structure
 */

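/*
 * Example (a sketch): pipe the walk into ::page to format each page_t:
 *
 *	> ::walk allpages | ::page
 */
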
#define	PAGE_BUFFER	128

int
allpages_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr != 0) {
		mdb_warn("allpages only supports global walks.\n");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("memseg", wsp) == -1) {
		mdb_warn("couldn't walk 'memseg'");
		return (WALK_ERR);
	}

	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
	return (WALK_NEXT);
}

int
allpages_walk_step(mdb_walk_state_t *wsp)
{
	const struct memseg *msp = wsp->walk_layer;
	page_t *buf = wsp->walk_data;
	size_t pg_read, i;
	size_t pg_num = msp->pages_end - msp->pages_base;
	const page_t *pg_addr = msp->pages;

	while (pg_num > 0) {
		pg_read = MIN(pg_num, PAGE_BUFFER);

		if (mdb_vread(buf, pg_read * sizeof (page_t),
		    (uintptr_t)pg_addr) == -1) {
			mdb_warn("can't read page_t's at %#lx", pg_addr);
			return (WALK_ERR);
		}
		for (i = 0; i < pg_read; i++) {
			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
			    &buf[i], wsp->walk_cbdata);

			if (ret != WALK_NEXT)
				return (ret);
		}
		pg_num -= pg_read;
		pg_addr += pg_read;
	}

	return (WALK_NEXT);
}

void
allpages_walk_fini(mdb_walk_state_t *wsp)
{
	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
}

/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds up the memstat command on live, large CPU count systems.
 */

#define	VN_SMALL	401
#define	VN_LARGE	10007
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))

struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode	*/
	uintptr_t vn_ptr;			/* pointer to vnode	*/
	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
};

/*
 * vn_q_first  -> points to the head of the queue: the vnode that was most
 *                recently used
 * vn_q_last   -> points to the oldest used vnode, and is freed once a new
 *                vnode is read.
 * vn_htable   -> hash table
 * vn_htable_buf -> contains htable objects
 * vn_htable_size -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
	struct vn_htable_list  *vn_q_first;
	struct vn_htable_list  *vn_q_last;
	struct vn_htable_list **vn_htable;
	struct vn_htable_list  *vn_htable_buf;
	int vn_htable_size;
	int vn_htable_buckets;
} vn_htable_t;

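/*
 * Intended use, as a sketch (vnode_addr stands for some vnode_t address):
 *
 *	vn_htable_t ht;
 *	struct vnode vn;
 *
 *	vn_htable_init(&ht, 0);
 *	if (vn_get(&ht, &vn, vnode_addr) == 0) {
 *		... examine vn.v_flag ...
 *	}
 */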

/* allocate memory, initialize hash table and LRU queue */
static void
vn_htable_init(vn_htable_t *hp, size_t vn_size)
{
	int i;
	int htable_size = MAX(vn_size, VN_LARGE);

	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
		htable_size = VN_SMALL;
		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
		    * htable_size, UM_SLEEP|UM_GC);
	}

	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
	    * htable_size, UM_SLEEP|UM_GC);

	hp->vn_q_first = &hp->vn_htable_buf[0];
	hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1];
	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];

	for (i = 1; i < (htable_size-1); i++) {
		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
	}

	hp->vn_htable_size = htable_size;
	hp->vn_htable_buckets = htable_size;
}


/*
 * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
 * The function tries to find the needed information in the following order:
 *
 * 1. check if ptr is the first in queue
 * 2. check if ptr is in the hash table (if so, move it to the top of queue)
 * 3. do mdb_vread, remove the last item from the queue and the hash table.
 *    Insert the new information into the freed object, and put this object
 *    at the top of the queue.
 */
static int
vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
{
	int hkey;
	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
	struct vn_htable_list  *q_first = hp->vn_q_first;

	/* 1. vnode ptr is the first in queue, just get v_flag and return */
	if (q_first->vn_ptr == ptr) {
		vp->v_flag = q_first->vn_flag;

		return (0);
	}

	/* 2. search the hash table for this ptr */
	hkey = VN_HTABLE_KEY(ptr, hp);
	hent = hp->vn_htable[hkey];
	while (hent && (hent->vn_ptr != ptr))
		hent = hent->vn_h_next;

	/* 3. if hent is NULL, it is not in the hash table; do mdb_vread */
	if (hent == NULL) {
		struct vnode vn;

		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
			mdb_warn("unable to read vnode_t at %#lx", ptr);
			return (-1);
		}

		/* we will insert read data into the last element in queue */
		hent = hp->vn_q_last;

		/* remove last hp->vn_q_last object from hash table */
		if (hent->vn_ptr) {
			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
			while (*htmp != hent)
				htmp = &(*htmp)->vn_h_next;
			*htmp = hent->vn_h_next;
		}

		/* insert data into new free object */
		hent->vn_ptr = ptr;
		hent->vn_flag = vn.v_flag;

		/* insert new object into hash table */
		hent->vn_h_next = hp->vn_htable[hkey];
		hp->vn_htable[hkey] = hent;
	}

	/* Remove from queue.  hent is not first, vn_q_prev is not NULL */
	q_next = hent->vn_q_next;
	q_prev = hent->vn_q_prev;
	if (q_next == NULL)
		hp->vn_q_last = q_prev;
	else
		q_next->vn_q_prev = q_prev;
	q_prev->vn_q_next = q_next;

	/* Add to the front of queue */
	hent->vn_q_prev = NULL;
	hent->vn_q_next = q_first;
	q_first->vn_q_prev = hent;
	hp->vn_q_first = hent;

	/* Set v_flag in vnode pointer from hent */
	vp->v_flag = hent->vn_flag;

	return (0);
}

/* Summary statistics of pages */
typedef struct memstat {
	struct vnode	*ms_unused_vp;	/* Unused pages vnode pointer	  */
	struct vnode	*ms_kvps;	/* Cached address of vnode array  */
	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
	uint64_t	ms_vmm_mem;	/* Pages of VMM mem		  */
	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
	uint64_t	ms_bootpages;	/* Pages on the bootpages list	  */
	uint64_t	ms_total;	/* Pages on page hash		  */
	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
	struct vnode	ms_vn;		/* vnode buffer			  */
} memstat_t;

#define	MS_PP_ISTYPE(pp, stats, index) \
	((pp)->p_vnode == &(stats->ms_kvps[index]))

/*
 * Summarize pages by type and update stat information
 */

/* ARGSUSED */
static int
memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
{
	struct vnode *vp = &stats->ms_vn;

	if (PP_ISBOOTPAGES(pp))
		stats->ms_bootpages++;
	else if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
		return (WALK_NEXT);
	else if (MS_PP_ISTYPE(pp, stats, KV_KVP))
		stats->ms_kmem++;
	else if (MS_PP_ISTYPE(pp, stats, KV_ZVP))
		stats->ms_zfs_data++;
	else if (MS_PP_ISTYPE(pp, stats, KV_VVP))
		stats->ms_vmm_mem++;
	else if (PP_ISFREE(pp))
		stats->ms_cachelist++;
	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
		return (WALK_ERR);
	else if (IS_SWAPFSVP(vp))
		stats->ms_anon++;
	else if ((vp->v_flag & VVMEXEC) != 0)
		stats->ms_exec++;
	else
		stats->ms_vnode++;

	stats->ms_total++;

	return (WALK_NEXT);
}

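/*
 * Example invocation (the output shape follows the mdb_printf calls below;
 * the numbers themselves are hypothetical):
 *
 *	> ::memstat
 *	Page Summary                Pages                MB  %Tot
 *	------------     ----------------  ----------------  ----
 *	Kernel                     123456               482    6%
 *	...
 */
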
/* ARGSUSED */
int
memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	pgcnt_t total_pages, physmem;
	ulong_t freemem;
	memstat_t stats;
	GElf_Sym sym;
	vn_htable_t ht;
	uintptr_t vn_size = 0;
#if defined(__i386) || defined(__amd64)
	bln_stats_t bln_stats;
	ssize_t bln_size;
#endif

	bzero(&stats, sizeof (memstat_t));

	/*
	 * -s size is an internal option.  It specifies the size of vn_htable.
	 * The hash table size is chosen in the following order: if the user
	 * has specified a size larger than VN_LARGE, try it, but fall back to
	 * VN_SMALL if the allocation fails.  Otherwise try VN_LARGE, again
	 * falling back to VN_SMALL if the allocation fails.
	 * For better hash table efficiency it is highly recommended to set
	 * the size to a prime number.
	 */
	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
		return (DCMD_USAGE);

	/* Initialize vnode hash list and queue */
	vn_htable_init(&ht, vn_size);
	stats.ms_vn_htable = &ht;

	/* Total physical memory */
	if (mdb_readvar(&total_pages, "total_pages") == -1) {
		mdb_warn("unable to read total_pages");
		return (DCMD_ERR);
	}

	/* Artificially limited memory */
	if (mdb_readvar(&physmem, "physmem") == -1) {
		mdb_warn("unable to read physmem");
		return (DCMD_ERR);
	}

	/* read kernel vnode array pointer */
	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
	    (GElf_Sym *)&sym) == -1) {
		mdb_warn("unable to look up kvps");
		return (DCMD_ERR);
	}
	stats.ms_kvps = (struct vnode *)(uintptr_t)sym.st_value;

	/*
	 * If physmem != total_pages, then the administrator has limited the
	 * number of pages available in the system.  Excluded pages are
	 * associated with the unused pages vnode.  Read this vnode so the
	 * pages can be excluded in the page accounting.
	 */
	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
	    (GElf_Sym *)&sym) == -1) {
		mdb_warn("unable to read unused_pages_vp");
		return (DCMD_ERR);
	}
	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;

	/* walk all pages, collect statistics */
	if (mdb_walk("allpages", (mdb_walk_cb_t)(uintptr_t)memstat_callback,
	    &stats) == -1) {
		mdb_warn("can't walk memseg");
		return (DCMD_ERR);
	}

#define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
		((physmem) * 10)))
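/*
 * MS_PCT_TOTAL(x) computes x as a percentage of physmem: dividing
 * x * 1000 by physmem * 10 yields x * 100 / physmem, and the added
 * 5 * total_pages term rounds the result to the nearest percent
 * (exactly so when physmem == total_pages).
 */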

	mdb_printf("Page Summary                Pages                MB"
	    "  %%Tot\n");
	mdb_printf("------------     ----------------  ----------------"
	    "  ----\n");
	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
	    stats.ms_kmem,
	    (uint64_t)stats.ms_kmem * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_kmem));

	if (stats.ms_bootpages != 0) {
		mdb_printf("Boot pages       %16llu  %16llu  %3lu%%\n",
		    stats.ms_bootpages,
		    (uint64_t)stats.ms_bootpages * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(stats.ms_bootpages));
	}

	if (stats.ms_zfs_data != 0) {
		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
		    stats.ms_zfs_data,
		    (uint64_t)stats.ms_zfs_data * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(stats.ms_zfs_data));
	}

	if (stats.ms_vmm_mem != 0) {
		mdb_printf("VMM Memory       %16llu  %16llu  %3lu%%\n",
		    stats.ms_vmm_mem,
		    (uint64_t)stats.ms_vmm_mem * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(stats.ms_vmm_mem));
	}

	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
	    stats.ms_anon,
	    (uint64_t)stats.ms_anon * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_anon));
	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
	    stats.ms_exec,
	    (uint64_t)stats.ms_exec * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_exec));
	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
	    stats.ms_vnode,
	    (uint64_t)stats.ms_vnode * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_vnode));
	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
	    stats.ms_cachelist,
	    (uint64_t)stats.ms_cachelist * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(stats.ms_cachelist));

	/*
	 * Occasionally, we double count pages above.  To avoid printing
	 * absurdly large values for freemem, we clamp it at zero.
	 */
	if (physmem > stats.ms_total)
		freemem = physmem - stats.ms_total;
	else
		freemem = 0;

#if defined(__i386) || defined(__amd64)
	/* Are we running under Xen?  If so, get balloon memory usage. */
	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
		if (freemem > bln_stats.bln_hv_pages)
			freemem -= bln_stats.bln_hv_pages;
		else
			freemem = 0;
	}
#endif

	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
	    (uint64_t)freemem * PAGESIZE / (1024 * 1024),
	    MS_PCT_TOTAL(freemem));

#if defined(__i386) || defined(__amd64)
	if (bln_size != -1) {
		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
		    bln_stats.bln_hv_pages,
		    (uint64_t)bln_stats.bln_hv_pages * PAGESIZE / (1024 * 1024),
		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
	}
#endif

	mdb_printf("\nTotal            %16lu  %16lu\n",
	    physmem,
	    (uint64_t)physmem * PAGESIZE / (1024 * 1024));

	if (physmem != total_pages) {
		mdb_printf("Physical         %16lu  %16lu\n",
		    total_pages,
		    (uint64_t)total_pages * PAGESIZE / (1024 * 1024));
	}

#undef MS_PCT_TOTAL

	return (DCMD_OK);
}

void
pagelookup_help(void)
{
	mdb_printf(
	    "Finds the page with name { %<b>vp%</b>, %<b>offset%</b> }.\n"
	    "\n"
	    "Can be invoked three different ways:\n\n"
	    "    ::pagelookup -v %<b>vp%</b> -o %<b>offset%</b>\n"
	    "    %<b>vp%</b>::pagelookup -o %<b>offset%</b>\n"
	    "    %<b>offset%</b>::pagelookup -v %<b>vp%</b>\n"
	    "\n"
	    "The latter two forms are useful in pipelines.\n");
}

int
pagelookup(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uintptr_t vp = -(uintptr_t)1;
	uint64_t offset = -(uint64_t)1;

	uintptr_t pageaddr;
	int hasaddr = (flags & DCMD_ADDRSPEC);
	int usedaddr = 0;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_UINTPTR, &vp,
	    'o', MDB_OPT_UINT64, &offset,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	if (vp == -(uintptr_t)1) {
		if (offset == -(uint64_t)1) {
			mdb_warn(
			    "pagelookup: at least one of -v vp or -o offset "
			    "required.\n");
			return (DCMD_USAGE);
		}
		vp = addr;
		usedaddr = 1;
	} else if (offset == -(uint64_t)1) {
		offset = mdb_get_dot();
		usedaddr = 1;
	}
	if (usedaddr && !hasaddr) {
		mdb_warn("pagelookup: address required\n");
		return (DCMD_USAGE);
	}
	if (!usedaddr && hasaddr) {
		mdb_warn(
		    "pagelookup: address specified when both -v and -o were "
		    "passed\n");
		return (DCMD_USAGE);
	}

	pageaddr = mdb_page_lookup(vp, offset);
	if (pageaddr == 0) {
		mdb_warn("pagelookup: no page for {vp = %p, offset = %llp}\n",
		    vp, offset);
		return (DCMD_OK);
	}
	mdb_printf("%#lr\n", pageaddr);	/* this is PIPE_OUT friendly */
	return (DCMD_OK);
}

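/*
 * Example pipeline (the vnode address is hypothetical):
 *
 *	> ffffff01d2f5c580::pagelookup -o 0 | ::page
 */
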
/*ARGSUSED*/
int
page_num2pp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uintptr_t pp;

	if (argc != 0 || !(flags & DCMD_ADDRSPEC)) {
		return (DCMD_USAGE);
	}

	pp = mdb_pfn2page((pfn_t)addr);
	if (pp == 0) {
		return (DCMD_ERR);
	}

	if (flags & DCMD_PIPE_OUT) {
		mdb_printf("%#lr\n", pp);
	} else {
		mdb_printf("%lx has page_t at %#lx\n", (pfn_t)addr, pp);
	}

	return (DCMD_OK);
}

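/*
 * Examples (the PFN and resulting address are hypothetical):
 *
 *	> 1d5e0::page_num2pp
 *	1d5e0 has page_t at ffffff0189ab3c00
 *	> ffffff0189ab3c00::page
 */
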
int
page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	page_t	p;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
			mdb_warn("can't walk pages");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
		    "PAGE", "VNODE", "OFFSET", "SELOCK",
		    "LCT", "COW", "IO", "FS", "ST");
	}

	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
		mdb_warn("can't read page_t at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
	    p.p_iolock_state, p.p_fsdata, p.p_state);

	return (DCMD_OK);
}

int
swap_walk_init(mdb_walk_state_t *wsp)
{
	void	*ptr;

	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
		mdb_warn("swapinfo not found or invalid");
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)ptr;

	return (WALK_NEXT);
}

int
swap_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t	sip;
	struct swapinfo	si;

	sip = wsp->walk_addr;

	if (sip == 0)
		return (WALK_DONE);

	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
		mdb_warn("unable to read swapinfo at %#lx", sip);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)si.si_next;

	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
}

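/*
 * Example: with no address, ::swapinfo walks and formats every swapinfo
 * in the system:
 *
 *	> ::swapinfo
 */
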
int
swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct swapinfo	si;
	char		*name;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
			mdb_warn("can't walk swapinfo");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
	}

	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
		mdb_warn("can't read swapinfo at %#lx", addr);
		return (DCMD_ERR);
	}

	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
		name = "*error*";

	mdb_printf("%0?lx %?p %9d %9d %s\n",
	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);

	return (DCMD_OK);
}

int
memlist_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t	mlp;
	struct memlist	ml;

	mlp = wsp->walk_addr;

	if (mlp == 0)
		return (WALK_DONE);

	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
		mdb_warn("unable to read memlist at %#lx", mlp);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)ml.ml_next;

	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
}

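/*
 * Example: print the phys_install and phys_avail lists in one invocation
 * (the flags may be combined):
 *
 *	> ::memlist -ia
 */
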
int
memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct memlist	ml;

	if (!(flags & DCMD_ADDRSPEC)) {
		uintptr_t ptr;
		uint_t list = 0;
		int i;
		static const char *lists[] = {
			"phys_install",
			"phys_avail",
			"virt_avail"
		};

		if (mdb_getopts(argc, argv,
		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
			return (DCMD_USAGE);

		if (!list)
			list = 1;

		for (i = 0; list; i++, list >>= 1) {
			if (!(list & 1))
				continue;
			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
			    (ptr == 0)) {
				mdb_warn("%s not found or invalid", lists[i]);
				return (DCMD_ERR);
			}

			mdb_printf("%s:\n", lists[i]);
			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
			    ptr) == -1) {
				mdb_warn("can't walk memlist");
				return (DCMD_ERR);
			}
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");

	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
		mdb_warn("can't read memlist at %#lx", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.ml_address, ml.ml_size);

	return (DCMD_OK);
}

int
seg_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0) {
		mdb_warn("seg walk must begin at struct as *\n");
		return (WALK_ERR);
	}

	/*
	 * this is really just a wrapper to AVL tree walk
	 */
	wsp->walk_addr = (uintptr_t)&((struct as *)wsp->walk_addr)->a_segtree;
	return (avl_walk_init(wsp));
}

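/*
 * Example (the as address is hypothetical):
 *
 *	> ffffff01d2e4a3c0::walk seg | ::seg
 */
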
/*ARGSUSED*/
int
seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct seg s;

	if (argc != 0)
		return (DCMD_USAGE);

	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
		mdb_printf("%<u>%?s %?s %?s %?s %s%</u>\n",
		    "SEG", "BASE", "SIZE", "DATA", "OPS");
	}

	if (mdb_vread(&s, sizeof (s), addr) == -1) {
		mdb_warn("failed to read seg at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%?p %?p %?lx %?p %a\n",
	    addr, s.s_base, s.s_size, s.s_data, s.s_ops);

	return (DCMD_OK);
}

typedef struct pmap_walk_types {
	uintptr_t pwt_segvn;
	uintptr_t pwt_seghole;
} pmap_walk_types_t;

/*ARGSUSED*/
static int
pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
{
	pgcnt_t *nres = out;

	(*nres)++;

	return (WALK_NEXT);
}

static int
pmap_walk_seg(uintptr_t addr, const struct seg *seg,
    const pmap_walk_types_t *types)
{
	const uintptr_t ops = (uintptr_t)seg->s_ops;

	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (ops == types->pwt_segvn && seg->s_data != NULL) {
		struct segvn_data svn;
		pgcnt_t nres = 0;

		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		/*
		 * Use the segvn_pages walker to find all of the in-core pages
		 * for this mapping.
		 */
		if (mdb_pwalk("segvn_pages", pmap_walk_count_pages, &nres,
		    (uintptr_t)seg->s_data) == -1) {
			mdb_warn("failed to walk segvn_pages (s_data=%p)",
			    seg->s_data);
		}
		mdb_printf(" %7ldk", (nres * PAGESIZE) / 1024);

		if (svn.vp != NULL) {
			char buf[29];

			mdb_vnode2path((uintptr_t)svn.vp, buf, sizeof (buf));
			mdb_printf(" %s", buf);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else if (ops == types->pwt_seghole && seg->s_data != NULL) {
		seghole_data_t shd;
		char name[16];

		(void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data);
		if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name),
		    (uintptr_t)shd.shd_name) == 0) {
			name[0] = '\0';
		}

		mdb_printf(" %8s [ hole%s%s ]", "-",
		    name[0] == '\0' ? "" : ":", name);
	} else {
		mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}

static int
pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
    const pmap_walk_types_t *types)
{
	const uintptr_t ops = (uintptr_t)seg->s_ops;

	mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);

	if (ops == types->pwt_segvn && seg->s_data != NULL) {
		struct segvn_data svn;

		svn.vp = NULL;
		(void) mdb_vread(&svn, sizeof (svn), (uintptr_t)seg->s_data);

		if (svn.vp != NULL) {
			mdb_printf(" %0?p", svn.vp);
		} else {
			mdb_printf(" [ anon ]");
		}
	} else {
		mdb_printf(" [ &%a ]", seg->s_ops);
	}

	mdb_printf("\n");
	return (WALK_NEXT);
}

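/*
 * Example (the proc address is hypothetical; -q prints the vnode pointer
 * instead of walking pages for a resident count):
 *
 *	> ffffff01d0c3a028::pmap -q
 */
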
/*ARGSUSED*/
int
pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	proc_t proc;
	uint_t quick = FALSE;
	mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
	pmap_walk_types_t wtypes = { 0 };

	GElf_Sym sym;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'q', MDB_OPT_SETBITS, TRUE, &quick, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_vread(&proc, sizeof (proc), addr) == -1) {
		mdb_warn("failed to read proc at %p", addr);
		return (DCMD_ERR);
	}

	if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
		wtypes.pwt_segvn = (uintptr_t)sym.st_value;
	if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
		wtypes.pwt_seghole = (uintptr_t)sym.st_value;

	mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");

	if (quick) {
		mdb_printf("VNODE\n");
		cb = (mdb_walk_cb_t)pmap_walk_seg_quick;
	} else {
		mdb_printf("%8s %s\n", "RES", "PATH");
	}

	if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
		mdb_warn("failed to walk segments of as %p", proc.p_as);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

typedef struct anon_walk_data {
	uintptr_t *aw_levone;
	uintptr_t *aw_levtwo;
	size_t aw_minslot;
	size_t aw_maxslot;
	pgcnt_t aw_nlevone;
	pgcnt_t aw_levone_ndx;
	size_t aw_levtwo_ndx;
	struct anon_map	*aw_ampp;
	struct anon_map aw_amp;
	struct anon_hdr	aw_ahp;
	int		aw_all;	/* report all anon pointers, even NULLs */
} anon_walk_data_t;

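/*
 * The walker mirrors the anon_hdr layout: arrays of up to ANON_CHUNK_SIZE
 * slots (and arrays created with ANON_ALLOC_FORCE) are a single level of
 * anon pointers, while larger arrays use a level-one array of chunk
 * pointers, each chunk holding ANON_CHUNK_SIZE level-two slots.
 */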
int
anon_walk_init_common(mdb_walk_state_t *wsp, ulong_t minslot, ulong_t maxslot)
{
	anon_walk_data_t *aw;

	if (wsp->walk_addr == 0) {
		mdb_warn("anon walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	aw = mdb_alloc(sizeof (anon_walk_data_t), UM_SLEEP);
	aw->aw_ampp = (struct anon_map *)wsp->walk_addr;

	if (mdb_vread(&aw->aw_amp, sizeof (aw->aw_amp), wsp->walk_addr) == -1) {
		mdb_warn("failed to read anon map at %p", wsp->walk_addr);
		mdb_free(aw, sizeof (anon_walk_data_t));
		return (WALK_ERR);
	}

	if (mdb_vread(&aw->aw_ahp, sizeof (aw->aw_ahp),
	    (uintptr_t)(aw->aw_amp.ahp)) == -1) {
		mdb_warn("failed to read anon hdr ptr at %p", aw->aw_amp.ahp);
		mdb_free(aw, sizeof (anon_walk_data_t));
		return (WALK_ERR);
	}

	/* update min and maxslot with the given constraints */
	maxslot = MIN(maxslot, aw->aw_ahp.size);
	minslot = MIN(minslot, maxslot);

	if (aw->aw_ahp.size <= ANON_CHUNK_SIZE ||
	    (aw->aw_ahp.flags & ANON_ALLOC_FORCE)) {
		aw->aw_nlevone = maxslot;
		aw->aw_levone_ndx = minslot;
		aw->aw_levtwo = NULL;
	} else {
		aw->aw_nlevone =
		    (maxslot + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
		aw->aw_levone_ndx = 0;
		aw->aw_levtwo =
		    mdb_zalloc(ANON_CHUNK_SIZE * sizeof (uintptr_t), UM_SLEEP);
	}

	aw->aw_levone =
	    mdb_alloc(aw->aw_nlevone * sizeof (uintptr_t), UM_SLEEP);
	aw->aw_all = (wsp->walk_arg == ANON_WALK_ALL);

	mdb_vread(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t),
	    (uintptr_t)aw->aw_ahp.array_chunk);

	aw->aw_levtwo_ndx = 0;
	aw->aw_minslot = minslot;
	aw->aw_maxslot = maxslot;

out:
	wsp->walk_data = aw;
	return (0);
}

int
anon_walk_step(mdb_walk_state_t *wsp)
{
	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;
	struct anon anon;
	uintptr_t anonptr;
	ulong_t slot;

	/*
	 * Once we've walked through level one, we're done.
	 */
	if (aw->aw_levone_ndx >= aw->aw_nlevone) {
		return (WALK_DONE);
	}

	if (aw->aw_levtwo == NULL) {
		anonptr = aw->aw_levone[aw->aw_levone_ndx];
		aw->aw_levone_ndx++;
	} else {
		if (aw->aw_levtwo_ndx == 0) {
			uintptr_t levtwoptr;

			/* The first time through, skip to our first index. */
			if (aw->aw_levone_ndx == 0) {
				aw->aw_levone_ndx =
				    aw->aw_minslot / ANON_CHUNK_SIZE;
				aw->aw_levtwo_ndx =
				    aw->aw_minslot % ANON_CHUNK_SIZE;
			}

			levtwoptr = (uintptr_t)aw->aw_levone[aw->aw_levone_ndx];

			if (levtwoptr == 0) {
				if (!aw->aw_all) {
					aw->aw_levtwo_ndx = 0;
					aw->aw_levone_ndx++;
					return (WALK_NEXT);
				}
				bzero(aw->aw_levtwo,
				    ANON_CHUNK_SIZE * sizeof (uintptr_t));

			} else if (mdb_vread(aw->aw_levtwo,
			    ANON_CHUNK_SIZE * sizeof (uintptr_t), levtwoptr) ==
			    -1) {
				mdb_warn("unable to read anon_map %p's "
				    "second-level map %d at %p",
				    aw->aw_ampp, aw->aw_levone_ndx,
				    levtwoptr);
				return (WALK_ERR);
			}
		}
		slot = aw->aw_levone_ndx * ANON_CHUNK_SIZE + aw->aw_levtwo_ndx;
		anonptr = aw->aw_levtwo[aw->aw_levtwo_ndx];

		/* update the indices for next time */
		aw->aw_levtwo_ndx++;
		if (aw->aw_levtwo_ndx == ANON_CHUNK_SIZE) {
			aw->aw_levtwo_ndx = 0;
			aw->aw_levone_ndx++;
		}

		/* make sure the slot # is in the requested range */
		if (slot >= aw->aw_maxslot) {
			return (WALK_DONE);
		}
	}

	if (anonptr != 0) {
		mdb_vread(&anon, sizeof (anon), anonptr);
		return (wsp->walk_callback(anonptr, &anon, wsp->walk_cbdata));
	}
	if (aw->aw_all) {
		return (wsp->walk_callback(0, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}

void
anon_walk_fini(mdb_walk_state_t *wsp)
{
	anon_walk_data_t *aw = (anon_walk_data_t *)wsp->walk_data;

	if (aw->aw_levtwo != NULL)
		mdb_free(aw->aw_levtwo, ANON_CHUNK_SIZE * sizeof (uintptr_t));

	mdb_free(aw->aw_levone, aw->aw_nlevone * sizeof (uintptr_t));
	mdb_free(aw, sizeof (anon_walk_data_t));
}

int
anon_walk_init(mdb_walk_state_t *wsp)
{
	return (anon_walk_init_common(wsp, 0, ULONG_MAX));
}

int
segvn_anon_walk_init(mdb_walk_state_t *wsp)
{
	const uintptr_t	svd_addr = wsp->walk_addr;
	uintptr_t	amp_addr;
	uintptr_t	seg_addr;
	struct segvn_data	svd;
	struct anon_map		amp;
	struct seg		seg;

	if (svd_addr == 0) {
		mdb_warn("segvn_anon walk doesn't support global walks\n");
		return (WALK_ERR);
	}
	if (mdb_vread(&svd, sizeof (svd), svd_addr) == -1) {
		mdb_warn("segvn_anon walk: unable to read segvn_data at %p",
		    svd_addr);
		return (WALK_ERR);
	}
	if (svd.amp == NULL) {
		mdb_warn("segvn_anon walk: segvn_data at %p has no anon map\n",
		    svd_addr);
		return (WALK_ERR);
	}
	amp_addr = (uintptr_t)svd.amp;
	if (mdb_vread(&amp, sizeof (amp), amp_addr) == -1) {
		mdb_warn("segvn_anon walk: unable to read amp %p for "
		    "segvn_data %p", amp_addr, svd_addr);
		return (WALK_ERR);
	}
	seg_addr = (uintptr_t)svd.seg;
	if (mdb_vread(&seg, sizeof (seg), seg_addr) == -1) {
		mdb_warn("segvn_anon walk: unable to read seg %p for "
		    "segvn_data %p", seg_addr, svd_addr);
		return (WALK_ERR);
	}
	if ((seg.s_size + (svd.anon_index << PAGESHIFT)) > amp.size) {
		mdb_warn("anon map %p is too small for segment %p\n",
		    amp_addr, seg_addr);
		return (WALK_ERR);
	}

	wsp->walk_addr = amp_addr;
	return (anon_walk_init_common(wsp,
	    svd.anon_index, svd.anon_index + (seg.s_size >> PAGESHIFT)));
}


typedef struct {
	u_offset_t	svs_offset;
	uintptr_t	svs_page;
} segvn_sparse_t;
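/*
 * Cap the sparse array at 128K of memory; with 16-byte entries (on a
 * 64-bit kernel) that is room for 8192 in-core pages.
 */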
#define	SEGVN_MAX_SPARSE	((128 * 1024) / sizeof (segvn_sparse_t))

typedef struct {
	uintptr_t		svw_svdp;
	struct segvn_data	svw_svd;
	struct seg		svw_seg;
	size_t			svw_walkoff;
	ulong_t			svw_anonskip;
	segvn_sparse_t		*svw_sparse;
	size_t			svw_sparse_idx;
	size_t			svw_sparse_count;
	size_t			svw_sparse_size;
	uint8_t			svw_sparse_overflow;
	uint8_t			svw_all;
} segvn_walk_data_t;

static int
segvn_sparse_fill(uintptr_t addr, const void *pp_arg, void *arg)
{
	segvn_walk_data_t	*const svw = arg;
	const page_t		*const pp = pp_arg;
	const u_offset_t	offset = pp->p_offset;
	segvn_sparse_t		*const cur =
	    &svw->svw_sparse[svw->svw_sparse_count];

	/* See if the page is of interest */
	if ((u_offset_t)(offset - svw->svw_svd.offset) >= svw->svw_seg.s_size) {
		return (WALK_NEXT);
	}
	/* See if we have space for the new entry, then add it. */
	if (svw->svw_sparse_count >= svw->svw_sparse_size) {
		svw->svw_sparse_overflow = 1;
		return (WALK_DONE);
	}
	svw->svw_sparse_count++;
	cur->svs_offset = offset;
	cur->svs_page = addr;
	return (WALK_NEXT);
}

static int
segvn_sparse_cmp(const void *lp, const void *rp)
{
	const segvn_sparse_t *const l = lp;
	const segvn_sparse_t *const r = rp;

	if (l->svs_offset < r->svs_offset) {
		return (-1);
	}
	if (l->svs_offset > r->svs_offset) {
		return (1);
	}
	return (0);
}

/*
 * Builds on the "anon_all" walker to walk all resident pages in a segvn_data
 * structure.  For segvn_datas without an anon structure, it just looks up
 * pages in the vnode.  For segvn_datas with an anon structure, NULL slots
 * pass through to the vnode, and non-NULL slots are checked for residency.
 */
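/*
 * Example (the segvn_data address is hypothetical):
 *
 *	> ffffff01d2f0e140::walk segvn_pages | ::page
 */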
int
segvn_pages_walk_init(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t	*svw;
	struct segvn_data	*svd;

	if (wsp->walk_addr == 0) {
		mdb_warn("segvn walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	svw = mdb_zalloc(sizeof (*svw), UM_SLEEP);
	svw->svw_svdp = wsp->walk_addr;
	svw->svw_anonskip = 0;
	svw->svw_sparse_idx = 0;
	svw->svw_walkoff = 0;
	svw->svw_all = (wsp->walk_arg == SEGVN_PAGES_ALL);

	if (mdb_vread(&svw->svw_svd, sizeof (svw->svw_svd), wsp->walk_addr) ==
	    -1) {
		mdb_warn("failed to read segvn_data at %p", wsp->walk_addr);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	svd = &svw->svw_svd;
	if (mdb_vread(&svw->svw_seg, sizeof (svw->svw_seg),
	    (uintptr_t)svd->seg) == -1) {
		mdb_warn("failed to read seg at %p (from %p)",
		    svd->seg, &((struct segvn_data *)(wsp->walk_addr))->seg);
		mdb_free(svw, sizeof (*svw));
		return (WALK_ERR);
	}

	if (svd->amp == NULL && svd->vp == NULL) {
		/* make the walk terminate immediately; no pages */
		svw->svw_walkoff = svw->svw_seg.s_size;

	} else if (svd->amp == NULL &&
	    (svw->svw_seg.s_size >> PAGESHIFT) >= SEGVN_MAX_SPARSE) {
		/*
		 * If we don't have an anon pointer, and the segment is large,
		 * we try to load the in-memory pages into a fixed-size array,
		 * which is then sorted and reported directly.  This is much
		 * faster than doing an mdb_page_lookup() for each possible
		 * offset.
		 *
		 * If the allocation fails, or there are too many pages
		 * in-core, we fall back to looking up the pages individually.
		 */
		svw->svw_sparse = mdb_alloc(
		    SEGVN_MAX_SPARSE * sizeof (*svw->svw_sparse), UM_NOSLEEP);
		if (svw->svw_sparse != NULL) {
			svw->svw_sparse_size = SEGVN_MAX_SPARSE;

			if (mdb_pwalk("page", segvn_sparse_fill, svw,
			    (uintptr_t)svd->vp) == -1 ||
			    svw->svw_sparse_overflow) {
				mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
				    sizeof (*svw->svw_sparse));
				svw->svw_sparse = NULL;
			} else {
				qsort(svw->svw_sparse, svw->svw_sparse_count,
				    sizeof (*svw->svw_sparse),
				    segvn_sparse_cmp);
			}
		}

	} else if (svd->amp != NULL) {
		const char *const layer = (!svw->svw_all && svd->vp == NULL) ?
		    "segvn_anon" : "segvn_anon_all";
		/*
		 * If we're not printing all offsets, and the segvn_data has
		 * no backing VP, we can use the "segvn_anon" walker, which
		 * efficiently skips NULL slots.
		 *
		 * Otherwise, we layer over the "segvn_anon_all" walker
		 * (which reports all anon slots, even NULL ones), so that
		 * segvn_pages_walk_step() knows the precise offset for each
		 * element.  It uses that offset information to look up the
		 * backing pages for NULL anon slots.
		 */
		if (mdb_layered_walk(layer, wsp) == -1) {
			mdb_warn("segvn_pages: failed to layer \"%s\" "
			    "for segvn_data %p", layer, svw->svw_svdp);
			mdb_free(svw, sizeof (*svw));
			return (WALK_ERR);
		}
	}

	wsp->walk_data = svw;
	return (WALK_NEXT);
}

int
segvn_pages_walk_step(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t	*const svw = wsp->walk_data;
	struct seg		*const seg = &svw->svw_seg;
	struct segvn_data	*const svd = &svw->svw_svd;
	uintptr_t		pp;
	page_t			page;

	/* If we've walked off the end of the segment, we're done. */
	if (svw->svw_walkoff >= seg->s_size) {
		return (WALK_DONE);
	}

	/*
	 * If we've got a sparse page array, just send it directly.
	 */
	if (svw->svw_sparse != NULL) {
		u_offset_t off;

		if (svw->svw_sparse_idx >= svw->svw_sparse_count) {
			pp = 0;
			if (!svw->svw_all) {
				return (WALK_DONE);
			}
		} else {
			segvn_sparse_t	*const svs =
			    &svw->svw_sparse[svw->svw_sparse_idx];
			off = svs->svs_offset - svd->offset;
			if (svw->svw_all && svw->svw_walkoff != off) {
				pp = 0;
			} else {
				pp = svs->svs_page;
				svw->svw_sparse_idx++;
			}
		}

	} else if (svd->amp == NULL || wsp->walk_addr == 0) {
		/*
		 * If there's no anon, or the anon slot is NULL, look up
		 * <vp, offset>.
		 */
		if (svd->vp != NULL) {
			pp = mdb_page_lookup((uintptr_t)svd->vp,
			    svd->offset + svw->svw_walkoff);
		} else {
			pp = 0;
		}

	} else {
		const struct anon	*const anon = wsp->walk_layer;

		/*
		 * We have a "struct anon"; if it's not swapped out,
		 * look up the page.
		 */
		if (anon->an_vp != NULL || anon->an_off != 0) {
			pp = mdb_page_lookup((uintptr_t)anon->an_vp,
			    anon->an_off);
			if (pp == 0 && mdb_get_state() != MDB_STATE_RUNNING) {
				mdb_warn("walk segvn_pages: segvn_data %p "
				    "offset %ld, anon page <%p, %llx> not "
				    "found.\n", svw->svw_svdp, svw->svw_walkoff,
				    anon->an_vp, anon->an_off);
			}
		} else {
			if (anon->an_pvp == NULL) {
				mdb_warn("walk segvn_pages: useless struct "
				    "anon at %p\n", wsp->walk_addr);
			}
			pp = 0;	/* nothing at this offset */
		}
	}

	svw->svw_walkoff += PAGESIZE;	/* Update for the next call */
	if (pp != 0) {
		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
			mdb_warn("unable to read page_t at %#lx", pp);
			return (WALK_ERR);
		}
		return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
	}
	if (svw->svw_all) {
		return (wsp->walk_callback(0, NULL, wsp->walk_cbdata));
	}
	return (WALK_NEXT);
}

void
segvn_pages_walk_fini(mdb_walk_state_t *wsp)
{
	segvn_walk_data_t	*const svw = wsp->walk_data;

	if (svw->svw_sparse != NULL) {
		mdb_free(svw->svw_sparse, SEGVN_MAX_SPARSE *
		    sizeof (*svw->svw_sparse));
	}
	mdb_free(svw, sizeof (*svw));
}

/*
 * Grumble, grumble.
 */
#define	SMAP_HASHFUNC(vp, off)	\
	((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	((off) >> MAXBSHIFT)) & smd_hashmsk)

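/*
 * Note: this appears to duplicate the kernel's private segmap hash; it
 * must match the hash used by the running kernel, or ::vnode2smap will
 * search the wrong chain.
 */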
int
vnode2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	long smd_hashmsk;
	int hash;
	uintptr_t offset = 0;
	struct smap smp;
	uintptr_t saddr, kaddr;
	uintptr_t smd_hash, smd_smap;
	struct seg seg;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_readvar(&smd_hashmsk, "smd_hashmsk") == -1) {
		mdb_warn("failed to read smd_hashmsk");
		return (DCMD_ERR);
	}

	if (mdb_readvar(&smd_hash, "smd_hash") == -1) {
		mdb_warn("failed to read smd_hash");
		return (DCMD_ERR);
	}

	if (mdb_readvar(&smd_smap, "smd_smap") == -1) {
		mdb_warn("failed to read smd_smap");
		return (DCMD_ERR);
	}

	if (mdb_readvar(&kaddr, "segkmap") == -1) {
		mdb_warn("failed to read segkmap");
		return (DCMD_ERR);
	}

	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
		mdb_warn("failed to read segkmap at %p", kaddr);
		return (DCMD_ERR);
	}

	if (argc != 0) {
		const mdb_arg_t *arg = &argv[0];

		if (arg->a_type == MDB_TYPE_IMMEDIATE)
			offset = arg->a_un.a_val;
		else
			offset = (uintptr_t)mdb_strtoull(arg->a_un.a_str);
	}

	hash = SMAP_HASHFUNC(addr, offset);

	if (mdb_vread(&saddr, sizeof (saddr),
	    smd_hash + hash * sizeof (uintptr_t)) == -1) {
		mdb_warn("couldn't read smap at %p",
		    smd_hash + hash * sizeof (uintptr_t));
		return (DCMD_ERR);
	}

	do {
		if (mdb_vread(&smp, sizeof (smp), saddr) == -1) {
			mdb_warn("couldn't read smap at %p", saddr);
			return (DCMD_ERR);
		}

		if ((uintptr_t)smp.sm_vp == addr && smp.sm_off == offset) {
			mdb_printf("vnode %p, offs %p is smap %p, vaddr %p\n",
			    addr, offset, saddr, ((saddr - smd_smap) /
			    sizeof (smp)) * MAXBSIZE + seg.s_base);
			return (DCMD_OK);
		}

		saddr = (uintptr_t)smp.sm_hash;
	} while (saddr != 0);

	mdb_printf("no smap for vnode %p, offs %p\n", addr, offset);
	return (DCMD_OK);
}

/*ARGSUSED*/
int
addr2smap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uintptr_t kaddr;
	struct seg seg;
	struct segmap_data sd;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_readvar(&kaddr, "segkmap") == -1) {
		mdb_warn("failed to read segkmap");
		return (DCMD_ERR);
	}

	if (mdb_vread(&seg, sizeof (seg), kaddr) == -1) {
		mdb_warn("failed to read segkmap at %p", kaddr);
		return (DCMD_ERR);
	}

	if (mdb_vread(&sd, sizeof (sd), (uintptr_t)seg.s_data) == -1) {
		mdb_warn("failed to read segmap_data at %p", seg.s_data);
		return (DCMD_ERR);
	}

	mdb_printf("%p is smap %p\n", addr,
	    ((addr - (uintptr_t)seg.s_base) >> MAXBSHIFT) *
	    sizeof (struct smap) + (uintptr_t)sd.smd_sm);

	return (DCMD_OK);
}