1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2018 Joyent, Inc. All rights reserved.
28 * Copyright (c) 2012 by Delphix. All rights reserved.
29 * Copyright 2025 Oxide Computer Company
30 */
31
32 #include <mdb/mdb_param.h>
33 #include <mdb/mdb_modapi.h>
34 #include <mdb/mdb_ctf.h>
35 #include <mdb/mdb_whatis.h>
36 #include <sys/cpuvar.h>
37 #include <sys/kmem_impl.h>
38 #include <sys/vmem_impl.h>
39 #include <sys/machelf.h>
40 #include <sys/modctl.h>
41 #include <sys/kobj.h>
42 #include <sys/panic.h>
43 #include <sys/stack.h>
44 #include <sys/sysmacros.h>
45 #include <vm/page.h>
46
47 #include "avl.h"
48 #include "combined.h"
49 #include "dist.h"
50 #include "kmem.h"
51 #include "list.h"
52
53 #define dprintf(x) if (mdb_debug_level) { \
54 mdb_printf("kmem debug: "); \
55 /*CSTYLED*/\
56 mdb_printf x ;\
57 }
58
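/*
 * Walk-type flags used by kmem_walk_init_common() and kmem_walk_step() to
 * select what the generic walker reports: allocated vs. freed buffers,
 * bufctl addresses rather than buffer addresses, only constructed free
 * buffers, and (set internally) walking the bufctl hash table instead of
 * the slab layer.
 */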
59 #define KM_ALLOCATED 0x01
60 #define KM_FREE 0x02
61 #define KM_BUFCTL 0x04
62 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
63 #define KM_HASH 0x10
64
65 static int mdb_debug_level = 0;
66
67 /*ARGSUSED*/
68 static int
69 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
70 {
71 mdb_walker_t w;
72 char descr[64];
73
74 (void) mdb_snprintf(descr, sizeof (descr),
75 "walk the %s cache", c->cache_name);
76
77 w.walk_name = c->cache_name;
78 w.walk_descr = descr;
79 w.walk_init = kmem_walk_init;
80 w.walk_step = kmem_walk_step;
81 w.walk_fini = kmem_walk_fini;
82 w.walk_init_arg = (void *)addr;
83
84 if (mdb_add_walker(&w) == -1)
85 mdb_warn("failed to add %s walker", c->cache_name);
86
87 return (WALK_NEXT);
88 }
89
90 /*ARGSUSED*/
91 int
92 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
93 {
94 mdb_debug_level ^= 1;
95
96 mdb_printf("kmem: debugging is now %s\n",
97 mdb_debug_level ? "on" : "off");
98
99 return (DCMD_OK);
100 }
101
102 int
103 kmem_cache_walk_init(mdb_walk_state_t *wsp)
104 {
105 GElf_Sym sym;
106
107 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
108 mdb_warn("couldn't find kmem_caches");
109 return (WALK_ERR);
110 }
111
112 wsp->walk_addr = (uintptr_t)sym.st_value;
113
114 return (list_walk_init_named(wsp, "cache list", "cache"));
115 }
116
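/*
 * The kmem_cpu_cache walker layers over the "cpu" walker, visiting the
 * kmem_cpu_cache_t embedded in the given cache for each CPU's cpu_seqid.
 */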
117 int
118 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
119 {
120 if (wsp->walk_addr == 0) {
121 mdb_warn("kmem_cpu_cache doesn't support global walks");
122 return (WALK_ERR);
123 }
124
125 if (mdb_layered_walk("cpu", wsp) == -1) {
126 mdb_warn("couldn't walk 'cpu'");
127 return (WALK_ERR);
128 }
129
130 wsp->walk_data = (void *)wsp->walk_addr;
131
132 return (WALK_NEXT);
133 }
134
135 int
136 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
137 {
138 uintptr_t caddr = (uintptr_t)wsp->walk_data;
139 const cpu_t *cpu = wsp->walk_layer;
140 kmem_cpu_cache_t cc;
141
142 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
143
144 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
145 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
146 return (WALK_ERR);
147 }
148
149 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
150 }
151
152 static int
153 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
154 {
155 kmem_slab_t *sp = p;
156 uintptr_t caddr = (uintptr_t)arg;
157 if ((uintptr_t)sp->slab_cache != caddr) {
158 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
159 saddr, caddr, sp->slab_cache);
160 return (-1);
161 }
162
163 return (0);
164 }
165
166 static int
167 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
168 {
169 kmem_slab_t *sp = p;
170
171 int rc = kmem_slab_check(p, saddr, arg);
172 if (rc != 0) {
173 return (rc);
174 }
175
176 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
177 mdb_warn("slab %p is not a partial slab\n", saddr);
178 return (-1);
179 }
180
181 return (0);
182 }
183
184 static int
185 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
186 {
187 kmem_slab_t *sp = p;
188
189 int rc = kmem_slab_check(p, saddr, arg);
190 if (rc != 0) {
191 return (rc);
192 }
193
194 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
195 mdb_warn("slab %p is not completely allocated\n", saddr);
196 return (-1);
197 }
198
199 return (0);
200 }
201
202 typedef struct {
203 uintptr_t kns_cache_addr;
204 int kns_nslabs;
205 } kmem_nth_slab_t;
206
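/*
 * Check callback for reporting at most kns_nslabs slabs: verify that each
 * slab belongs to the expected cache, then return nonzero (which presumably
 * stops the checked list walk) once the requested number of slabs has been
 * visited.
 */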
207 static int
208 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
209 {
210 kmem_nth_slab_t *chkp = arg;
211
212 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
213 if (rc != 0) {
214 return (rc);
215 }
216
217 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
218 }
219
220 static int
221 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
222 {
223 uintptr_t caddr = wsp->walk_addr;
224
225 wsp->walk_addr = (uintptr_t)(caddr +
226 offsetof(kmem_cache_t, cache_complete_slabs));
227
228 return (list_walk_init_checked(wsp, "slab list", "slab",
229 kmem_complete_slab_check, (void *)caddr));
230 }
231
232 static int
233 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
234 {
235 uintptr_t caddr = wsp->walk_addr;
236
237 wsp->walk_addr = (uintptr_t)(caddr +
238 offsetof(kmem_cache_t, cache_partial_slabs));
239
240 return (avl_walk_init_checked(wsp, "slab list", "slab",
241 kmem_partial_slab_check, (void *)caddr));
242 }
243
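/*
 * The kmem_slab walker visits every slab in a cache: first the complete
 * (fully allocated) slabs on the linked list, then the partial slabs in the
 * AVL tree, via the combined-walk helpers.
 */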
244 int
245 kmem_slab_walk_init(mdb_walk_state_t *wsp)
246 {
247 uintptr_t caddr = wsp->walk_addr;
248
249 if (caddr == 0) {
250 mdb_warn("kmem_slab doesn't support global walks\n");
251 return (WALK_ERR);
252 }
253
254 combined_walk_init(wsp);
255 combined_walk_add(wsp,
256 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
257 combined_walk_add(wsp,
258 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
259
260 return (WALK_NEXT);
261 }
262
263 static int
264 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
265 {
266 uintptr_t caddr = wsp->walk_addr;
267 kmem_nth_slab_t *chk;
268
269 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
270 UM_SLEEP | UM_GC);
271 chk->kns_cache_addr = caddr;
272 chk->kns_nslabs = 1;
273 wsp->walk_addr = (uintptr_t)(caddr +
274 offsetof(kmem_cache_t, cache_complete_slabs));
275
276 return (list_walk_init_checked(wsp, "slab list", "slab",
277 kmem_nth_slab_check, chk));
278 }
279
280 int
281 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
282 {
283 uintptr_t caddr = wsp->walk_addr;
284 kmem_cache_t c;
285
286 if (caddr == 0) {
287 mdb_warn("kmem_slab_partial doesn't support global walks\n");
288 return (WALK_ERR);
289 }
290
291 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
292 mdb_warn("couldn't read kmem_cache at %p", caddr);
293 return (WALK_ERR);
294 }
295
296 combined_walk_init(wsp);
297
298 /*
299 * Some consumers (kmem_walk_step(), in particular) require at
300 * least one callback if there are any buffers in the cache. So
301 * if there are *no* partial slabs, report the first full slab, if
302 * any.
303 *
304 * Yes, this is ugly, but it's cleaner than the other possibilities.
305 */
306 if (c.cache_partial_slabs.avl_numnodes == 0) {
307 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
308 list_walk_step, list_walk_fini);
309 } else {
310 combined_walk_add(wsp, kmem_partial_slab_walk_init,
311 avl_walk_step, avl_walk_fini);
312 }
313
314 return (WALK_NEXT);
315 }
316
317 int
318 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
319 {
320 kmem_cache_t c;
321 const char *filter = NULL;
322
323 if (mdb_getopts(ac, argv,
324 'n', MDB_OPT_STR, &filter,
325 NULL) != ac) {
326 return (DCMD_USAGE);
327 }
328
329 if (!(flags & DCMD_ADDRSPEC)) {
330 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
331 mdb_warn("can't walk kmem_cache");
332 return (DCMD_ERR);
333 }
334 return (DCMD_OK);
335 }
336
337 if (DCMD_HDRSPEC(flags))
338 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
339 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
340
341 if (mdb_vread(&c, sizeof (c), addr) == -1) {
342 mdb_warn("couldn't read kmem_cache at %p", addr);
343 return (DCMD_ERR);
344 }
345
346 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
347 return (DCMD_OK);
348
349 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
350 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
351
352 return (DCMD_OK);
353 }
354
355 void
356 kmem_cache_help(void)
357 {
358 mdb_printf("%s", "Print kernel memory caches.\n\n");
359 mdb_dec_indent(2);
360 mdb_printf("%<b>OPTIONS%</b>\n");
361 mdb_inc_indent(2);
362 mdb_printf("%s",
363 " -n name\n"
364 " name of kmem cache (or matching partial name)\n"
365 "\n"
366 "Column\tDescription\n"
367 "\n"
368 "ADDR\t\taddress of kmem cache\n"
369 "NAME\t\tname of kmem cache\n"
370 "FLAG\t\tvarious cache state flags\n"
371 "CFLAG\t\tcache creation flags\n"
372 "BUFSIZE\tobject size in bytes\n"
373 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
374 }
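/*
 * Illustrative invocations of the dcmd above:
 *	> ::kmem_cache			(summarize every cache)
 *	> ::kmem_cache -n thread	(caches whose name contains "thread")
 *	> addr::kmem_cache		(summarize the cache at addr)
 */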
375
376 #define LABEL_WIDTH 11
377 static void
378 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
379 size_t maxbuckets, size_t minbucketsize)
380 {
381 uint64_t total;
382 int buckets;
383 int i;
384 const int *distarray;
385 int complete[2];
386
387 buckets = buffers_per_slab;
388
389 total = 0;
390 for (i = 0; i <= buffers_per_slab; i++)
391 total += ks_bucket[i];
392
393 if (maxbuckets > 1)
394 buckets = MIN(buckets, maxbuckets);
395
396 if (minbucketsize > 1) {
397 /*
398 * minbucketsize does not apply to the first bucket reserved
399 * for completely allocated slabs
400 */
401 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
402 minbucketsize));
403 if ((buckets < 2) && (buffers_per_slab > 1)) {
404 buckets = 2;
405 minbucketsize = (buffers_per_slab - 1);
406 }
407 }
408
409 /*
410 * The first printed bucket is reserved for completely allocated slabs.
411 * Passing (buckets - 1) excludes that bucket from the generated
412 * distribution, since we're handling it as a special case.
413 */
414 complete[0] = buffers_per_slab;
415 complete[1] = buffers_per_slab + 1;
416 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
417
418 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
419 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
420
421 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
422 /*
423 * Print bucket ranges in descending order after the first bucket for
424 * completely allocated slabs, so a person can see immediately whether
425 * or not there is fragmentation without having to scan possibly
426 * multiple screens of output. Starting at (buckets - 2) excludes the
427 * extra terminating bucket.
428 */
429 for (i = buckets - 2; i >= 0; i--) {
430 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
431 }
432 mdb_printf("\n");
433 }
434 #undef LABEL_WIDTH
435
436 /*ARGSUSED*/
437 static int
438 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
439 {
440 *is_slab = B_TRUE;
441 return (WALK_DONE);
442 }
443
444 /*ARGSUSED*/
445 static int
446 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
447 boolean_t *is_slab)
448 {
449 /*
450 * The "kmem_slab_partial" walker reports the first full slab if there
451 * are no partial slabs (for the sake of consumers that require at least
452 * one callback if there are any buffers in the cache).
453 */
454 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
455 return (WALK_DONE);
456 }
457
458 typedef struct kmem_slab_usage {
459 int ksu_refcnt; /* count of allocated buffers on slab */
460 boolean_t ksu_nomove; /* slab marked non-reclaimable */
461 } kmem_slab_usage_t;
462
463 typedef struct kmem_slab_stats {
464 const kmem_cache_t *ks_cp;
465 int ks_slabs; /* slabs in cache */
466 int ks_partial_slabs; /* partially allocated slabs in cache */
467 uint64_t ks_unused_buffers; /* total unused buffers in cache */
468 int ks_max_buffers_per_slab; /* max buffers per slab */
469 int ks_usage_len; /* ks_usage array length */
470 kmem_slab_usage_t *ks_usage; /* partial slab usage */
471 uint_t *ks_bucket; /* slab usage distribution */
472 } kmem_slab_stats_t;
473
474 /*ARGSUSED*/
475 static int
476 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
477 kmem_slab_stats_t *ks)
478 {
479 kmem_slab_usage_t *ksu;
480 long unused;
481
482 ks->ks_slabs++;
483 ks->ks_bucket[sp->slab_refcnt]++;
484
485 unused = (sp->slab_chunks - sp->slab_refcnt);
486 if (unused == 0) {
487 return (WALK_NEXT);
488 }
489
490 ks->ks_partial_slabs++;
491 ks->ks_unused_buffers += unused;
492
493 if (ks->ks_partial_slabs > ks->ks_usage_len) {
494 kmem_slab_usage_t *usage;
495 int len = ks->ks_usage_len;
496
497 len = (len == 0 ? 16 : len * 2);
498 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
499 if (ks->ks_usage != NULL) {
500 bcopy(ks->ks_usage, usage,
501 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
502 mdb_free(ks->ks_usage,
503 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
504 }
505 ks->ks_usage = usage;
506 ks->ks_usage_len = len;
507 }
508
509 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
510 ksu->ksu_refcnt = sp->slab_refcnt;
511 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
512 return (WALK_NEXT);
513 }
514
515 static void
516 kmem_slabs_header()
517 {
518 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
519 "", "", "Partial", "", "Unused", "");
520 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
521 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
522 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
523 "-------------------------", "--------", "--------", "---------",
524 "---------", "------");
525 }
526
527 int
528 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
529 {
530 kmem_cache_t c;
531 kmem_slab_stats_t stats;
532 mdb_walk_cb_t cb;
533 int pct;
534 int tenths_pct;
535 size_t maxbuckets = 1;
536 size_t minbucketsize = 0;
537 const char *filter = NULL;
538 const char *name = NULL;
539 uint_t opt_v = FALSE;
540 boolean_t buckets = B_FALSE;
541 boolean_t skip = B_FALSE;
542
543 if (mdb_getopts(argc, argv,
544 'B', MDB_OPT_UINTPTR, &minbucketsize,
545 'b', MDB_OPT_UINTPTR, &maxbuckets,
546 'n', MDB_OPT_STR, &filter,
547 'N', MDB_OPT_STR, &name,
548 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
549 NULL) != argc) {
550 return (DCMD_USAGE);
551 }
552
553 if ((maxbuckets != 1) || (minbucketsize != 0)) {
554 buckets = B_TRUE;
555 }
556
557 if (!(flags & DCMD_ADDRSPEC)) {
558 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
559 argv) == -1) {
560 mdb_warn("can't walk kmem_cache");
561 return (DCMD_ERR);
562 }
563 return (DCMD_OK);
564 }
565
566 if (mdb_vread(&c, sizeof (c), addr) == -1) {
567 mdb_warn("couldn't read kmem_cache at %p", addr);
568 return (DCMD_ERR);
569 }
570
571 if (name == NULL) {
572 skip = ((filter != NULL) &&
573 (strstr(c.cache_name, filter) == NULL));
574 } else if (filter == NULL) {
575 skip = (strcmp(c.cache_name, name) != 0);
576 } else {
577 /* match either -n or -N */
578 skip = ((strcmp(c.cache_name, name) != 0) &&
579 (strstr(c.cache_name, filter) == NULL));
580 }
581
582 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
583 kmem_slabs_header();
584 } else if ((opt_v || buckets) && !skip) {
585 if (DCMD_HDRSPEC(flags)) {
586 kmem_slabs_header();
587 } else {
588 boolean_t is_slab = B_FALSE;
589 const char *walker_name;
590 if (opt_v) {
591 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
592 walker_name = "kmem_slab_partial";
593 } else {
594 cb = (mdb_walk_cb_t)kmem_first_slab;
595 walker_name = "kmem_slab";
596 }
597 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
598 if (is_slab) {
599 kmem_slabs_header();
600 }
601 }
602 }
603
604 if (skip) {
605 return (DCMD_OK);
606 }
607
608 bzero(&stats, sizeof (kmem_slab_stats_t));
609 stats.ks_cp = &c;
610 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
611 /* +1 to include a zero bucket */
612 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
613 sizeof (*stats.ks_bucket), UM_SLEEP);
614 cb = (mdb_walk_cb_t)kmem_slablist_stat;
615 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
616
617 if (c.cache_buftotal == 0) {
618 pct = 0;
619 tenths_pct = 0;
620 } else {
621 uint64_t n = stats.ks_unused_buffers * 10000;
622 pct = (int)(n / c.cache_buftotal);
623 tenths_pct = pct - ((pct / 100) * 100);
624 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
625 if (tenths_pct == 10) {
626 pct += 100;
627 tenths_pct = 0;
628 }
629 }
630
631 pct /= 100;
632 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
633 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
634 stats.ks_unused_buffers, pct, tenths_pct);
635
636 if (maxbuckets == 0) {
637 maxbuckets = stats.ks_max_buffers_per_slab;
638 }
639
640 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
641 (stats.ks_slabs > 0)) {
642 mdb_printf("\n");
643 kmem_slabs_print_dist(stats.ks_bucket,
644 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
645 }
646
647 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
648 sizeof (*stats.ks_bucket));
649
650 if (!opt_v) {
651 return (DCMD_OK);
652 }
653
654 if (opt_v && (stats.ks_partial_slabs > 0)) {
655 int i;
656 kmem_slab_usage_t *ksu;
657
658 mdb_printf(" %d complete (%d), %d partial:",
659 (stats.ks_slabs - stats.ks_partial_slabs),
660 stats.ks_max_buffers_per_slab,
661 stats.ks_partial_slabs);
662
663 for (i = 0; i < stats.ks_partial_slabs; i++) {
664 ksu = &stats.ks_usage[i];
665 mdb_printf(" %d%s", ksu->ksu_refcnt,
666 (ksu->ksu_nomove ? "*" : ""));
667 }
668 mdb_printf("\n\n");
669 }
670
671 if (stats.ks_usage_len > 0) {
672 mdb_free(stats.ks_usage,
673 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
674 }
675
676 return (DCMD_OK);
677 }
678
679 void
680 kmem_slabs_help(void)
681 {
682 mdb_printf("%s",
683 "Display slab usage per kmem cache.\n\n");
684 mdb_dec_indent(2);
685 mdb_printf("%<b>OPTIONS%</b>\n");
686 mdb_inc_indent(2);
687 mdb_printf("%s",
688 " -n name\n"
689 " name of kmem cache (or matching partial name)\n"
690 " -N name\n"
691 " exact name of kmem cache\n"
692 " -b maxbins\n"
693 " Print a distribution of allocated buffers per slab using at\n"
694 " most maxbins bins. The first bin is reserved for completely\n"
695 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
696 " effect as specifying the maximum allocated buffers per slab\n"
697 " or setting minbinsize to 1 (-B 1).\n"
698 " -B minbinsize\n"
699 " Print a distribution of allocated buffers per slab, making\n"
700 " all bins (except the first, reserved for completely allocated\n"
701 " slabs) at least minbinsize buffers apart.\n"
702 " -v verbose output: List the allocated buffer count of each partial\n"
703 " slab on the free list in order from front to back to show how\n"
704 " closely the slabs are ordered by usage. For example\n"
705 "\n"
706 " 10 complete, 3 partial (8): 7 3 1\n"
707 "\n"
708 " means there are thirteen slabs with eight buffers each, including\n"
709 " three partially allocated slabs with less than all eight buffers\n"
710 " allocated.\n"
711 "\n"
712 " Buffer allocations are always from the front of the partial slab\n"
713 " list. When a buffer is freed from a completely used slab, that\n"
714 " slab is added to the front of the partial slab list. Assuming\n"
715 " that all buffers are equally likely to be freed soon, the\n"
716 " desired order of partial slabs is most-used at the front of the\n"
717 " list and least-used at the back (as in the example above).\n"
718 " However, if a slab contains an allocated buffer that will not\n"
719 " soon be freed, it would be better for that slab to be at the\n"
720 " front where all of its buffers can be allocated. Taking a slab\n"
721 " off the partial slab list (either with all buffers freed or all\n"
722 " buffers allocated) reduces cache fragmentation.\n"
723 "\n"
724 " A slab's allocated buffer count representing a partial slab (9 in\n"
725 " the example below) may be marked as follows:\n"
726 "\n"
727 " 9* An asterisk indicates that kmem has marked the slab non-\n"
728 " reclaimable because the kmem client refused to move one of the\n"
729 " slab's buffers. Since kmem does not expect to completely free the\n"
730 " slab, it moves it to the front of the list in the hope of\n"
731 " completely allocating it instead. A slab marked with an asterisk\n"
732 " stays marked for as long as it remains on the partial slab list.\n"
733 "\n"
734 "Column\t\tDescription\n"
735 "\n"
736 "Cache Name\t\tname of kmem cache\n"
737 "Slabs\t\t\ttotal slab count\n"
738 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
739 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
740 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
741 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
742 "\t\t\t for accounting structures (debug mode), slab\n"
743 "\t\t\t coloring (incremental small offsets to stagger\n"
744 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
745 }
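/*
 * Illustrative invocations ("bufcache" is a hypothetical cache name):
 *	> ::kmem_slabs				(summary for every cache)
 *	> addr::kmem_slabs -v			(per-slab detail for one cache)
 *	> ::kmem_slabs -N bufcache -b 10	(distribution with ten bins)
 */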
746
747 static int
748 addrcmp(const void *lhs, const void *rhs)
749 {
750 uintptr_t p1 = *((uintptr_t *)lhs);
751 uintptr_t p2 = *((uintptr_t *)rhs);
752
753 if (p1 < p2)
754 return (-1);
755 if (p1 > p2)
756 return (1);
757 return (0);
758 }
759
760 static int
761 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
762 {
763 const kmem_bufctl_audit_t *bcp1 = *lhs;
764 const kmem_bufctl_audit_t *bcp2 = *rhs;
765
766 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
767 return (-1);
768
769 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
770 return (1);
771
772 return (0);
773 }
774
775 typedef struct kmem_hash_walk {
776 uintptr_t *kmhw_table;
777 size_t kmhw_nelems;
778 size_t kmhw_pos;
779 kmem_bufctl_t kmhw_cur;
780 } kmem_hash_walk_t;
781
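/*
 * The kmem_hash walker snapshots a cache's bufctl hash table and then visits
 * every kmem_bufctl_t on every hash chain, reading each bufctl from the
 * target as it goes.
 */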
782 int
783 kmem_hash_walk_init(mdb_walk_state_t *wsp)
784 {
785 kmem_hash_walk_t *kmhw;
786 uintptr_t *hash;
787 kmem_cache_t c;
788 uintptr_t haddr, addr = wsp->walk_addr;
789 size_t nelems;
790 size_t hsize;
791
792 if (addr == 0) {
793 mdb_warn("kmem_hash doesn't support global walks\n");
794 return (WALK_ERR);
795 }
796
797 if (mdb_vread(&c, sizeof (c), addr) == -1) {
798 mdb_warn("couldn't read cache at addr %p", addr);
799 return (WALK_ERR);
800 }
801
802 if (!(c.cache_flags & KMF_HASH)) {
803 mdb_warn("cache %p doesn't have a hash table\n", addr);
804 return (WALK_DONE); /* nothing to do */
805 }
806
807 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
808 kmhw->kmhw_cur.bc_next = NULL;
809 kmhw->kmhw_pos = 0;
810
811 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
812 hsize = nelems * sizeof (uintptr_t);
813 haddr = (uintptr_t)c.cache_hash_table;
814
815 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
816 if (mdb_vread(hash, hsize, haddr) == -1) {
817 mdb_warn("failed to read hash table at %p", haddr);
818 mdb_free(hash, hsize);
819 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
820 return (WALK_ERR);
821 }
822
823 wsp->walk_data = kmhw;
824
825 return (WALK_NEXT);
826 }
827
828 int
829 kmem_hash_walk_step(mdb_walk_state_t *wsp)
830 {
831 kmem_hash_walk_t *kmhw = wsp->walk_data;
832 uintptr_t addr = 0;
833
834 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == 0) {
835 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
836 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != 0)
837 break;
838 }
839 }
840 if (addr == 0)
841 return (WALK_DONE);
842
843 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
844 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
845 return (WALK_ERR);
846 }
847
848 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
849 }
850
851 void
852 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
853 {
854 kmem_hash_walk_t *kmhw = wsp->walk_data;
855
856 if (kmhw == NULL)
857 return;
858
859 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
860 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
861 }
862
863 /*
864 * Find the address of the bufctl structure for the address 'buf' in cache
865 * 'cp', which is at address caddr, and place it in *out.
866 */
867 static int
868 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
869 {
870 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
871 kmem_bufctl_t *bcp;
872 kmem_bufctl_t bc;
873
874 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
875 mdb_warn("unable to read hash bucket for %p in cache %p",
876 buf, caddr);
877 return (-1);
878 }
879
880 while (bcp != NULL) {
881 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
882 (uintptr_t)bcp) == -1) {
883 mdb_warn("unable to read bufctl at %p", bcp);
884 return (-1);
885 }
886 if (bc.bc_addr == buf) {
887 *out = (uintptr_t)bcp;
888 return (0);
889 }
890 bcp = bc.bc_next;
891 }
892
893 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
894 return (-1);
895 }
896
897 int
898 kmem_get_magsize(const kmem_cache_t *cp)
899 {
900 uintptr_t addr = (uintptr_t)cp->cache_magtype;
901 GElf_Sym mt_sym;
902 kmem_magtype_t mt;
903 int res;
904
905 /*
906 * if cpu 0 has a non-zero magsize, it must be correct. caches
907 * with KMF_NOMAGAZINE have disabled their magazine layers, so
908 * it is okay to return 0 for them.
909 */
910 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
911 (cp->cache_flags & KMF_NOMAGAZINE))
912 return (res);
913
914 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
915 mdb_warn("unable to read 'kmem_magtype'");
916 } else if (addr < mt_sym.st_value ||
917 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
918 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
919 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
920 cp->cache_name, addr);
921 return (0);
922 }
923 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
924 mdb_warn("unable to read magtype at %a", addr);
925 return (0);
926 }
927 return (mt.mt_magsize);
928 }
929
930 /*ARGSUSED*/
931 static int
932 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
933 {
934 *est -= (sp->slab_chunks - sp->slab_refcnt);
935
936 return (WALK_NEXT);
937 }
938
939 /*
940 * Returns an upper bound on the number of allocated buffers in a given
941 * cache.
942 */
943 size_t
944 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
945 {
946 int magsize;
947 size_t cache_est;
948
949 cache_est = cp->cache_buftotal;
950
951 (void) mdb_pwalk("kmem_slab_partial",
952 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
953
954 if ((magsize = kmem_get_magsize(cp)) != 0) {
955 size_t mag_est = cp->cache_full.ml_total * magsize;
956
957 if (cache_est >= mag_est) {
958 cache_est -= mag_est;
959 } else {
960 mdb_warn("cache %p's magazine layer holds more buffers "
961 "than the slab layer.\n", addr);
962 }
963 }
964 return (cache_est);
965 }
966
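/*
 * Copy the first 'rounds' buffer pointers out of the magazine at 'kmp' into
 * maglist[], bailing out if the target read fails or if the fudge-factor
 * limit (magmax) is hit.  Used only from kmem_read_magazines() below.
 */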
967 #define READMAG_ROUNDS(rounds) { \
968 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
969 mdb_warn("couldn't read magazine at %p", kmp); \
970 goto fail; \
971 } \
972 for (i = 0; i < rounds; i++) { \
973 maglist[magcnt++] = mp->mag_round[i]; \
974 if (magcnt == magmax) { \
975 mdb_warn("%d magazines exceeds fudge factor\n", \
976 magcnt); \
977 goto fail; \
978 } \
979 } \
980 }
981
982 int
983 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
984 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
985 {
986 kmem_magazine_t *kmp, *mp;
987 void **maglist = NULL;
988 int i, cpu;
989 size_t magsize, magmax, magbsize;
990 size_t magcnt = 0;
991
992 /*
993 * Read the magtype out of the cache, after verifying the pointer's
994 * correctness.
995 */
996 magsize = kmem_get_magsize(cp);
997 if (magsize == 0) {
998 *maglistp = NULL;
999 *magcntp = 0;
1000 *magmaxp = 0;
1001 return (WALK_NEXT);
1002 }
1003
1004 /*
1005 * There are several places where we need to go buffer hunting:
1006 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1007 * and the full magazine list in the depot.
1008 *
1009 * For an upper bound on the number of buffers in the magazine
1010 * layer, we have the number of magazines on the cache_full
1011 * list plus at most two magazines per CPU (the loaded and the
1012 * spare). Toss in 100 magazines as a fudge factor in case this
1013 * is live (the number "100" comes from the same fudge factor in
1014 * crash(8)).
1015 */
1016 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1017 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1018
1019 if (magbsize >= PAGESIZE / 2) {
1020 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1021 addr, magbsize);
1022 return (WALK_ERR);
1023 }
1024
1025 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1026 mp = mdb_alloc(magbsize, alloc_flags);
1027 if (mp == NULL || maglist == NULL)
1028 goto fail;
1029
1030 /*
1031 * First up: the magazines in the depot (i.e. on the cache_full list).
1032 */
1033 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1034 READMAG_ROUNDS(magsize);
1035 kmp = mp->mag_next;
1036
1037 if (kmp == cp->cache_full.ml_list)
1038 break; /* cache_full list loop detected */
1039 }
1040
1041 dprintf(("cache_full list done\n"));
1042
1043 /*
1044 * Now whip through the CPUs, snagging the loaded magazines
1045 * and full spares.
1046 *
1047 * In order to prevent inconsistent dumps, rounds and prounds
1048 * are copied aside before dumping begins.
1049 */
1050 for (cpu = 0; cpu < ncpus; cpu++) {
1051 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1052 short rounds, prounds;
1053
1054 if (KMEM_DUMPCC(ccp)) {
1055 rounds = ccp->cc_dump_rounds;
1056 prounds = ccp->cc_dump_prounds;
1057 } else {
1058 rounds = ccp->cc_rounds;
1059 prounds = ccp->cc_prounds;
1060 }
1061
1062 dprintf(("reading cpu cache %p\n",
1063 (uintptr_t)ccp - (uintptr_t)cp + addr));
1064
1065 if (rounds > 0 &&
1066 (kmp = ccp->cc_loaded) != NULL) {
1067 dprintf(("reading %d loaded rounds\n", rounds));
1068 READMAG_ROUNDS(rounds);
1069 }
1070
1071 if (prounds > 0 &&
1072 (kmp = ccp->cc_ploaded) != NULL) {
1073 dprintf(("reading %d previously loaded rounds\n",
1074 prounds));
1075 READMAG_ROUNDS(prounds);
1076 }
1077 }
1078
1079 dprintf(("magazine layer: %d buffers\n", magcnt));
1080
1081 if (!(alloc_flags & UM_GC))
1082 mdb_free(mp, magbsize);
1083
1084 *maglistp = maglist;
1085 *magcntp = magcnt;
1086 *magmaxp = magmax;
1087
1088 return (WALK_NEXT);
1089
1090 fail:
1091 if (!(alloc_flags & UM_GC)) {
1092 if (mp)
1093 mdb_free(mp, magbsize);
1094 if (maglist)
1095 mdb_free(maglist, magmax * sizeof (void *));
1096 }
1097 return (WALK_ERR);
1098 }
1099
1100 static int
1101 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1102 {
1103 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1104 }
1105
1106 static int
1107 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1108 {
1109 kmem_bufctl_audit_t b;
1110
1111 /*
1112 * if KMF_AUDIT is not set, we know that we're looking at a
1113 * kmem_bufctl_t.
1114 */
1115 if (!(cp->cache_flags & KMF_AUDIT) ||
1116 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1117 (void) memset(&b, 0, sizeof (b));
1118 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1119 mdb_warn("unable to read bufctl at %p", buf);
1120 return (WALK_ERR);
1121 }
1122 }
1123
1124 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1125 }
1126
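/*
 * State for the generic kmem walker: the magazine-layer buffer list gathered
 * at init time, plus scratch space for reading each small slab's contents
 * and tracking which of its chunks are free.
 */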
1127 typedef struct kmem_walk {
1128 int kmw_type;
1129
1130 uintptr_t kmw_addr; /* cache address */
1131 kmem_cache_t *kmw_cp;
1132 size_t kmw_csize;
1133
1134 /*
1135 * magazine layer
1136 */
1137 void **kmw_maglist;
1138 size_t kmw_max;
1139 size_t kmw_count;
1140 size_t kmw_pos;
1141
1142 /*
1143 * slab layer
1144 */
1145 char *kmw_valid; /* to keep track of freed buffers */
1146 char *kmw_ubase; /* buffer for slab data */
1147 } kmem_walk_t;
1148
1149 static int
1150 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1151 {
1152 kmem_walk_t *kmw;
1153 int ncpus, csize;
1154 kmem_cache_t *cp;
1155 size_t vm_quantum;
1156
1157 size_t magmax, magcnt;
1158 void **maglist = NULL;
1159 uint_t chunksize = 1, slabsize = 1;
1160 int status = WALK_ERR;
1161 uintptr_t addr = wsp->walk_addr;
1162 const char *layered;
1163
1164 type &= ~KM_HASH;
1165
1166 if (addr == 0) {
1167 mdb_warn("kmem walk doesn't support global walks\n");
1168 return (WALK_ERR);
1169 }
1170
1171 dprintf(("walking %p\n", addr));
1172
1173 /*
1174 * First we need to figure out how many CPUs are configured in the
1175 * system to know how much to slurp out.
1176 */
1177 mdb_readvar(&ncpus, "max_ncpus");
1178
1179 csize = KMEM_CACHE_SIZE(ncpus);
1180 cp = mdb_alloc(csize, UM_SLEEP);
1181
1182 if (mdb_vread(cp, csize, addr) == -1) {
1183 mdb_warn("couldn't read cache at addr %p", addr);
1184 goto out2;
1185 }
1186
1187 /*
1188 * It's easy for someone to hand us an invalid cache address.
1189 * Unfortunately, it is hard for this walker to survive an
1190 * invalid cache cleanly. So we make sure that:
1191 *
1192 * 1. the vmem arena for the cache is readable,
1193 * 2. the vmem arena's quantum is a power of 2,
1194 * 3. our slabsize is a multiple of the quantum, and
1195 * 4. our chunksize is >0 and less than our slabsize.
1196 */
1197 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1198 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1199 vm_quantum == 0 ||
1200 (vm_quantum & (vm_quantum - 1)) != 0 ||
1201 cp->cache_slabsize < vm_quantum ||
1202 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1203 cp->cache_chunksize == 0 ||
1204 cp->cache_chunksize > cp->cache_slabsize) {
1205 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1206 goto out2;
1207 }
1208
1209 dprintf(("buf total is %d\n", cp->cache_buftotal));
1210
1211 if (cp->cache_buftotal == 0) {
1212 mdb_free(cp, csize);
1213 return (WALK_DONE);
1214 }
1215
1216 /*
1217 * If they ask for bufctls, but it's a small-slab cache,
1218 * there is nothing to report.
1219 */
1220 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1221 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1222 cp->cache_flags));
1223 mdb_free(cp, csize);
1224 return (WALK_DONE);
1225 }
1226
1227 /*
1228 * If they want constructed buffers, but there's no constructor or
1229 * the cache has DEADBEEF checking enabled, there is nothing to report.
1230 */
1231 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1232 cp->cache_constructor == NULL ||
1233 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1234 mdb_free(cp, csize);
1235 return (WALK_DONE);
1236 }
1237
1238 /*
1239 * Read in the contents of the magazine layer
1240 */
1241 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1242 &magmax, UM_SLEEP) == WALK_ERR)
1243 goto out2;
1244
1245 /*
1246 * We have all of the buffers from the magazines; if we are walking
1247 * allocated buffers, sort them so we can bsearch them later.
1248 */
1249 if (type & KM_ALLOCATED)
1250 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1251
1252 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1253
1254 kmw->kmw_type = type;
1255 kmw->kmw_addr = addr;
1256 kmw->kmw_cp = cp;
1257 kmw->kmw_csize = csize;
1258 kmw->kmw_maglist = maglist;
1259 kmw->kmw_max = magmax;
1260 kmw->kmw_count = magcnt;
1261 kmw->kmw_pos = 0;
1262
1263 /*
1264 * When walking allocated buffers in a KMF_HASH cache, we walk the
1265 * hash table instead of the slab layer.
1266 */
1267 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1268 layered = "kmem_hash";
1269
1270 kmw->kmw_type |= KM_HASH;
1271 } else {
1272 /*
1273 * If we are walking freed buffers, we only need the
1274 * magazine layer plus the partially allocated slabs.
1275 * To walk allocated buffers, we need all of the slabs.
1276 */
1277 if (type & KM_ALLOCATED)
1278 layered = "kmem_slab";
1279 else
1280 layered = "kmem_slab_partial";
1281
1282 /*
1283 * for small-slab caches, we read in the entire slab. For
1284 * freed buffers, we can just walk the freelist. For
1285 * allocated buffers, we use a 'valid' array to track
1286 * the freed buffers.
1287 */
1288 if (!(cp->cache_flags & KMF_HASH)) {
1289 chunksize = cp->cache_chunksize;
1290 slabsize = cp->cache_slabsize;
1291
1292 kmw->kmw_ubase = mdb_alloc(slabsize +
1293 sizeof (kmem_bufctl_t), UM_SLEEP);
1294
1295 if (type & KM_ALLOCATED)
1296 kmw->kmw_valid =
1297 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1298 }
1299 }
1300
1301 status = WALK_NEXT;
1302
1303 if (mdb_layered_walk(layered, wsp) == -1) {
1304 mdb_warn("unable to start layered '%s' walk", layered);
1305 status = WALK_ERR;
1306 }
1307
1308 if (status == WALK_ERR) {
1309 if (kmw->kmw_valid)
1310 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1311
1312 if (kmw->kmw_ubase)
1313 mdb_free(kmw->kmw_ubase, slabsize +
1314 sizeof (kmem_bufctl_t));
1315
1316 if (kmw->kmw_maglist)
1317 mdb_free(kmw->kmw_maglist,
1318 kmw->kmw_max * sizeof (uintptr_t));
1319
1320 mdb_free(kmw, sizeof (kmem_walk_t));
1321 wsp->walk_data = NULL;
1322 }
1323
1324 out2:
1325 if (status == WALK_ERR)
1326 mdb_free(cp, csize);
1327
1328 return (status);
1329 }
1330
1331 int
1332 kmem_walk_step(mdb_walk_state_t *wsp)
1333 {
1334 kmem_walk_t *kmw = wsp->walk_data;
1335 int type = kmw->kmw_type;
1336 kmem_cache_t *cp = kmw->kmw_cp;
1337
1338 void **maglist = kmw->kmw_maglist;
1339 int magcnt = kmw->kmw_count;
1340
1341 uintptr_t chunksize, slabsize;
1342 uintptr_t addr;
1343 const kmem_slab_t *sp;
1344 const kmem_bufctl_t *bcp;
1345 kmem_bufctl_t bc;
1346
1347 int chunks;
1348 char *kbase;
1349 void *buf;
1350 int i, ret;
1351
1352 char *valid, *ubase;
1353
1354 /*
1355 * first, handle the 'kmem_hash' layered walk case
1356 */
1357 if (type & KM_HASH) {
1358 /*
1359 * We have a buffer which has been allocated out of the
1360 * global layer. We need to make sure that it's not
1361 * actually sitting in a magazine before we report it as
1362 * an allocated buffer.
1363 */
1364 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1365
1366 if (magcnt > 0 &&
1367 bsearch(&buf, maglist, magcnt, sizeof (void *),
1368 addrcmp) != NULL)
1369 return (WALK_NEXT);
1370
1371 if (type & KM_BUFCTL)
1372 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1373
1374 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1375 }
1376
1377 ret = WALK_NEXT;
1378
1379 addr = kmw->kmw_addr;
1380
1381 /*
1382 * If we're walking freed buffers, report everything in the
1383 * magazine layer before processing the first slab.
1384 */
1385 if ((type & KM_FREE) && magcnt != 0) {
1386 kmw->kmw_count = 0; /* only do this once */
1387 for (i = 0; i < magcnt; i++) {
1388 buf = maglist[i];
1389
1390 if (type & KM_BUFCTL) {
1391 uintptr_t out;
1392
1393 if (cp->cache_flags & KMF_BUFTAG) {
1394 kmem_buftag_t *btp;
1395 kmem_buftag_t tag;
1396
1397 /* LINTED - alignment */
1398 btp = KMEM_BUFTAG(cp, buf);
1399 if (mdb_vread(&tag, sizeof (tag),
1400 (uintptr_t)btp) == -1) {
1401 mdb_warn("reading buftag for "
1402 "%p at %p", buf, btp);
1403 continue;
1404 }
1405 out = (uintptr_t)tag.bt_bufctl;
1406 } else {
1407 if (kmem_hash_lookup(cp, addr, buf,
1408 &out) == -1)
1409 continue;
1410 }
1411 ret = bufctl_walk_callback(cp, wsp, out);
1412 } else {
1413 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1414 }
1415
1416 if (ret != WALK_NEXT)
1417 return (ret);
1418 }
1419 }
1420
1421 /*
1422 * If they want constructed buffers, we're finished, since the
1423 * magazine layer holds them all.
1424 */
1425 if (type & KM_CONSTRUCTED)
1426 return (WALK_DONE);
1427
1428 /*
1429 * Handle the buffers in the current slab
1430 */
1431 chunksize = cp->cache_chunksize;
1432 slabsize = cp->cache_slabsize;
1433
1434 sp = wsp->walk_layer;
1435 chunks = sp->slab_chunks;
1436 kbase = sp->slab_base;
1437
1438 dprintf(("kbase is %p\n", kbase));
1439
1440 if (!(cp->cache_flags & KMF_HASH)) {
1441 valid = kmw->kmw_valid;
1442 ubase = kmw->kmw_ubase;
1443
1444 if (mdb_vread(ubase, chunks * chunksize,
1445 (uintptr_t)kbase) == -1) {
1446 mdb_warn("failed to read slab contents at %p", kbase);
1447 return (WALK_ERR);
1448 }
1449
1450 /*
1451 * Set up the valid map as fully allocated -- we'll punch
1452 * out the freelist.
1453 */
1454 if (type & KM_ALLOCATED)
1455 (void) memset(valid, 1, chunks);
1456 } else {
1457 valid = NULL;
1458 ubase = NULL;
1459 }
1460
1461 /*
1462 * walk the slab's freelist
1463 */
1464 bcp = sp->slab_head;
1465
1466 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1467
1468 /*
1469 * since we could be in the middle of allocating a buffer,
1470 * our refcnt could be one higher than it ought to be. So we
1471 * check one further on the freelist than the count allows.
1472 */
1473 for (i = sp->slab_refcnt; i <= chunks; i++) {
1474 uint_t ndx;
1475
1476 dprintf(("bcp is %p\n", bcp));
1477
1478 if (bcp == NULL) {
1479 if (i == chunks)
1480 break;
1481 mdb_warn(
1482 "slab %p in cache %p freelist too short by %d\n",
1483 sp, addr, chunks - i);
1484 break;
1485 }
1486
1487 if (cp->cache_flags & KMF_HASH) {
1488 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1489 mdb_warn("failed to read bufctl ptr at %p",
1490 bcp);
1491 break;
1492 }
1493 buf = bc.bc_addr;
1494 } else {
1495 /*
1496 * Otherwise the buffer is (or should be) in the slab
1497 * that we've read in; determine its offset in the
1498 * slab, validate that it's not corrupt, and add to
1499 * our base address to find the kmem_bufctl_t. (Note
1500 * that we don't need to add the size of the bufctl
1501 * to our offset calculation because of the slop that's
1502 * allocated for the buffer at ubase.)
1503 */
1504 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1505
1506 if (offs > chunks * chunksize) {
1507 mdb_warn("found corrupt bufctl ptr %p"
1508 " in slab %p in cache %p\n", bcp,
1509 wsp->walk_addr, addr);
1510 break;
1511 }
1512
1513 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1514 buf = KMEM_BUF(cp, bcp);
1515 }
1516
1517 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518
1519 if (ndx > slabsize / cp->cache_bufsize) {
1520 /*
1521 * This is very wrong; we have managed to find
1522 * a buffer in the slab which shouldn't
1523 * actually be here. Emit a warning, and
1524 * try to continue.
1525 */
1526 mdb_warn("buf %p is out of range for "
1527 "slab %p, cache %p\n", buf, sp, addr);
1528 } else if (type & KM_ALLOCATED) {
1529 /*
1530 * we have found a buffer on the slab's freelist;
1531 * clear its entry
1532 */
1533 valid[ndx] = 0;
1534 } else {
1535 /*
1536 * Report this freed buffer
1537 */
1538 if (type & KM_BUFCTL) {
1539 ret = bufctl_walk_callback(cp, wsp,
1540 (uintptr_t)bcp);
1541 } else {
1542 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543 }
1544 if (ret != WALK_NEXT)
1545 return (ret);
1546 }
1547
1548 bcp = bc.bc_next;
1549 }
1550
1551 if (bcp != NULL) {
1552 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1553 sp, addr, bcp));
1554 }
1555
1556 /*
1557 * If we are walking freed buffers, the loop above handled reporting
1558 * them.
1559 */
1560 if (type & KM_FREE)
1561 return (WALK_NEXT);
1562
1563 if (type & KM_BUFCTL) {
1564 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1565 "cache %p\n", addr);
1566 return (WALK_ERR);
1567 }
1568
1569 /*
1570 * Report allocated buffers, skipping buffers in the magazine layer.
1571 * We only get this far for small-slab caches.
1572 */
1573 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1574 buf = (char *)kbase + i * chunksize;
1575
1576 if (!valid[i])
1577 continue; /* on slab freelist */
1578
1579 if (magcnt > 0 &&
1580 bsearch(&buf, maglist, magcnt, sizeof (void *),
1581 addrcmp) != NULL)
1582 continue; /* in magazine layer */
1583
1584 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585 }
1586 return (ret);
1587 }
1588
1589 void
1590 kmem_walk_fini(mdb_walk_state_t *wsp)
1591 {
1592 kmem_walk_t *kmw = wsp->walk_data;
1593 uintptr_t chunksize;
1594 uintptr_t slabsize;
1595
1596 if (kmw == NULL)
1597 return;
1598
1599 if (kmw->kmw_maglist != NULL)
1600 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601
1602 chunksize = kmw->kmw_cp->cache_chunksize;
1603 slabsize = kmw->kmw_cp->cache_slabsize;
1604
1605 if (kmw->kmw_valid != NULL)
1606 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607 if (kmw->kmw_ubase != NULL)
1608 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609
1610 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611 mdb_free(kmw, sizeof (kmem_walk_t));
1612 }
1613
1614 /*ARGSUSED*/
1615 static int
1616 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 {
1618 /*
1619 * Buffers allocated from NOTOUCH caches can also show up as freed
1620 * memory in other caches. This can be a little confusing, so we
1621 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1622 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623 */
1624 if (c->cache_cflags & KMC_NOTOUCH)
1625 return (WALK_NEXT);
1626
1627 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628 wsp->walk_cbdata, addr) == -1)
1629 return (WALK_DONE);
1630
1631 return (WALK_NEXT);
1632 }
1633
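/*
 * When no cache address is given, implement a global walk by walking every
 * kmem cache and applying the named per-cache walker to each one in turn.
 */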
1634 #define KMEM_WALK_ALL(name, wsp) { \
1635 wsp->walk_data = (name); \
1636 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1637 return (WALK_ERR); \
1638 return (WALK_DONE); \
1639 }
1640
1641 int
1642 kmem_walk_init(mdb_walk_state_t *wsp)
1643 {
1644 if (wsp->walk_arg != NULL)
1645 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646
1647 if (wsp->walk_addr == 0)
1648 KMEM_WALK_ALL("kmem", wsp);
1649 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 }
1651
1652 int
1653 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 {
1655 if (wsp->walk_addr == 0)
1656 KMEM_WALK_ALL("bufctl", wsp);
1657 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 }
1659
1660 int
1661 freemem_walk_init(mdb_walk_state_t *wsp)
1662 {
1663 if (wsp->walk_addr == 0)
1664 KMEM_WALK_ALL("freemem", wsp);
1665 return (kmem_walk_init_common(wsp, KM_FREE));
1666 }
1667
1668 int
1669 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 {
1671 if (wsp->walk_addr == 0)
1672 KMEM_WALK_ALL("freemem_constructed", wsp);
1673 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 }
1675
1676 int
1677 freectl_walk_init(mdb_walk_state_t *wsp)
1678 {
1679 if (wsp->walk_addr == 0)
1680 KMEM_WALK_ALL("freectl", wsp);
1681 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 }
1683
1684 int
1685 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 {
1687 if (wsp->walk_addr == 0)
1688 KMEM_WALK_ALL("freectl_constructed", wsp);
1689 return (kmem_walk_init_common(wsp,
1690 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 }
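/*
 * Illustrative invocations of the walkers above:
 *	> ::walk kmem			(every allocated buffer, all caches)
 *	> addr::walk freemem		(freed buffers in one cache)
 *	> addr::walk bufctl | ::bufctl	(allocated bufctls in one cache)
 */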
1692
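/*
 * The bufctl_history walker follows a bufctl's bc_lastlog chain backwards
 * through the transaction log, reporting progressively older log entries for
 * the same buffer until the chain ends or stops looking sane.
 */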
1693 typedef struct bufctl_history_walk {
1694 void *bhw_next;
1695 kmem_cache_t *bhw_cache;
1696 kmem_slab_t *bhw_slab;
1697 hrtime_t bhw_timestamp;
1698 } bufctl_history_walk_t;
1699
1700 int
1701 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 {
1703 bufctl_history_walk_t *bhw;
1704 kmem_bufctl_audit_t bc;
1705 kmem_bufctl_audit_t bcn;
1706
1707 if (wsp->walk_addr == 0) {
1708 mdb_warn("bufctl_history walk doesn't support global walks\n");
1709 return (WALK_ERR);
1710 }
1711
1712 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714 return (WALK_ERR);
1715 }
1716
1717 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718 bhw->bhw_timestamp = 0;
1719 bhw->bhw_cache = bc.bc_cache;
1720 bhw->bhw_slab = bc.bc_slab;
1721
1722 /*
1723 * sometimes the first log entry matches the base bufctl; in that
1724 * case, skip the base bufctl.
1725 */
1726 if (bc.bc_lastlog != NULL &&
1727 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728 bc.bc_addr == bcn.bc_addr &&
1729 bc.bc_cache == bcn.bc_cache &&
1730 bc.bc_slab == bcn.bc_slab &&
1731 bc.bc_timestamp == bcn.bc_timestamp &&
1732 bc.bc_thread == bcn.bc_thread)
1733 bhw->bhw_next = bc.bc_lastlog;
1734 else
1735 bhw->bhw_next = (void *)wsp->walk_addr;
1736
1737 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738 wsp->walk_data = bhw;
1739
1740 return (WALK_NEXT);
1741 }
1742
1743 int
1744 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 {
1746 bufctl_history_walk_t *bhw = wsp->walk_data;
1747 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748 uintptr_t baseaddr = wsp->walk_addr;
1749 kmem_bufctl_audit_t bc;
1750
1751 if (addr == 0)
1752 return (WALK_DONE);
1753
1754 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756 return (WALK_ERR);
1757 }
1758
1759 /*
1760 * The bufctl is only valid if the address, cache, and slab are
1761 * correct. We also check that the timestamp is decreasing, to
1762 * prevent infinite loops.
1763 */
1764 if ((uintptr_t)bc.bc_addr != baseaddr ||
1765 bc.bc_cache != bhw->bhw_cache ||
1766 bc.bc_slab != bhw->bhw_slab ||
1767 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768 return (WALK_DONE);
1769
1770 bhw->bhw_next = bc.bc_lastlog;
1771 bhw->bhw_timestamp = bc.bc_timestamp;
1772
1773 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 }
1775
1776 void
1777 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 {
1779 bufctl_history_walk_t *bhw = wsp->walk_data;
1780
1781 mdb_free(bhw, sizeof (*bhw));
1782 }
1783
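/*
 * The kmem_log walker slurps in the entire transaction log and reports its
 * bufctl entries sorted by timestamp, most recent first (see bufctlcmp()
 * above).
 */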
1784 typedef struct kmem_log_walk {
1785 kmem_bufctl_audit_t *klw_base;
1786 kmem_bufctl_audit_t **klw_sorted;
1787 kmem_log_header_t klw_lh;
1788 size_t klw_size;
1789 size_t klw_maxndx;
1790 size_t klw_ndx;
1791 } kmem_log_walk_t;
1792
1793 int
1794 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 {
1796 uintptr_t lp = wsp->walk_addr;
1797 kmem_log_walk_t *klw;
1798 kmem_log_header_t *lhp;
1799 int maxndx, i, j, k;
1800
1801 /*
1802 * By default (global walk), walk the kmem_transaction_log. Otherwise
1803 * read the log whose kmem_log_header_t is stored at walk_addr.
1804 */
1805 if (lp == 0 && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806 mdb_warn("failed to read 'kmem_transaction_log'");
1807 return (WALK_ERR);
1808 }
1809
1810 if (lp == 0) {
1811 mdb_warn("log is disabled\n");
1812 return (WALK_ERR);
1813 }
1814
1815 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816 lhp = &klw->klw_lh;
1817
1818 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819 mdb_warn("failed to read log header at %p", lp);
1820 mdb_free(klw, sizeof (kmem_log_walk_t));
1821 return (WALK_ERR);
1822 }
1823
1824 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827
1828 if (mdb_vread(klw->klw_base, klw->klw_size,
1829 (uintptr_t)lhp->lh_base) == -1) {
1830 mdb_warn("failed to read log at base %p", lhp->lh_base);
1831 mdb_free(klw->klw_base, klw->klw_size);
1832 mdb_free(klw, sizeof (kmem_log_walk_t));
1833 return (WALK_ERR);
1834 }
1835
1836 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838
1839 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842
1843 for (j = 0; j < maxndx; j++)
1844 klw->klw_sorted[k++] = &chunk[j];
1845 }
1846
1847 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848 (int(*)(const void *, const void *))bufctlcmp);
1849
1850 klw->klw_maxndx = k;
1851 wsp->walk_data = klw;
1852
1853 return (WALK_NEXT);
1854 }
1855
1856 int
1857 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 {
1859 kmem_log_walk_t *klw = wsp->walk_data;
1860 kmem_bufctl_audit_t *bcp;
1861
1862 if (klw->klw_ndx == klw->klw_maxndx)
1863 return (WALK_DONE);
1864
1865 bcp = klw->klw_sorted[klw->klw_ndx++];
1866
1867 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 }
1870
1871 void
1872 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 {
1874 kmem_log_walk_t *klw = wsp->walk_data;
1875
1876 mdb_free(klw->klw_base, klw->klw_size);
1877 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878 sizeof (kmem_bufctl_audit_t *));
1879 mdb_free(klw, sizeof (kmem_log_walk_t));
1880 }
1881
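/*
 * The allocdby and freedby walkers report, for a given thread address, every
 * audited bufctl whose last allocation (or free) was performed by that
 * thread, sorted from most recent to least recent.
 */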
1882 typedef struct allocdby_bufctl {
1883 uintptr_t abb_addr;
1884 hrtime_t abb_ts;
1885 } allocdby_bufctl_t;
1886
1887 typedef struct allocdby_walk {
1888 const char *abw_walk;
1889 uintptr_t abw_thread;
1890 size_t abw_nbufs;
1891 size_t abw_size;
1892 allocdby_bufctl_t *abw_buf;
1893 size_t abw_ndx;
1894 } allocdby_walk_t;
1895
1896 int
1897 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898 allocdby_walk_t *abw)
1899 {
1900 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901 return (WALK_NEXT);
1902
1903 if (abw->abw_nbufs == abw->abw_size) {
1904 allocdby_bufctl_t *buf;
1905 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906
1907 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908
1909 bcopy(abw->abw_buf, buf, oldsize);
1910 mdb_free(abw->abw_buf, oldsize);
1911
1912 abw->abw_size <<= 1;
1913 abw->abw_buf = buf;
1914 }
1915
1916 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918 abw->abw_nbufs++;
1919
1920 return (WALK_NEXT);
1921 }
1922
1923 /*ARGSUSED*/
1924 int
1925 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 {
1927 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928 abw, addr) == -1) {
1929 mdb_warn("couldn't walk bufctl for cache %p", addr);
1930 return (WALK_DONE);
1931 }
1932
1933 return (WALK_NEXT);
1934 }
1935
1936 static int
1937 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 {
1939 if (lhs->abb_ts < rhs->abb_ts)
1940 return (1);
1941 if (lhs->abb_ts > rhs->abb_ts)
1942 return (-1);
1943 return (0);
1944 }
1945
1946 static int
1947 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 {
1949 allocdby_walk_t *abw;
1950
1951 if (wsp->walk_addr == 0) {
1952 mdb_warn("allocdby walk doesn't support global walks\n");
1953 return (WALK_ERR);
1954 }
1955
1956 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957
1958 abw->abw_thread = wsp->walk_addr;
1959 abw->abw_walk = walk;
1960 abw->abw_size = 128; /* something reasonable */
1961 abw->abw_buf =
1962 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963
1964 wsp->walk_data = abw;
1965
1966 if (mdb_walk("kmem_cache",
1967 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968 mdb_warn("couldn't walk kmem_cache");
1969 allocdby_walk_fini(wsp);
1970 return (WALK_ERR);
1971 }
1972
1973 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974 (int(*)(const void *, const void *))allocdby_cmp);
1975
1976 return (WALK_NEXT);
1977 }
1978
1979 int
1980 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 {
1982 return (allocdby_walk_init_common(wsp, "bufctl"));
1983 }
1984
1985 int
1986 freedby_walk_init(mdb_walk_state_t *wsp)
1987 {
1988 return (allocdby_walk_init_common(wsp, "freectl"));
1989 }
1990
1991 int
1992 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 {
1994 allocdby_walk_t *abw = wsp->walk_data;
1995 kmem_bufctl_audit_t bc;
1996 uintptr_t addr;
1997
1998 if (abw->abw_ndx == abw->abw_nbufs)
1999 return (WALK_DONE);
2000
2001 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002
2003 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004 mdb_warn("couldn't read bufctl at %p", addr);
2005 return (WALK_DONE);
2006 }
2007
2008 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 }
2010
2011 void
2012 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 {
2014 allocdby_walk_t *abw = wsp->walk_data;
2015
2016 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017 mdb_free(abw, sizeof (allocdby_walk_t));
2018 }
2019
2020 /*ARGSUSED*/
2021 int
2022 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 {
2024 char c[MDB_SYM_NAMLEN];
2025 GElf_Sym sym;
2026 int i;
2027
2028 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029 for (i = 0; i < bcp->bc_depth; i++) {
2030 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032 continue;
2033 if (strncmp(c, "kmem_", 5) == 0)
2034 continue;
2035 mdb_printf("%s+0x%lx",
2036 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037 break;
2038 }
2039 mdb_printf("\n");
2040
2041 return (WALK_NEXT);
2042 }
2043
2044 static int
2045 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 {
2047 if (!(flags & DCMD_ADDRSPEC))
2048 return (DCMD_USAGE);
2049
2050 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051
2052 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053 mdb_warn("can't walk '%s' for %p", w, addr);
2054 return (DCMD_ERR);
2055 }
2056
2057 return (DCMD_OK);
2058 }
2059
2060 /*ARGSUSED*/
2061 int
2062 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 {
2064 return (allocdby_common(addr, flags, "allocdby"));
2065 }
2066
2067 /*ARGSUSED*/
2068 int
2069 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 {
2071 return (allocdby_common(addr, flags, "freedby"));
2072 }
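
/*
 * Illustrative ::allocdby usage (the thread address is hypothetical):
 *
 *	> 0xffffff01cc0e3c20::allocdby
 *	BUFCTL           TIMESTAMP CALLER
 *	...
 *
 * Entries are printed newest-first; the CALLER column shows the first stack
 * frame that is not a kmem_* function. ::freedby is identical but walks the
 * caches' freectl lists instead.
 */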
2073
2074 /*
2075 * Return a string describing the address in relation to the given thread's
2076 * stack.
2077 *
2078 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079 *
2080 * - If the address is above the stack pointer, return an empty string
2081 * signifying that the address is active.
2082 *
2083 * - If the address is below the stack pointer, and the thread is not on proc,
2084 * return " (below sp)".
2085 *
2086 * - If the address is below the stack pointer, and the thread is on proc,
2087 * return " (possibly below sp)". Depending on context, we may or may not
2088 * have an accurate t_sp.
2089 */
2090 static const char *
2091 stack_active(const kthread_t *t, uintptr_t addr)
2092 {
2093 uintptr_t panicstk;
2094 GElf_Sym sym;
2095
2096 if (t->t_state == TS_FREE)
2097 return (" (inactive interrupt thread)");
2098
2099 /*
2100 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2101 * no longer relates to the thread's real stack.
2102 */
2103 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104 panicstk = (uintptr_t)sym.st_value;
2105
2106 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107 return ("");
2108 }
2109
2110 if (addr >= t->t_sp + STACK_BIAS)
2111 return ("");
2112
2113 if (t->t_state == TS_ONPROC)
2114 return (" (possibly below sp)");
2115
2116 return (" (below sp)");
2117 }
2118
2119 /*
2120 * Additional state for the kmem and vmem ::whatis handlers
2121 */
2122 typedef struct whatis_info {
2123 mdb_whatis_t *wi_w;
2124 const kmem_cache_t *wi_cache;
2125 const vmem_t *wi_vmem;
2126 vmem_t *wi_msb_arena;
2127 size_t wi_slab_size;
2128 uint_t wi_slab_found;
2129 uint_t wi_kmem_lite_count;
2130 uint_t wi_freemem;
2131 } whatis_info_t;
2132
2133 /* call one of our dcmd functions with "-v" and the provided address */
2134 static void
2135 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 {
2137 mdb_arg_t a;
2138 a.a_type = MDB_TYPE_STRING;
2139 a.a_un.a_str = "-v";
2140
2141 mdb_printf(":\n");
2142 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 }
2144
2145 static void
2146 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 {
2148 #define KMEM_LITE_MAX 16
2149 pc_t callers[KMEM_LITE_MAX];
2150 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151
2152 kmem_buftag_t bt;
2153 intptr_t stat;
2154 const char *plural = "";
2155 int i;
2156
2157 /* validate our arguments and read in the buftag */
2158 if (count == 0 || count > KMEM_LITE_MAX ||
2159 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160 return;
2161
2162 /* validate the buffer state and read in the callers */
2163 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164
2165 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166 return;
2167
2168 if (mdb_vread(callers, count * sizeof (pc_t),
2169 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170 return;
2171
2172 /* If there aren't any filled in callers, bail */
2173 if (callers[0] == uninit)
2174 return;
2175
2176 plural = (callers[1] == uninit) ? "" : "s";
2177
2178 /* Everything's done and checked; print them out */
2179 mdb_printf(":\n");
2180
2181 mdb_inc_indent(8);
2182 mdb_printf("recent caller%s: %a", plural, callers[0]);
2183 for (i = 1; i < count; i++) {
2184 if (callers[i] == uninit)
2185 break;
2186 mdb_printf(", %a", callers[i]);
2187 }
2188 mdb_dec_indent(8);
2189 }
2190
2191 static void
2192 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193 uintptr_t baddr)
2194 {
2195 mdb_whatis_t *w = wi->wi_w;
2196
2197 const kmem_cache_t *cp = wi->wi_cache;
2198 /* LINTED pointer cast may result in improper alignment */
2199 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202
2203 mdb_whatis_report_object(w, maddr, addr, "");
2204
2205 if (baddr != 0 && !call_printer)
2206 mdb_printf("bufctl %p ", baddr);
2207
2208 mdb_printf("%s from %s",
2209 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210
2211 if (baddr != 0 && call_printer) {
2212 whatis_call_printer(bufctl, baddr);
2213 return;
2214 }
2215
2216 /* for KMF_LITE caches, try to print out the previous callers */
2217 if (!quiet && (cp->cache_flags & KMF_LITE))
2218 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219
2220 mdb_printf("\n");
2221 }
2222
2223 /*ARGSUSED*/
2224 static int
2225 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 {
2227 mdb_whatis_t *w = wi->wi_w;
2228
2229 uintptr_t cur;
2230 size_t size = wi->wi_cache->cache_bufsize;
2231
2232 while (mdb_whatis_match(w, addr, size, &cur))
2233 whatis_print_kmem(wi, cur, addr, 0);
2234
2235 return (WHATIS_WALKRET(w));
2236 }
2237
2238 /*ARGSUSED*/
2239 static int
2240 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 {
2242 mdb_whatis_t *w = wi->wi_w;
2243
2244 uintptr_t cur;
2245 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246 size_t size = wi->wi_cache->cache_bufsize;
2247
2248 while (mdb_whatis_match(w, addr, size, &cur))
2249 whatis_print_kmem(wi, cur, addr, baddr);
2250
2251 return (WHATIS_WALKRET(w));
2252 }
2253
2254 static int
2255 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 {
2257 mdb_whatis_t *w = wi->wi_w;
2258
2259 size_t size = vs->vs_end - vs->vs_start;
2260 uintptr_t cur;
2261
2262 /* We're not interested in anything but alloc and free segments */
2263 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264 return (WALK_NEXT);
2265
2266 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268
2269 /*
2270 * If we're not printing it separately, provide the vmem_seg
2271 * pointer if it has a stack trace.
2272 */
2273 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276 mdb_printf("vmem_seg %p ", addr);
2277 }
2278
2279 mdb_printf("%s from the %s vmem arena",
2280 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281 wi->wi_vmem->vm_name);
2282
2283 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284 whatis_call_printer(vmem_seg, addr);
2285 else
2286 mdb_printf("\n");
2287 }
2288
2289 return (WHATIS_WALKRET(w));
2290 }
2291
2292 static int
2293 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 {
2295 mdb_whatis_t *w = wi->wi_w;
2296 const char *nm = vmem->vm_name;
2297
2298 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300
2301 if (identifier != idspace)
2302 return (WALK_NEXT);
2303
2304 wi->wi_vmem = vmem;
2305
2306 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307 mdb_printf("Searching vmem arena %s...\n", nm);
2308
2309 if (mdb_pwalk("vmem_seg",
2310 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311 mdb_warn("can't walk vmem_seg for %p", addr);
2312 return (WALK_NEXT);
2313 }
2314
2315 return (WHATIS_WALKRET(w));
2316 }
2317
2318 /*ARGSUSED*/
2319 static int
2320 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 {
2322 mdb_whatis_t *w = wi->wi_w;
2323
2324 /* It must overlap with the slab data, or it's not interesting */
2325 if (mdb_whatis_overlaps(w,
2326 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327 wi->wi_slab_found++;
2328 return (WALK_DONE);
2329 }
2330 return (WALK_NEXT);
2331 }
2332
2333 static int
2334 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 {
2336 mdb_whatis_t *w = wi->wi_w;
2337
2338 char *walk, *freewalk;
2339 mdb_walk_cb_t func;
2340 int do_bufctl;
2341
2342 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344
2345 if (identifier != idspace)
2346 return (WALK_NEXT);
2347
2348 /* Override the '-b' flag as necessary */
2349 if (!(c->cache_flags & KMF_HASH))
2350 do_bufctl = FALSE; /* no bufctls to walk */
2351 else if (c->cache_flags & KMF_AUDIT)
2352 do_bufctl = TRUE; /* we always want debugging info */
2353 else
2354 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355
2356 if (do_bufctl) {
2357 walk = "bufctl";
2358 freewalk = "freectl";
2359 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360 } else {
2361 walk = "kmem";
2362 freewalk = "freemem";
2363 func = (mdb_walk_cb_t)whatis_walk_kmem;
2364 }
2365
2366 wi->wi_cache = c;
2367
2368 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369 mdb_printf("Searching %s...\n", c->cache_name);
2370
2371 /*
2372 * If more than two buffers live on each slab, figure out if we're
2373 * interested in anything in any slab before doing the more expensive
2374 * kmem/freemem (bufctl/freectl) walkers.
2375 */
2376 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377 if (!(c->cache_flags & KMF_HASH))
2378 wi->wi_slab_size -= sizeof (kmem_slab_t);
2379
2380 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381 wi->wi_slab_found = 0;
2382 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383 addr) == -1) {
2384 mdb_warn("can't find kmem_slab walker");
2385 return (WALK_DONE);
2386 }
2387 if (wi->wi_slab_found == 0)
2388 return (WALK_NEXT);
2389 }
2390
2391 wi->wi_freemem = FALSE;
2392 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393 mdb_warn("can't find %s walker", walk);
2394 return (WALK_DONE);
2395 }
2396
2397 if (mdb_whatis_done(w))
2398 return (WALK_DONE);
2399
2400 /*
2401 * We have searched for allocated memory; now search for freed memory.
2402 */
2403 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405
2406 wi->wi_freemem = TRUE;
2407 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408 mdb_warn("can't find %s walker", freewalk);
2409 return (WALK_DONE);
2410 }
2411
2412 return (WHATIS_WALKRET(w));
2413 }
2414
2415 static int
2416 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 {
2418 if (c->cache_arena == wi->wi_msb_arena ||
2419 (c->cache_cflags & KMC_NOTOUCH))
2420 return (WALK_NEXT);
2421
2422 return (whatis_walk_cache(addr, c, wi));
2423 }
2424
2425 static int
2426 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 {
2428 if (c->cache_arena != wi->wi_msb_arena)
2429 return (WALK_NEXT);
2430
2431 return (whatis_walk_cache(addr, c, wi));
2432 }
2433
2434 static int
2435 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 {
2437 if (c->cache_arena == wi->wi_msb_arena ||
2438 !(c->cache_cflags & KMC_NOTOUCH))
2439 return (WALK_NEXT);
2440
2441 return (whatis_walk_cache(addr, c, wi));
2442 }
2443
2444 static int
2445 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 {
2447 uintptr_t cur;
2448 uintptr_t saddr;
2449 size_t size;
2450
2451 /*
2452 * Often, one calls ::whatis on an address from a thread structure.
2453 * We use this opportunity to short circuit this case...
2454 */
2455 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456 mdb_whatis_report_object(w, cur, addr,
2457 "allocated as a thread structure\n");
2458
2459 /*
2460 * Now check the stack
2461 */
2462 if (t->t_stkbase == NULL)
2463 return (WALK_NEXT);
2464
2465 /*
2466 * This assumes that t_stk is the end of the stack, but it's really
2467 * only the initial stack pointer for the thread. Arguments to the
2468 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2469 * that 't->t_stk::whatis' reports "part of t's stack", we include
2470 * t_stk in the range (the "+ 1", below), but the kernel should
2471 * really include the full stack bounds where we can find it.
2472 */
2473 saddr = (uintptr_t)t->t_stkbase;
2474 size = (uintptr_t)t->t_stk - saddr + 1;
2475 while (mdb_whatis_match(w, saddr, size, &cur))
2476 mdb_whatis_report_object(w, cur, cur,
2477 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478
2479 return (WHATIS_WALKRET(w));
2480 }
2481
2482 static void
2483 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484 uintptr_t base, size_t size, const char *where)
2485 {
2486 uintptr_t cur;
2487
2488 /*
2489 * Since we're searching for addresses inside a module, we report
2490 * them as symbols.
2491 */
2492 while (mdb_whatis_match(w, base, size, &cur))
2493 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 }
2495
2496 struct kmem_ctf_module {
2497 Shdr *symhdr;
2498 char *symtbl;
2499 unsigned int nsyms;
2500 char *symspace;
2501 size_t symsize;
2502 char *text;
2503 char *data;
2504 uintptr_t bss;
2505 size_t text_size;
2506 size_t data_size;
2507 size_t bss_size;
2508 };
2509
2510 static int
2511 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2512 {
2513 char name[MODMAXNAMELEN];
2514 struct kmem_ctf_module mod;
2515 Shdr shdr;
2516
2517 if (m->mod_mp == NULL)
2518 return (WALK_NEXT);
2519
2520 if (mdb_ctf_vread(&mod, "struct module", "struct kmem_ctf_module",
2521 (uintptr_t)m->mod_mp, 0) == -1) {
2522 mdb_warn("couldn't read modctl %p's module", addr);
2523 return (WALK_NEXT);
2524 }
2525
2526 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2527 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2528
2529 whatis_modctl_match(w, name,
2530 (uintptr_t)mod.text, mod.text_size, "text segment");
2531 whatis_modctl_match(w, name,
2532 (uintptr_t)mod.data, mod.data_size, "data segment");
2533 whatis_modctl_match(w, name,
2534 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2535
2536 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2537 mdb_warn("couldn't read symbol header for %p's module", addr);
2538 return (WALK_NEXT);
2539 }
2540
2541 whatis_modctl_match(w, name,
2542 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2543 whatis_modctl_match(w, name,
2544 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2545
2546 return (WHATIS_WALKRET(w));
2547 }
2548
2549 /*ARGSUSED*/
2550 static int
2551 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2552 {
2553 uintptr_t cur;
2554
2555 uintptr_t base = (uintptr_t)seg->pages;
2556 size_t size = (uintptr_t)seg->epages - base;
2557
2558 while (mdb_whatis_match(w, base, size, &cur)) {
2559 /* round our found pointer down to the page_t base. */
2560 size_t offset = (cur - base) % sizeof (page_t);
2561
2562 mdb_whatis_report_object(w, cur, cur - offset,
2563 "allocated as a page structure\n");
2564 }
2565
2566 return (WHATIS_WALKRET(w));
2567 }
2568
2569 /*ARGSUSED*/
2570 static int
2571 whatis_run_modules(mdb_whatis_t *w, void *arg)
2572 {
2573 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2574 mdb_warn("couldn't find modctl walker");
2575 return (1);
2576 }
2577 return (0);
2578 }
2579
2580 /*ARGSUSED*/
2581 static int
2582 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2583 {
2584 /*
2585 * Now search all thread stacks. Yes, this is a little weak; we
2586 * can save a lot of work by first checking to see if the
2587 * address is in segkp vs. segkmem. But hey, computers are
2588 * fast.
2589 */
2590 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2591 mdb_warn("couldn't find thread walker");
2592 return (1);
2593 }
2594 return (0);
2595 }
2596
2597 /*ARGSUSED*/
2598 static int
2599 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2600 {
2601 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2602 mdb_warn("couldn't find memseg walker");
2603 return (1);
2604 }
2605 return (0);
2606 }
2607
2608 /*ARGSUSED*/
2609 static int
2610 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2611 {
2612 whatis_info_t wi;
2613
2614 bzero(&wi, sizeof (wi));
2615 wi.wi_w = w;
2616
2617 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2618 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2619
2620 if (mdb_readvar(&wi.wi_kmem_lite_count,
2621 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2622 wi.wi_kmem_lite_count = 0;
2623
2624 /*
2625 * We process kmem caches in the following order:
2626 *
2627 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2628 * metadata (can be huge with KMF_AUDIT)
2629 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2630 */
2631 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2632 &wi) == -1 ||
2633 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2634 &wi) == -1 ||
2635 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2636 &wi) == -1) {
2637 mdb_warn("couldn't find kmem_cache walker");
2638 return (1);
2639 }
2640 return (0);
2641 }
2642
2643 /*ARGSUSED*/
2644 static int
2645 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2646 {
2647 whatis_info_t wi;
2648
2649 bzero(&wi, sizeof (wi));
2650 wi.wi_w = w;
2651
2652 if (mdb_walk("vmem_postfix",
2653 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2654 mdb_warn("couldn't find vmem_postfix walker");
2655 return (1);
2656 }
2657 return (0);
2658 }
2659
2660 typedef struct kmem_log_cpu {
2661 uintptr_t kmc_low;
2662 uintptr_t kmc_high;
2663 } kmem_log_cpu_t;
2664
2665 typedef struct kmem_log_data {
2666 uintptr_t kmd_addr;
2667 kmem_log_cpu_t *kmd_cpu;
2668 } kmem_log_data_t;
2669
2670 int
2671 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2672 kmem_log_data_t *kmd)
2673 {
2674 int i;
2675 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2676 size_t bufsize;
2677
2678 for (i = 0; i < NCPU; i++) {
2679 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2680 break;
2681 }
2682
2683 if (kmd->kmd_addr) {
2684 if (b->bc_cache == NULL)
2685 return (WALK_NEXT);
2686
2687 if (mdb_vread(&bufsize, sizeof (bufsize),
2688 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2689 mdb_warn(
2690 "failed to read cache_bufsize for cache at %p",
2691 b->bc_cache);
2692 return (WALK_ERR);
2693 }
2694
2695 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2696 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2697 return (WALK_NEXT);
2698 }
2699
2700 if (i == NCPU)
2701 mdb_printf("   ");
2702 else
2703 mdb_printf("%3d", i);
2704
2705 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2706 b->bc_timestamp, b->bc_thread);
2707
2708 return (WALK_NEXT);
2709 }
2710
2711 /*ARGSUSED*/
2712 int
2713 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2714 {
2715 kmem_log_header_t lh;
2716 kmem_cpu_log_header_t clh;
2717 uintptr_t lhp, clhp;
2718 int ncpus;
2719 uintptr_t *cpu;
2720 GElf_Sym sym;
2721 kmem_log_cpu_t *kmc;
2722 int i;
2723 kmem_log_data_t kmd;
2724 uint_t opt_b = FALSE;
2725
2726 if (mdb_getopts(argc, argv,
2727 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2728 return (DCMD_USAGE);
2729
2730 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2731 mdb_warn("failed to read 'kmem_transaction_log'");
2732 return (DCMD_ERR);
2733 }
2734
2735 if (lhp == 0) {
2736 mdb_warn("no kmem transaction log\n");
2737 return (DCMD_ERR);
2738 }
2739
2740 mdb_readvar(&ncpus, "ncpus");
2741
2742 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2743 mdb_warn("failed to read log header at %p", lhp);
2744 return (DCMD_ERR);
2745 }
2746
2747 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2748
2749 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2750
2751 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2752 mdb_warn("couldn't find 'cpu' array");
2753 return (DCMD_ERR);
2754 }
2755
2756 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2757 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2758 NCPU * sizeof (uintptr_t), sym.st_size);
2759 return (DCMD_ERR);
2760 }
2761
2762 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2763 mdb_warn("failed to read cpu array at %p", sym.st_value);
2764 return (DCMD_ERR);
2765 }
2766
2767 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2768 kmd.kmd_addr = 0;
2769 kmd.kmd_cpu = kmc;
2770
2771 for (i = 0; i < NCPU; i++) {
2772
2773 if (cpu[i] == 0)
2774 continue;
2775
2776 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2777 mdb_warn("cannot read cpu %d's log header at %p",
2778 i, clhp);
2779 return (DCMD_ERR);
2780 }
2781
2782 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2783 (uintptr_t)lh.lh_base;
2784 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2785
2786 clhp += sizeof (kmem_cpu_log_header_t);
2787 }
2788
2789 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2790 "TIMESTAMP", "THREAD");
2791
2792 /*
2793 * If we have been passed an address, print out only log entries
2794 * corresponding to that address. With -b, the address is matched against
2795 * each entry's buffer; otherwise it is read as a bufctl and printed alone.
2796 */
2797 if (flags & DCMD_ADDRSPEC) {
2798 kmem_bufctl_audit_t b;
2799
2800 if (opt_b) {
2801 kmd.kmd_addr = addr;
2802 } else {
2803 if (mdb_vread(&b,
2804 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2805 mdb_warn("failed to read bufctl at %p", addr);
2806 return (DCMD_ERR);
2807 }
2808
2809 (void) kmem_log_walk(addr, &b, &kmd);
2810
2811 return (DCMD_OK);
2812 }
2813 }
2814
2815 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2816 mdb_warn("can't find kmem log walker");
2817 return (DCMD_ERR);
2818 }
2819
2820 return (DCMD_OK);
2821 }
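
/*
 * Illustrative ::kmem_log usage (the addresses are hypothetical):
 *
 *	> ::kmem_log			! dump every transaction log entry
 *	> 0xffffff01d3b2e3c0::kmem_log	! print the single entry for this bufctl
 *	> 0xffffff01d4000000::kmem_log -b	! entries whose buffer contains addr
 */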
2822
2823 typedef struct bufctl_history_cb {
2824 int bhc_flags;
2825 int bhc_argc;
2826 const mdb_arg_t *bhc_argv;
2827 int bhc_ret;
2828 } bufctl_history_cb_t;
2829
2830 /*ARGSUSED*/
2831 static int
2832 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2833 {
2834 bufctl_history_cb_t *bhc = arg;
2835
2836 bhc->bhc_ret =
2837 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2838
2839 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2840
2841 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2842 }
2843
2844 void
2845 bufctl_help(void)
2846 {
2847 mdb_printf("%s",
2848 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2849 mdb_dec_indent(2);
2850 mdb_printf("%<b>OPTIONS%</b>\n");
2851 mdb_inc_indent(2);
2852 mdb_printf("%s",
2853 " -v Display the full content of the bufctl, including its stack trace\n"
2854 " -h retrieve the bufctl's transaction history, if available\n"
2855 " -a addr\n"
2856 " filter out bufctls not involving the buffer at addr\n"
2857 " -c caller\n"
2858 " filter out bufctls without the function/PC in their stack trace\n"
2859 " -e earliest\n"
2860 " filter out bufctls timestamped before earliest\n"
2861 " -l latest\n"
2862 " filter out bufctls timestamped after latest\n"
2863 " -t thread\n"
2864 " filter out bufctls not involving thread\n");
2865 }
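
/*
 * Illustrative ::bufctl invocations (addresses are hypothetical):
 *
 *	> 0xffffff01d3b2e3c0::bufctl -v
 *	> ::walk kmem_cache | ::walk bufctl | ::bufctl -t 0xffffff01cc0e3c20
 *	> 0xffffff01d3b2e3c0::bufctl -h		! follow the transaction history
 */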
2866
2867 int
2868 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2869 {
2870 kmem_bufctl_audit_t bc;
2871 uint_t verbose = FALSE;
2872 uint_t history = FALSE;
2873 uint_t in_history = FALSE;
2874 uintptr_t caller = 0, thread = 0;
2875 uintptr_t laddr, haddr, baddr = 0;
2876 hrtime_t earliest = 0, latest = 0;
2877 int i, depth;
2878 char c[MDB_SYM_NAMLEN];
2879 GElf_Sym sym;
2880
2881 if (mdb_getopts(argc, argv,
2882 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2883 'h', MDB_OPT_SETBITS, TRUE, &history,
2884 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2885 'c', MDB_OPT_UINTPTR, &caller,
2886 't', MDB_OPT_UINTPTR, &thread,
2887 'e', MDB_OPT_UINT64, &earliest,
2888 'l', MDB_OPT_UINT64, &latest,
2889 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2890 return (DCMD_USAGE);
2891
2892 if (!(flags & DCMD_ADDRSPEC))
2893 return (DCMD_USAGE);
2894
2895 if (in_history && !history)
2896 return (DCMD_USAGE);
2897
2898 if (history && !in_history) {
2899 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2900 UM_SLEEP | UM_GC);
2901 bufctl_history_cb_t bhc;
2902
2903 nargv[0].a_type = MDB_TYPE_STRING;
2904 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2905
2906 for (i = 0; i < argc; i++)
2907 nargv[i + 1] = argv[i];
2908
2909 /*
2910 * When in history mode, we treat each element as if it
2911 * were in a separate loop, so that the headers group
2912 * bufctls with similar histories.
2913 */
2914 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2915 bhc.bhc_argc = argc + 1;
2916 bhc.bhc_argv = nargv;
2917 bhc.bhc_ret = DCMD_OK;
2918
2919 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2920 addr) == -1) {
2921 mdb_warn("unable to walk bufctl_history");
2922 return (DCMD_ERR);
2923 }
2924
2925 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2926 mdb_printf("\n");
2927
2928 return (bhc.bhc_ret);
2929 }
2930
2931 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2932 if (verbose) {
2933 mdb_printf("%16s %16s %16s %16s\n"
2934 "%<u>%16s %16s %16s %16s%</u>\n",
2935 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2936 "", "CACHE", "LASTLOG", "CONTENTS");
2937 } else {
2938 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2939 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2940 }
2941 }
2942
2943 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2944 mdb_warn("couldn't read bufctl at %p", addr);
2945 return (DCMD_ERR);
2946 }
2947
2948 /*
2949 * Guard against bogus bc_depth in case the bufctl is corrupt or
2950 * the address does not really refer to a bufctl.
2951 */
2952 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2953
2954 if (caller != 0) {
2955 laddr = caller;
2956 haddr = caller + sizeof (caller);
2957
2958 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2959 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2960 /*
2961 * We were provided an exact symbol value; any
2962 * address in the function is valid.
2963 */
2964 laddr = (uintptr_t)sym.st_value;
2965 haddr = (uintptr_t)sym.st_value + sym.st_size;
2966 }
2967
2968 for (i = 0; i < depth; i++)
2969 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2970 break;
2971
2972 if (i == depth)
2973 return (DCMD_OK);
2974 }
2975
2976 if (thread != 0 && (uintptr_t)bc.bc_thread != thread)
2977 return (DCMD_OK);
2978
2979 if (earliest != 0 && bc.bc_timestamp < earliest)
2980 return (DCMD_OK);
2981
2982 if (latest != 0 && bc.bc_timestamp > latest)
2983 return (DCMD_OK);
2984
2985 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2986 return (DCMD_OK);
2987
2988 if (flags & DCMD_PIPE_OUT) {
2989 mdb_printf("%#lr\n", addr);
2990 return (DCMD_OK);
2991 }
2992
2993 if (verbose) {
2994 mdb_printf(
2995 "%<b>%16p%</b> %16p %16llx %16p\n"
2996 "%16s %16p %16p %16p\n",
2997 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2998 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2999
3000 mdb_inc_indent(17);
3001 for (i = 0; i < depth; i++)
3002 mdb_printf("%a\n", bc.bc_stack[i]);
3003 mdb_dec_indent(17);
3004 mdb_printf("\n");
3005 } else {
3006 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
3007 bc.bc_timestamp, bc.bc_thread);
3008
3009 for (i = 0; i < depth; i++) {
3010 if (mdb_lookup_by_addr(bc.bc_stack[i],
3011 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
3012 continue;
3013 if (strncmp(c, "kmem_", 5) == 0)
3014 continue;
3015 mdb_printf(" %a\n", bc.bc_stack[i]);
3016 break;
3017 }
3018
3019 if (i >= depth)
3020 mdb_printf("\n");
3021 }
3022
3023 return (DCMD_OK);
3024 }
3025
3026 typedef struct kmem_verify {
3027 uint64_t *kmv_buf; /* buffer to read cache contents into */
3028 size_t kmv_size; /* number of bytes in kmv_buf */
3029 int kmv_corruption; /* > 0 if corruption found. */
3030 uint_t kmv_flags; /* dcmd flags */
3031 struct kmem_cache kmv_cache; /* the cache we're operating on */
3032 } kmem_verify_t;
3033
3034 /*
3035 * verify_pattern()
3036 * verify that buf is filled with the pattern pat.
3037 */
3038 static int64_t
3039 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3040 {
3041 /*LINTED*/
3042 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3043 uint64_t *buf;
3044
3045 for (buf = buf_arg; buf < bufend; buf++)
3046 if (*buf != pat)
3047 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3048 return (-1);
3049 }
3050
3051 /*
3052 * verify_buftag()
3053 * verify that btp->bt_bxstat == (bcp ^ pat)
3054 */
3055 static int
3056 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3057 {
3058 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3059 }
3060
3061 /*
3062 * verify_free()
3063 * verify the integrity of a free block of memory by checking
3064 * that it is filled with 0xdeadbeef and that its buftag is sane.
3065 */
3066 /*ARGSUSED1*/
3067 static int
3068 verify_free(uintptr_t addr, const void *data, void *private)
3069 {
3070 kmem_verify_t *kmv = (kmem_verify_t *)private;
3071 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3072 int64_t corrupt; /* corruption offset */
3073 kmem_buftag_t *buftagp; /* ptr to buftag */
3074 kmem_cache_t *cp = &kmv->kmv_cache;
3075 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3076
3077 /*LINTED*/
3078 buftagp = KMEM_BUFTAG(cp, buf);
3079
3080 /*
3081 * Read the buffer to check.
3082 */
3083 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3084 if (!besilent)
3085 mdb_warn("couldn't read %p", addr);
3086 return (WALK_NEXT);
3087 }
3088
3089 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3090 KMEM_FREE_PATTERN)) >= 0) {
3091 if (!besilent)
3092 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3093 addr, (uintptr_t)addr + corrupt);
3094 goto corrupt;
3095 }
3096 /*
3097 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3098 * the first bytes of the buffer, hence we cannot check for red
3099 * zone corruption.
3100 */
3101 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3102 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3103 if (!besilent)
3104 mdb_printf("buffer %p (free) seems to "
3105 "have a corrupt redzone pattern\n", addr);
3106 goto corrupt;
3107 }
3108
3109 /*
3110 * confirm bufctl pointer integrity.
3111 */
3112 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3113 if (!besilent)
3114 mdb_printf("buffer %p (free) has a corrupt "
3115 "buftag\n", addr);
3116 goto corrupt;
3117 }
3118
3119 return (WALK_NEXT);
3120 corrupt:
3121 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3122 mdb_printf("%p\n", addr);
3123 kmv->kmv_corruption++;
3124 return (WALK_NEXT);
3125 }
3126
3127 /*
3128 * verify_alloc()
3129 * Verify that the buftag of an allocated buffer makes sense with respect
3130 * to the buffer.
3131 */
3132 /*ARGSUSED1*/
3133 static int
3134 verify_alloc(uintptr_t addr, const void *data, void *private)
3135 {
3136 kmem_verify_t *kmv = (kmem_verify_t *)private;
3137 kmem_cache_t *cp = &kmv->kmv_cache;
3138 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3139 /*LINTED*/
3140 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3141 uint32_t *ip = (uint32_t *)buftagp;
3142 uint8_t *bp = (uint8_t *)buf;
3143 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3144 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3145
3146 /*
3147 * Read the buffer to check.
3148 */
3149 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3150 if (!besilent)
3151 mdb_warn("couldn't read %p", addr);
3152 return (WALK_NEXT);
3153 }
3154
3155 /*
3156 * There are two cases to handle:
3157 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3158 * 0xfeedfacefeedface at the end of it
3159 * 2. If the buf was alloc'd using kmem_alloc, it will have
3160 * 0xbb just past the end of the region in use. At the buftag,
3161 * it will have 0xfeedface (or, if the whole buffer is in use,
3162 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3163 * endianness), followed by 32 bits containing the offset of the
3164 * 0xbb byte in the buffer.
3165 *
3166 * Finally, the two 32-bit words that comprise the second half of the
3167 * buftag should xor to KMEM_BUFTAG_ALLOC
3168 */
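	/*
	 * For instance (sizes purely illustrative): an 8-byte kmem_alloc()
	 * satisfied from a larger cache buffer leaves bp[8] == KMEM_REDZONE_BYTE
	 * and KMEM_SIZE_DECODE(ip[1]) == 8, so it is accepted by the second
	 * case below; a kmem_cache_alloc() buffer is accepted by the first
	 * case when bt_redzone == KMEM_REDZONE_PATTERN.
	 */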
3169
3170 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3171 looks_ok = 1;
3172 else if (!KMEM_SIZE_VALID(ip[1]))
3173 size_ok = 0;
3174 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3175 looks_ok = 1;
3176 else
3177 size_ok = 0;
3178
3179 if (!size_ok) {
3180 if (!besilent)
3181 mdb_printf("buffer %p (allocated) has a corrupt "
3182 "redzone size encoding\n", addr);
3183 goto corrupt;
3184 }
3185
3186 if (!looks_ok) {
3187 if (!besilent)
3188 mdb_printf("buffer %p (allocated) has a corrupt "
3189 "redzone signature\n", addr);
3190 goto corrupt;
3191 }
3192
3193 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3194 if (!besilent)
3195 mdb_printf("buffer %p (allocated) has a "
3196 "corrupt buftag\n", addr);
3197 goto corrupt;
3198 }
3199
3200 return (WALK_NEXT);
3201 corrupt:
3202 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3203 mdb_printf("%p\n", addr);
3204
3205 kmv->kmv_corruption++;
3206 return (WALK_NEXT);
3207 }
3208
3209 /*ARGSUSED2*/
3210 int
3211 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3212 {
3213 if (flags & DCMD_ADDRSPEC) {
3214 int check_alloc = 0, check_free = 0;
3215 kmem_verify_t kmv;
3216
3217 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3218 addr) == -1) {
3219 mdb_warn("couldn't read kmem_cache %p", addr);
3220 return (DCMD_ERR);
3221 }
3222
3223 if ((kmv.kmv_cache.cache_dump.kd_unsafe ||
3224 kmv.kmv_cache.cache_dump.kd_alloc_fails) &&
3225 !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3226 mdb_warn("WARNING: cache was used during dump: "
3227 "corruption may be incorrectly reported\n");
3228 }
3229
3230 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3231 sizeof (kmem_buftag_t);
3232 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3233 kmv.kmv_corruption = 0;
3234 kmv.kmv_flags = flags;
3235
3236 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3237 check_alloc = 1;
3238 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3239 check_free = 1;
3240 } else {
3241 if (!(flags & DCMD_LOOP)) {
3242 mdb_warn("cache %p (%s) does not have "
3243 "redzone checking enabled\n", addr,
3244 kmv.kmv_cache.cache_name);
3245 }
3246 return (DCMD_ERR);
3247 }
3248
3249 if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3250 mdb_printf("Summary for cache '%s'\n",
3251 kmv.kmv_cache.cache_name);
3252 mdb_inc_indent(2);
3253 }
3254
3255 if (check_alloc)
3256 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3257 if (check_free)
3258 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3259
3260 if (!(flags & DCMD_PIPE_OUT)) {
3261 if (flags & DCMD_LOOP) {
3262 if (kmv.kmv_corruption == 0) {
3263 mdb_printf("%-*s %?p clean\n",
3264 KMEM_CACHE_NAMELEN,
3265 kmv.kmv_cache.cache_name, addr);
3266 } else {
3267 mdb_printf("%-*s %?p %d corrupt "
3268 "buffer%s\n", KMEM_CACHE_NAMELEN,
3269 kmv.kmv_cache.cache_name, addr,
3270 kmv.kmv_corruption,
3271 kmv.kmv_corruption > 1 ? "s" : "");
3272 }
3273 } else {
3274 /*
3275 * This is the more verbose mode, when the user
3276 * typed addr::kmem_verify. If the cache was
3277 * clean, nothing will have yet been printed. So
3278 * say something.
3279 */
3280 if (kmv.kmv_corruption == 0)
3281 mdb_printf("clean\n");
3282
3283 mdb_dec_indent(2);
3284 }
3285 }
3286 } else {
3287 /*
3288 * If the user didn't specify a cache to verify, we'll walk all
3289 * kmem_cache's, specifying ourself as a callback for each...
3290 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3291 */
3292
3293 if (!(flags & DCMD_PIPE_OUT)) {
3294 uintptr_t dump_curr;
3295 uintptr_t dump_end;
3296
3297 if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 &&
3298 mdb_readvar(&dump_end, "kmem_dump_end") != -1 &&
3299 dump_curr == dump_end) {
3300 mdb_warn("WARNING: exceeded kmem_dump_size; "
3301 "corruption may be incorrectly reported\n");
3302 }
3303
3304 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n",
3305 KMEM_CACHE_NAMELEN, "Cache Name", "Addr",
3306 "Cache Integrity");
3307 }
3308
3309 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3310 }
3311
3312 return (DCMD_OK);
3313 }
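
/*
 * Illustrative ::kmem_verify usage (the cache address is hypothetical):
 *
 *	> ::kmem_verify				! summary line per cache
 *	> 0xffffff01cc2e8008::kmem_verify	! detailed check of one cache
 *	> ::walk kmem_cache | ::kmem_verify	! roughly the no-address form
 */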
3314
3315 typedef struct vmem_node {
3316 struct vmem_node *vn_next;
3317 struct vmem_node *vn_parent;
3318 struct vmem_node *vn_sibling;
3319 struct vmem_node *vn_children;
3320 uintptr_t vn_addr;
3321 int vn_marked;
3322 vmem_t vn_vmem;
3323 } vmem_node_t;
3324
3325 typedef struct vmem_walk {
3326 vmem_node_t *vw_root;
3327 vmem_node_t *vw_current;
3328 } vmem_walk_t;
3329
3330 int
3331 vmem_walk_init(mdb_walk_state_t *wsp)
3332 {
3333 uintptr_t vaddr, paddr;
3334 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3335 vmem_walk_t *vw;
3336
3337 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3338 mdb_warn("couldn't read 'vmem_list'");
3339 return (WALK_ERR);
3340 }
3341
3342 while (vaddr != 0) {
3343 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3344 vp->vn_addr = vaddr;
3345 vp->vn_next = head;
3346 head = vp;
3347
3348 if (vaddr == wsp->walk_addr)
3349 current = vp;
3350
3351 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3352 mdb_warn("couldn't read vmem_t at %p", vaddr);
3353 goto err;
3354 }
3355
3356 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3357 }
3358
3359 for (vp = head; vp != NULL; vp = vp->vn_next) {
3360
3361 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
3362 vp->vn_sibling = root;
3363 root = vp;
3364 continue;
3365 }
3366
3367 for (parent = head; parent != NULL; parent = parent->vn_next) {
3368 if (parent->vn_addr != paddr)
3369 continue;
3370 vp->vn_sibling = parent->vn_children;
3371 parent->vn_children = vp;
3372 vp->vn_parent = parent;
3373 break;
3374 }
3375
3376 if (parent == NULL) {
3377 mdb_warn("couldn't find %p's parent (%p)\n",
3378 vp->vn_addr, paddr);
3379 goto err;
3380 }
3381 }
3382
3383 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3384 vw->vw_root = root;
3385
3386 if (current != NULL)
3387 vw->vw_current = current;
3388 else
3389 vw->vw_current = root;
3390
3391 wsp->walk_data = vw;
3392 return (WALK_NEXT);
3393 err:
3394 for (vp = head; head != NULL; vp = head) {
3395 head = vp->vn_next;
3396 mdb_free(vp, sizeof (vmem_node_t));
3397 }
3398
3399 return (WALK_ERR);
3400 }
3401
3402 int
3403 vmem_walk_step(mdb_walk_state_t *wsp)
3404 {
3405 vmem_walk_t *vw = wsp->walk_data;
3406 vmem_node_t *vp;
3407 int rval;
3408
3409 if ((vp = vw->vw_current) == NULL)
3410 return (WALK_DONE);
3411
3412 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3413
3414 if (vp->vn_children != NULL) {
3415 vw->vw_current = vp->vn_children;
3416 return (rval);
3417 }
3418
3419 do {
3420 vw->vw_current = vp->vn_sibling;
3421 vp = vp->vn_parent;
3422 } while (vw->vw_current == NULL && vp != NULL);
3423
3424 return (rval);
3425 }
3426
3427 /*
3428 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3429 * children are visited before their parent. We perform the postfix walk
3430 * iteratively (rather than recursively) to allow mdb to regain control
3431 * after each callback.
3432 */
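/*
 * For example, in a hypothetical hierarchy where arena A has children B and C,
 * and B in turn has a child D, the walk reports D before B, and both B and C
 * before A: an arena is visited only after all of its descendants.
 */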
3433 int
3434 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3435 {
3436 vmem_walk_t *vw = wsp->walk_data;
3437 vmem_node_t *vp = vw->vw_current;
3438 int rval;
3439
3440 /*
3441 * If this node is marked, then we know that we have already visited
3442 * all of its children. If the node has any siblings, they need to
3443 * be visited next; otherwise, we need to visit the parent. Note
3444 * that vp->vn_marked will only be zero on the first invocation of
3445 * the step function.
3446 */
3447 if (vp->vn_marked) {
3448 if (vp->vn_sibling != NULL)
3449 vp = vp->vn_sibling;
3450 else if (vp->vn_parent != NULL)
3451 vp = vp->vn_parent;
3452 else {
3453 /*
3454 * We have neither a parent, nor a sibling, and we
3455 * have already been visited; we're done.
3456 */
3457 return (WALK_DONE);
3458 }
3459 }
3460
3461 /*
3462 * Before we visit this node, visit its children.
3463 */
3464 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3465 vp = vp->vn_children;
3466
3467 vp->vn_marked = 1;
3468 vw->vw_current = vp;
3469 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3470
3471 return (rval);
3472 }
3473
3474 void
3475 vmem_walk_fini(mdb_walk_state_t *wsp)
3476 {
3477 vmem_walk_t *vw = wsp->walk_data;
3478 vmem_node_t *root = vw->vw_root;
3479 int done;
3480
3481 if (root == NULL)
3482 return;
3483
3484 if ((vw->vw_root = root->vn_children) != NULL)
3485 vmem_walk_fini(wsp);
3486
3487 vw->vw_root = root->vn_sibling;
3488 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3489 mdb_free(root, sizeof (vmem_node_t));
3490
3491 if (done) {
3492 mdb_free(vw, sizeof (vmem_walk_t));
3493 } else {
3494 vmem_walk_fini(wsp);
3495 }
3496 }
3497
3498 typedef struct vmem_seg_walk {
3499 uint8_t vsw_type;
3500 uintptr_t vsw_start;
3501 uintptr_t vsw_current;
3502 } vmem_seg_walk_t;
3503
3504 /*ARGSUSED*/
3505 int
3506 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3507 {
3508 vmem_seg_walk_t *vsw;
3509
3510 if (wsp->walk_addr == 0) {
3511 mdb_warn("vmem_%s does not support global walks\n", name);
3512 return (WALK_ERR);
3513 }
3514
3515 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3516
3517 vsw->vsw_type = type;
3518 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3519 vsw->vsw_current = vsw->vsw_start;
3520
3521 return (WALK_NEXT);
3522 }
3523
3524 /*
3525 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3526 */
3527 #define VMEM_NONE 0
3528
3529 int
3530 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3531 {
3532 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3533 }
3534
3535 int
3536 vmem_free_walk_init(mdb_walk_state_t *wsp)
3537 {
3538 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3539 }
3540
3541 int
3542 vmem_span_walk_init(mdb_walk_state_t *wsp)
3543 {
3544 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3545 }
3546
3547 int
3548 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3549 {
3550 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3551 }
3552
3553 int
3554 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3555 {
3556 vmem_seg_t seg;
3557 vmem_seg_walk_t *vsw = wsp->walk_data;
3558 uintptr_t addr = vsw->vsw_current;
3559 static size_t seg_size = 0;
3560 int rval;
3561
3562 if (!seg_size) {
3563 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3564 mdb_warn("failed to read 'vmem_seg_size'");
3565 seg_size = sizeof (vmem_seg_t);
3566 }
3567 }
3568
3569 if (seg_size < sizeof (seg))
3570 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3571
3572 if (mdb_vread(&seg, seg_size, addr) == -1) {
3573 mdb_warn("couldn't read vmem_seg at %p", addr);
3574 return (WALK_ERR);
3575 }
3576
3577 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3578 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3579 rval = WALK_NEXT;
3580 } else {
3581 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3582 }
3583
3584 if (vsw->vsw_current == vsw->vsw_start)
3585 return (WALK_DONE);
3586
3587 return (rval);
3588 }
3589
3590 void
3591 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3592 {
3593 vmem_seg_walk_t *vsw = wsp->walk_data;
3594
3595 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3596 }
3597
3598 #define VMEM_NAMEWIDTH 22
3599
3600 int
3601 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3602 {
3603 vmem_t v, parent;
3604 vmem_kstat_t *vkp = &v.vm_kstat;
3605 uintptr_t paddr;
3606 int ident = 0;
3607 char c[VMEM_NAMEWIDTH];
3608
3609 if (!(flags & DCMD_ADDRSPEC)) {
3610 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3611 mdb_warn("can't walk vmem");
3612 return (DCMD_ERR);
3613 }
3614 return (DCMD_OK);
3615 }
3616
3617 if (DCMD_HDRSPEC(flags))
3618 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3619 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3620 "TOTAL", "SUCCEED", "FAIL");
3621
3622 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3623 mdb_warn("couldn't read vmem at %p", addr);
3624 return (DCMD_ERR);
3625 }
3626
3627 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3628 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3629 mdb_warn("couldn't trace %p's ancestry", addr);
3630 ident = 0;
3631 break;
3632 }
3633 paddr = (uintptr_t)parent.vm_source;
3634 }
3635
3636 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3637
3638 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3639 addr, VMEM_NAMEWIDTH, c,
3640 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3641 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3642
3643 return (DCMD_OK);
3644 }
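
/*
 * Illustrative ::vmem usage (the arena address is hypothetical):
 *
 *	> ::vmem			! every arena, indented under its source
 *	> 0xffffff01cc0a1000::vmem	! a single arena's kstat summary
 */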
3645
3646 void
3647 vmem_seg_help(void)
3648 {
3649 mdb_printf("%s",
3650 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3651 "\n"
3652 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3653 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3654 "information.\n");
3655 mdb_dec_indent(2);
3656 mdb_printf("%<b>OPTIONS%</b>\n");
3657 mdb_inc_indent(2);
3658 mdb_printf("%s",
3659 " -v Display the full content of the vmem_seg, including its stack trace\n"
3660 " -s report the size of the segment, instead of the end address\n"
3661 " -c caller\n"
3662 " filter out segments without the function/PC in their stack trace\n"
3663 " -e earliest\n"
3664 " filter out segments timestamped before earliest\n"
3665 " -l latest\n"
3666 " filter out segments timestamped after latest\n"
3667 " -m minsize\n"
3668 " filter out segments smaller than minsize\n"
3669 " -M maxsize\n"
3670 " filter out segments larger than maxsize\n"
3671 " -t thread\n"
3672 " filter out segments not involving thread\n"
3673 " -T type\n"
3674 " filter out segments not of type 'type'\n"
3675 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3676 }
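
/*
 * Illustrative ::vmem_seg invocations (addresses are hypothetical):
 *
 *	> 0xffffff01cc0b2340::vmem_seg -v
 *	> 0xffffff01cc0a1000::walk vmem_alloc | ::vmem_seg -s -m 0x1000
 */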
3677
3678 /*ARGSUSED*/
3679 int
3680 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3681 {
3682 vmem_seg_t vs;
3683 pc_t *stk = vs.vs_stack;
3684 uintptr_t sz;
3685 uint8_t t;
3686 const char *type = NULL;
3687 GElf_Sym sym;
3688 char c[MDB_SYM_NAMLEN];
3689 int no_debug;
3690 int i;
3691 int depth;
3692 uintptr_t laddr, haddr;
3693
3694 uintptr_t caller = 0, thread = 0;
3695 uintptr_t minsize = 0, maxsize = 0;
3696
3697 hrtime_t earliest = 0, latest = 0;
3698
3699 uint_t size = 0;
3700 uint_t verbose = 0;
3701
3702 if (!(flags & DCMD_ADDRSPEC))
3703 return (DCMD_USAGE);
3704
3705 if (mdb_getopts(argc, argv,
3706 'c', MDB_OPT_UINTPTR, &caller,
3707 'e', MDB_OPT_UINT64, &earliest,
3708 'l', MDB_OPT_UINT64, &latest,
3709 's', MDB_OPT_SETBITS, TRUE, &size,
3710 'm', MDB_OPT_UINTPTR, &minsize,
3711 'M', MDB_OPT_UINTPTR, &maxsize,
3712 't', MDB_OPT_UINTPTR, &thread,
3713 'T', MDB_OPT_STR, &type,
3714 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3715 NULL) != argc)
3716 return (DCMD_USAGE);
3717
3718 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3719 if (verbose) {
3720 mdb_printf("%16s %4s %16s %16s %16s\n"
3721 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3722 "ADDR", "TYPE", "START", "END", "SIZE",
3723 "", "", "THREAD", "TIMESTAMP", "");
3724 } else {
3725 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3726 "START", size? "SIZE" : "END", "WHO");
3727 }
3728 }
3729
3730 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3731 mdb_warn("couldn't read vmem_seg at %p", addr);
3732 return (DCMD_ERR);
3733 }
3734
3735 if (type != NULL) {
3736 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3737 t = VMEM_ALLOC;
3738 else if (strcmp(type, "FREE") == 0)
3739 t = VMEM_FREE;
3740 else if (strcmp(type, "SPAN") == 0)
3741 t = VMEM_SPAN;
3742 else if (strcmp(type, "ROTR") == 0 ||
3743 strcmp(type, "ROTOR") == 0)
3744 t = VMEM_ROTOR;
3745 else if (strcmp(type, "WLKR") == 0 ||
3746 strcmp(type, "WALKER") == 0)
3747 t = VMEM_WALKER;
3748 else {
3749 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3750 type);
3751 return (DCMD_ERR);
3752 }
3753
3754 if (vs.vs_type != t)
3755 return (DCMD_OK);
3756 }
3757
3758 sz = vs.vs_end - vs.vs_start;
3759
3760 if (minsize != 0 && sz < minsize)
3761 return (DCMD_OK);
3762
3763 if (maxsize != 0 && sz > maxsize)
3764 return (DCMD_OK);
3765
3766 t = vs.vs_type;
3767 depth = vs.vs_depth;
3768
3769 /*
3770 * debug info, when present, is only accurate for VMEM_ALLOC segments
3771 */
3772 no_debug = (t != VMEM_ALLOC) ||
3773 (depth == 0 || depth > VMEM_STACK_DEPTH);
3774
3775 if (no_debug) {
3776 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3777 return (DCMD_OK); /* not enough info */
3778 } else {
3779 if (caller != 0) {
3780 laddr = caller;
3781 haddr = caller + sizeof (caller);
3782
3783 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3784 sizeof (c), &sym) != -1 &&
3785 caller == (uintptr_t)sym.st_value) {
3786 /*
3787 * We were provided an exact symbol value; any
3788 * address in the function is valid.
3789 */
3790 laddr = (uintptr_t)sym.st_value;
3791 haddr = (uintptr_t)sym.st_value + sym.st_size;
3792 }
3793
3794 for (i = 0; i < depth; i++)
3795 if (vs.vs_stack[i] >= laddr &&
3796 vs.vs_stack[i] < haddr)
3797 break;
3798
3799 if (i == depth)
3800 return (DCMD_OK);
3801 }
3802
3803 if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3804 return (DCMD_OK);
3805
3806 if (earliest != 0 && vs.vs_timestamp < earliest)
3807 return (DCMD_OK);
3808
3809 if (latest != 0 && vs.vs_timestamp > latest)
3810 return (DCMD_OK);
3811 }
3812
3813 type = (t == VMEM_ALLOC ? "ALLC" :
3814 t == VMEM_FREE ? "FREE" :
3815 t == VMEM_SPAN ? "SPAN" :
3816 t == VMEM_ROTOR ? "ROTR" :
3817 t == VMEM_WALKER ? "WLKR" :
3818 "????");
3819
3820 if (flags & DCMD_PIPE_OUT) {
3821 mdb_printf("%#lr\n", addr);
3822 return (DCMD_OK);
3823 }
3824
3825 if (verbose) {
3826 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16ld\n",
3827 addr, type, vs.vs_start, vs.vs_end, sz);
3828
3829 if (no_debug)
3830 return (DCMD_OK);
3831
3832 mdb_printf("%16s %4s %16p %16llx\n",
3833 "", "", vs.vs_thread, vs.vs_timestamp);
3834
3835 mdb_inc_indent(17);
3836 for (i = 0; i < depth; i++) {
3837 mdb_printf("%a\n", stk[i]);
3838 }
3839 mdb_dec_indent(17);
3840 mdb_printf("\n");
3841 } else {
3842 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3843 vs.vs_start, size? sz : vs.vs_end);
3844
3845 if (no_debug) {
3846 mdb_printf("\n");
3847 return (DCMD_OK);
3848 }
3849
3850 for (i = 0; i < depth; i++) {
3851 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3852 c, sizeof (c), &sym) == -1)
3853 continue;
3854 if (strncmp(c, "vmem_", 5) == 0)
3855 continue;
3856 break;
3857 }
3858 mdb_printf(" %a\n", stk[i]);
3859 }
3860 return (DCMD_OK);
3861 }
3862
3863 typedef struct kmalog_data {
3864 uintptr_t kma_addr;
3865 hrtime_t kma_newest;
3866 } kmalog_data_t;
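/*
 * ::kmalog walks one of the kmem logs and prints each record, with its saved
 * stack trace, as a "T-sec.nsec" offset behind the newest record seen.  A few
 * illustrative invocations (<addr> stands for any buffer address of
 * interest):
 *
 *	> ::kmalog		- kmem_transaction_log (the default)
 *	> ::kmalog fail		- kmem_failure_log
 *	> ::kmalog slab		- kmem_slab_log
 *	> ::kmalog zerosized	- kmem_zerosized_log
 *	> <addr>::kmalog	- only records whose buffer contains <addr>
 */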
3867
3868 /*ARGSUSED*/
3869 static int
3870 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3871 {
3872 char name[KMEM_CACHE_NAMELEN + 1];
3873 hrtime_t delta;
3874 int i, depth;
3875 size_t bufsize;
3876
3877 if (bcp->bc_timestamp == 0)
3878 return (WALK_DONE);
3879
3880 if (kma->kma_newest == 0)
3881 kma->kma_newest = bcp->bc_timestamp;
3882
3883 if (kma->kma_addr) {
3884 if (mdb_vread(&bufsize, sizeof (bufsize),
3885 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3886 mdb_warn(
3887 "failed to read cache_bufsize for cache at %p",
3888 bcp->bc_cache);
3889 return (WALK_ERR);
3890 }
3891
3892 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3893 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3894 return (WALK_NEXT);
3895 }
3896
3897 delta = kma->kma_newest - bcp->bc_timestamp;
3898 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3899
3900 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3901 &bcp->bc_cache->cache_name) <= 0)
3902 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3903
3904 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3905 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3906
3907 for (i = 0; i < depth; i++)
3908 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3909
3910 return (WALK_NEXT);
3911 }
3912
3913 int
3914 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3915 {
3916 const char *logname = "kmem_transaction_log";
3917 kmalog_data_t kma;
3918
3919 if (argc > 1)
3920 return (DCMD_USAGE);
3921
3922 kma.kma_newest = 0;
3923 if (flags & DCMD_ADDRSPEC)
3924 kma.kma_addr = addr;
3925 else
3926 kma.kma_addr = 0;
3927
3928 if (argc > 0) {
3929 if (argv->a_type != MDB_TYPE_STRING)
3930 return (DCMD_USAGE);
3931 if (strcmp(argv->a_un.a_str, "fail") == 0)
3932 logname = "kmem_failure_log";
3933 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3934 logname = "kmem_slab_log";
3935 else if (strcmp(argv->a_un.a_str, "zerosized") == 0)
3936 logname = "kmem_zerosized_log";
3937 else
3938 return (DCMD_USAGE);
3939 }
3940
3941 if (mdb_readvar(&addr, logname) == -1) {
3942 mdb_warn("failed to read %s log header pointer", logname);
3943 return (DCMD_ERR);
3944 }
3945
3946 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3947 mdb_warn("failed to walk kmem log");
3948 return (DCMD_ERR);
3949 }
3950
3951 return (DCMD_OK);
3952 }
3953
3954 /*
3955 * As the final lure for die-hard crash(8) users, we provide ::kmausers here.
3956 * The first piece is a structure which we use to accumulate kmem_cache_t
3957 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3958 * walker; we either add all caches, or ones named explicitly as arguments.
3959 */
3960
3961 typedef struct kmclist {
3962 const char *kmc_name; /* Name to match (or NULL) */
3963 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3964 int kmc_nelems; /* Num entries in kmc_caches */
3965 int kmc_size; /* Size of kmc_caches array */
3966 } kmclist_t;
3967
3968 static int
3969 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3970 {
3971 void *p;
3972 int s;
3973
3974 if (kmc->kmc_name == NULL ||
3975 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3976 /*
3977 * If we have a match, grow our array (if necessary), and then
3978 * add the virtual address of the matching cache to our list.
3979 */
3980 if (kmc->kmc_nelems >= kmc->kmc_size) {
3981 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3982 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3983
3984 bcopy(kmc->kmc_caches, p,
3985 sizeof (uintptr_t) * kmc->kmc_size);
3986
3987 kmc->kmc_caches = p;
3988 kmc->kmc_size = s;
3989 }
3990
3991 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3992 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3993 }
3994
3995 return (WALK_NEXT);
3996 }
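/*
 * A minimal sketch of how the list is populated (this mirrors what ::kmausers
 * does below; "thread_cache" is only an illustrative name):
 *
 *	kmclist_t kmc;
 *
 *	bzero(&kmc, sizeof (kmc));
 *	kmc.kmc_name = "thread_cache";		(or NULL to match every cache)
 *	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
 *
 * On return, kmc.kmc_caches[0 .. kmc.kmc_nelems - 1] holds the addresses of
 * the matching kmem_cache_t structures; kmc_add stops the walk early once a
 * named cache has been found.
 */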
3997
3998 /*
3999 * The second piece of ::kmausers is a hash table of allocations. Each
4000 * allocation owner is identified by its stack trace and data_size. We then
4001 * track the total bytes of all such allocations, and the number of allocations
4002 * to report at the end. Once we have a list of caches, we walk through the
4003 * allocated bufctls of each, and update our hash table accordingly.
4004 */
4005
4006 typedef struct kmowner {
4007 struct kmowner *kmo_head; /* First hash elt in bucket */
4008 struct kmowner *kmo_next; /* Next hash elt in chain */
4009 size_t kmo_signature; /* Hash table signature */
4010 uint_t kmo_num; /* Number of allocations */
4011 size_t kmo_data_size; /* Size of each allocation */
4012 size_t kmo_total_size; /* Total bytes of allocation */
4013 int kmo_depth; /* Depth of stack trace */
4014 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
4015 } kmowner_t;
4016
4017 typedef struct kmusers {
4018 uintptr_t kmu_addr; /* address of interest */
4019 const kmem_cache_t *kmu_cache; /* Current kmem cache */
4020 kmowner_t *kmu_hash; /* Hash table of owners */
4021 int kmu_nelems; /* Number of entries in use */
4022 int kmu_size; /* Total number of entries */
4023 } kmusers_t;
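/*
 * For concreteness, the signature and bucket used by kmu_add() below are
 * simply:
 *
 *	signature = data_size + bc_stack[0] + ... + bc_stack[depth - 1];
 *	bucket = signature & (kmu_size - 1);	(kmu_size is a power of two)
 *
 * Two bufctls are folded into the same kmowner_t only when their data_size,
 * stack depth, and every stack PC match exactly.
 */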
4024
4025 static void
4026 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4027 size_t size, size_t data_size)
4028 {
4029 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4030 size_t bucket, signature = data_size;
4031 kmowner_t *kmo, *kmoend;
4032
4033 /*
4034 * If the hash table is full, double its size and rehash everything.
4035 */
4036 if (kmu->kmu_nelems >= kmu->kmu_size) {
4037 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4038
4039 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4040 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4041 kmu->kmu_hash = kmo;
4042 kmu->kmu_size = s;
4043
4044 kmoend = kmu->kmu_hash + kmu->kmu_size;
4045 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4046 kmo->kmo_head = NULL;
4047
4048 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4049 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4050 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4051 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4052 kmu->kmu_hash[bucket].kmo_head = kmo;
4053 }
4054 }
4055
4056 /*
4057 * Finish computing the hash signature from the stack trace, and then
4058 * see if the owner is in the hash table. If so, update our stats.
4059 */
4060 for (i = 0; i < depth; i++)
4061 signature += bcp->bc_stack[i];
4062
4063 bucket = signature & (kmu->kmu_size - 1);
4064
4065 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4066 if (kmo->kmo_signature == signature) {
4067 size_t difference = 0;
4068
4069 difference |= kmo->kmo_data_size - data_size;
4070 difference |= kmo->kmo_depth - depth;
4071
4072 for (i = 0; i < depth; i++) {
4073 difference |= kmo->kmo_stack[i] -
4074 bcp->bc_stack[i];
4075 }
4076
4077 if (difference == 0) {
4078 kmo->kmo_total_size += size;
4079 kmo->kmo_num++;
4080 return;
4081 }
4082 }
4083 }
4084
4085 /*
4086 * If the owner is not yet hashed, grab the next element and fill it
4087 * in based on the allocation information.
4088 */
4089 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4090 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4091 kmu->kmu_hash[bucket].kmo_head = kmo;
4092
4093 kmo->kmo_signature = signature;
4094 kmo->kmo_num = 1;
4095 kmo->kmo_data_size = data_size;
4096 kmo->kmo_total_size = size;
4097 kmo->kmo_depth = depth;
4098
4099 for (i = 0; i < depth; i++)
4100 kmo->kmo_stack[i] = bcp->bc_stack[i];
4101 }
4102
4103 /*
4104 * When ::kmausers is invoked without the -f flag, we simply update our hash
4105 * table with the information from each allocated bufctl.
4106 */
4107 /*ARGSUSED*/
4108 static int
4109 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4110 {
4111 const kmem_cache_t *cp = kmu->kmu_cache;
4112
4113 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4114 return (WALK_NEXT);
4115 }
4116
4117 /*
4118 * When ::kmausers is invoked with the -f flag, we print out the information
4119 * for each bufctl as well as updating the hash table.
4120 */
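/*
 * Each such line has the form (all fields come from the bufctl and its cache;
 * the values themselves are, of course, dump-specific):
 *
 *	size <bufsize>, addr <buf>, thread <thread>, cache <cache name>
 *
 * and is followed by the bufctl's saved stack trace.
 */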
4121 static int
4122 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4123 {
4124 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4125 const kmem_cache_t *cp = kmu->kmu_cache;
4126 kmem_bufctl_t bufctl;
4127
4128 if (kmu->kmu_addr) {
4129 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4130 mdb_warn("couldn't read bufctl at %p", addr);
4131 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4132 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4133 cp->cache_bufsize)
4134 return (WALK_NEXT);
4135 }
4136
4137 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4138 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4139
4140 for (i = 0; i < depth; i++)
4141 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4142
4143 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4144 return (WALK_NEXT);
4145 }
4146
4147 /*
4148  * We sort our results by total allocation size, largest first.
4149 */
4150 static int
4151 kmownercmp(const void *lp, const void *rp)
4152 {
4153 const kmowner_t *lhs = lp;
4154 const kmowner_t *rhs = rp;
4155
4156 return (rhs->kmo_total_size - lhs->kmo_total_size);
4157 }
4158
4159 /*
4160 * The main engine of ::kmausers is relatively straightforward: First we
4161 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4162 * iterate over the allocated bufctls of each cache in the list. Finally,
4163 * we sort and print our results.
4164 */
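/*
 * Illustrative invocations (the cache name is an example only):
 *
 *	> ::kmausers			all KMF_AUDIT caches, large users only
 *	> ::kmausers -e			include users below the 8K / 100-buffer
 *					reporting thresholds
 *	> ::kmausers -f kmem_alloc_256	also print every matching bufctl
 *	> <addr>::kmausers		only allocations containing <addr>
 *					(implies -f)
 */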
4165 /*ARGSUSED*/
4166 int
4167 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4168 {
4169 int mem_threshold = 8192; /* Minimum # bytes for printing */
4170 int cnt_threshold = 100; /* Minimum # blocks for printing */
4171 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4172 int do_all_caches = 1; /* Do all caches (no arguments) */
4173 int opt_e = FALSE; /* Include "small" users */
4174 int opt_f = FALSE; /* Print stack traces */
4175
4176 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4177 kmowner_t *kmo, *kmoend;
4178 int i, oelems;
4179
4180 kmclist_t kmc;
4181 kmusers_t kmu;
4182
4183 bzero(&kmc, sizeof (kmc));
4184 bzero(&kmu, sizeof (kmu));
4185
4186 while ((i = mdb_getopts(argc, argv,
4187 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4188 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4189
4190 argv += i; /* skip past options we just processed */
4191 argc -= i; /* adjust argc */
4192
4193 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4194 return (DCMD_USAGE);
4195
4196 oelems = kmc.kmc_nelems;
4197 kmc.kmc_name = argv->a_un.a_str;
4198 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4199
4200 if (kmc.kmc_nelems == oelems) {
4201 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4202 return (DCMD_ERR);
4203 }
4204
4205 do_all_caches = 0;
4206 argv++;
4207 argc--;
4208 }
4209
4210 if (flags & DCMD_ADDRSPEC) {
4211 opt_f = TRUE;
4212 kmu.kmu_addr = addr;
4213 } else {
4214 kmu.kmu_addr = 0;
4215 }
4216
4217 if (opt_e)
4218 mem_threshold = cnt_threshold = 0;
4219
4220 if (opt_f)
4221 callback = (mdb_walk_cb_t)kmause2;
4222
4223 if (do_all_caches) {
4224 kmc.kmc_name = NULL; /* match all cache names */
4225 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4226 }
4227
4228 for (i = 0; i < kmc.kmc_nelems; i++) {
4229 uintptr_t cp = kmc.kmc_caches[i];
4230 kmem_cache_t c;
4231
4232 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4233 mdb_warn("failed to read cache at %p", cp);
4234 continue;
4235 }
4236
4237 if (!(c.cache_flags & KMF_AUDIT)) {
4238 if (!do_all_caches) {
4239 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4240 c.cache_name);
4241 }
4242 continue;
4243 }
4244
4245 kmu.kmu_cache = &c;
4246 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4247 audited_caches++;
4248 }
4249
4250 if (audited_caches == 0 && do_all_caches) {
4251 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4252 return (DCMD_ERR);
4253 }
4254
4255 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4256 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4257
4258 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4259 if (kmo->kmo_total_size < mem_threshold &&
4260 kmo->kmo_num < cnt_threshold)
4261 continue;
4262 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4263 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4264 for (i = 0; i < kmo->kmo_depth; i++)
4265 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4266 }
4267
4268 return (DCMD_OK);
4269 }
4270
4271 void
4272 kmausers_help(void)
4273 {
4274 mdb_printf(
4275 "Displays the largest users of the kmem allocator, grouped by stack\n"
4276 "trace and sorted by total allocation size. If one or more caches\n"
4277 "are specified, only those caches will be searched. By default, all\n"
4278 "caches are searched. If an address is specified, then only those\n"
4279 "allocations which include the given address are displayed.\n"
4280 "Specifying an address implies -f.\n"
4281 "\n"
4282 "\t-e\tInclude all users, not just the largest\n"
4283 "\t-f\tDisplay individual allocations. By default, users are\n"
4284 "\t\tgrouped by stack\n");
4285 }
4286
4287 static int
4288 kmem_ready_check(void)
4289 {
4290 int ready;
4291
4292 if (mdb_readvar(&ready, "kmem_ready") < 0)
4293 return (-1); /* errno is set for us */
4294
4295 return (ready);
4296 }
4297
4298 void
4299 kmem_statechange(void)
4300 {
4301 static int been_ready = 0;
4302
4303 if (been_ready)
4304 return;
4305
4306 if (kmem_ready_check() <= 0)
4307 return;
4308
4309 been_ready = 1;
4310 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4311 }
4312
4313 void
4314 kmem_init(void)
4315 {
4316 mdb_walker_t w = {
4317 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4318 list_walk_step, list_walk_fini
4319 };
4320
4321 /*
4322 * If kmem is ready, we'll need to invoke the kmem_cache walker
4323 * immediately. Walkers in the linkage structure won't be ready until
4324 * _mdb_init returns, so we'll need to add this one manually. If kmem
4325 * is ready, we'll use the walker to initialize the caches. If kmem
4326 * isn't ready, we'll register a callback that will allow us to defer
4327 * cache walking until it is.
4328 */
4329 if (mdb_add_walker(&w) != 0) {
4330 mdb_warn("failed to add kmem_cache walker");
4331 return;
4332 }
4333
4334 kmem_statechange();
4335
4336 /* register our ::whatis handlers */
4337 mdb_whatis_register("modules", whatis_run_modules, NULL,
4338 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4339 mdb_whatis_register("threads", whatis_run_threads, NULL,
4340 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4341 mdb_whatis_register("pages", whatis_run_pages, NULL,
4342 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4343 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4344 WHATIS_PRIO_ALLOCATOR, 0);
4345 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4346 WHATIS_PRIO_ALLOCATOR, 0);
4347 }
4348
4349 typedef struct whatthread {
4350 uintptr_t wt_target;
4351 int wt_verbose;
4352 } whatthread_t;
4353
4354 static int
4355 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4356 {
4357 uintptr_t current, data;
4358
4359 if (t->t_stkbase == NULL)
4360 return (WALK_NEXT);
4361
4362 /*
4363 * Warn about swapped out threads, but drive on anyway
4364 */
4365 if (!(t->t_schedflag & TS_LOAD)) {
4366 mdb_warn("thread %p's stack swapped out\n", addr);
4367 return (WALK_NEXT);
4368 }
4369
4370 /*
4371 * Search the thread's stack for the given pointer. Note that it would
4372 * be more efficient to follow ::kgrep's lead and read in page-sized
4373 * chunks, but this routine is already fast and simple.
4374 */
4375 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4376 current += sizeof (uintptr_t)) {
4377 if (mdb_vread(&data, sizeof (data), current) == -1) {
4378 mdb_warn("couldn't read thread %p's stack at %p",
4379 addr, current);
4380 return (WALK_ERR);
4381 }
4382
4383 if (data == w->wt_target) {
4384 if (w->wt_verbose) {
4385 mdb_printf("%p in thread %p's stack%s\n",
4386 current, addr, stack_active(t, current));
4387 } else {
4388 mdb_printf("%#lr\n", addr);
4389 return (WALK_NEXT);
4390 }
4391 }
4392 }
4393
4394 return (WALK_NEXT);
4395 }
4396
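/*
 * Illustrative invocations of ::whatthread (<addr> is any kernel address of
 * interest):
 *
 *	> <addr>::whatthread		print each thread whose stack contains
 *					<addr>
 *	> <addr>::whatthread -v		also show where on the stack the value
 *					was found and whether that location is
 *					active
 */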
4397 int
4398 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4399 {
4400 whatthread_t w;
4401
4402 if (!(flags & DCMD_ADDRSPEC))
4403 return (DCMD_USAGE);
4404
4405 w.wt_verbose = FALSE;
4406 w.wt_target = addr;
4407
4408 if (mdb_getopts(argc, argv,
4409 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4410 return (DCMD_USAGE);
4411
4412 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4413 == -1) {
4414 mdb_warn("couldn't walk threads");
4415 return (DCMD_ERR);
4416 }
4417
4418 return (DCMD_OK);
4419 }
4420