1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2018 Joyent, Inc. All rights reserved.
28 * Copyright (c) 2012 by Delphix. All rights reserved.
29 * Copyright 2025 Oxide Computer Company
30 */
31
32 #include <mdb/mdb_param.h>
33 #include <mdb/mdb_modapi.h>
34 #include <mdb/mdb_ctf.h>
35 #include <mdb/mdb_whatis.h>
36 #include <sys/cpuvar.h>
37 #include <sys/kmem_impl.h>
38 #include <sys/vmem_impl.h>
39 #include <sys/machelf.h>
40 #include <sys/modctl.h>
41 #include <sys/kobj.h>
42 #include <sys/panic.h>
43 #include <sys/stack.h>
44 #include <sys/sysmacros.h>
45 #include <vm/page.h>
46
47 #include "avl.h"
48 #include "combined.h"
49 #include "dist.h"
50 #include "kmem.h"
51 #include "list.h"
52
53 #define dprintf(x) if (mdb_debug_level) { \
54 mdb_printf("kmem debug: "); \
55 /*CSTYLED*/\
56 mdb_printf x ;\
57 }
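/*
 * dprintf() takes a doubly-parenthesized argument list so that an entire
 * printf-style argument set can be passed through the single macro
 * parameter, e.g. dprintf(("reading %d loaded rounds\n", rounds)).
 * Output is emitted only while mdb_debug_level is nonzero, which the
 * kmem_debug dcmd below toggles.
 */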
58
59 #define KM_ALLOCATED 0x01
60 #define KM_FREE 0x02
61 #define KM_BUFCTL 0x04
62 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
63 #define KM_HASH 0x10
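/*
 * These flags select what the common kmem walker reports: KM_ALLOCATED and
 * KM_FREE choose between allocated and freed buffers, KM_BUFCTL reports
 * bufctl addresses rather than buffer addresses, KM_CONSTRUCTED limits a
 * freed-buffer walk to constructed buffers, and KM_HASH is set internally
 * when the walk is layered over the cache's hash table.
 */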
64
65 static int mdb_debug_level = 0;
66
67 /*ARGSUSED*/
68 static int
69 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
70 {
71 mdb_walker_t w;
72 char descr[64];
73
74 (void) mdb_snprintf(descr, sizeof (descr),
75 "walk the %s cache", c->cache_name);
76
77 w.walk_name = c->cache_name;
78 w.walk_descr = descr;
79 w.walk_init = kmem_walk_init;
80 w.walk_step = kmem_walk_step;
81 w.walk_fini = kmem_walk_fini;
82 w.walk_init_arg = (void *)addr;
83
84 if (mdb_add_walker(&w) == -1)
85 mdb_warn("failed to add %s walker", c->cache_name);
86
87 return (WALK_NEXT);
88 }
89
90 /*ARGSUSED*/
91 int
92 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
93 {
94 mdb_debug_level ^= 1;
95
96 mdb_printf("kmem: debugging is now %s\n",
97 mdb_debug_level ? "on" : "off");
98
99 return (DCMD_OK);
100 }
101
102 int
103 kmem_cache_walk_init(mdb_walk_state_t *wsp)
104 {
105 GElf_Sym sym;
106
107 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
108 mdb_warn("couldn't find kmem_caches");
109 return (WALK_ERR);
110 }
111
112 wsp->walk_addr = (uintptr_t)sym.st_value;
113
114 return (list_walk_init_named(wsp, "cache list", "cache"));
115 }
116
117 int
118 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
119 {
120 if (wsp->walk_addr == 0) {
121 mdb_warn("kmem_cpu_cache doesn't support global walks");
122 return (WALK_ERR);
123 }
124
125 if (mdb_layered_walk("cpu", wsp) == -1) {
126 mdb_warn("couldn't walk 'cpu'");
127 return (WALK_ERR);
128 }
129
130 wsp->walk_data = (void *)wsp->walk_addr;
131
132 return (WALK_NEXT);
133 }
134
135 int
136 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
137 {
138 uintptr_t caddr = (uintptr_t)wsp->walk_data;
139 const cpu_t *cpu = wsp->walk_layer;
140 kmem_cpu_cache_t cc;
141
142 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
143
144 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
145 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
146 return (WALK_ERR);
147 }
148
149 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
150 }
151
152 static int
153 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
154 {
155 kmem_slab_t *sp = p;
156 uintptr_t caddr = (uintptr_t)arg;
157 if ((uintptr_t)sp->slab_cache != caddr) {
158 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
159 saddr, caddr, sp->slab_cache);
160 return (-1);
161 }
162
163 return (0);
164 }
165
166 static int
167 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
168 {
169 kmem_slab_t *sp = p;
170
171 int rc = kmem_slab_check(p, saddr, arg);
172 if (rc != 0) {
173 return (rc);
174 }
175
176 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
177 mdb_warn("slab %p is not a partial slab\n", saddr);
178 return (-1);
179 }
180
181 return (0);
182 }
183
184 static int
185 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
186 {
187 kmem_slab_t *sp = p;
188
189 int rc = kmem_slab_check(p, saddr, arg);
190 if (rc != 0) {
191 return (rc);
192 }
193
194 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
195 mdb_warn("slab %p is not completely allocated\n", saddr);
196 return (-1);
197 }
198
199 return (0);
200 }
201
202 typedef struct {
203 uintptr_t kns_cache_addr;
204 int kns_nslabs;
205 } kmem_nth_slab_t;
206
207 static int
208 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
209 {
210 kmem_nth_slab_t *chkp = arg;
211
212 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
213 if (rc != 0) {
214 return (rc);
215 }
216
217 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
218 }
219
220 static int
221 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
222 {
223 uintptr_t caddr = wsp->walk_addr;
224
225 wsp->walk_addr = (uintptr_t)(caddr +
226 offsetof(kmem_cache_t, cache_complete_slabs));
227
228 return (list_walk_init_checked(wsp, "slab list", "slab",
229 kmem_complete_slab_check, (void *)caddr));
230 }
231
232 static int
233 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
234 {
235 uintptr_t caddr = wsp->walk_addr;
236
237 wsp->walk_addr = (uintptr_t)(caddr +
238 offsetof(kmem_cache_t, cache_partial_slabs));
239
240 return (avl_walk_init_checked(wsp, "slab list", "slab",
241 kmem_partial_slab_check, (void *)caddr));
242 }
243
244 int
245 kmem_slab_walk_init(mdb_walk_state_t *wsp)
246 {
247 uintptr_t caddr = wsp->walk_addr;
248
249 if (caddr == 0) {
250 mdb_warn("kmem_slab doesn't support global walks\n");
251 return (WALK_ERR);
252 }
253
254 combined_walk_init(wsp);
255 combined_walk_add(wsp,
256 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
257 combined_walk_add(wsp,
258 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
259
260 return (WALK_NEXT);
261 }
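/*
 * An illustrative use of the combined walk set up above (the address is
 * hypothetical):
 *
 *	> fffffe23e9a8c008::walk kmem_slab
 *
 * visits the cache's complete slabs first, then its partial slabs.
 */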
262
263 static int
264 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
265 {
266 uintptr_t caddr = wsp->walk_addr;
267 kmem_nth_slab_t *chk;
268
269 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
270 UM_SLEEP | UM_GC);
271 chk->kns_cache_addr = caddr;
272 chk->kns_nslabs = 1;
273 wsp->walk_addr = (uintptr_t)(caddr +
274 offsetof(kmem_cache_t, cache_complete_slabs));
275
276 return (list_walk_init_checked(wsp, "slab list", "slab",
277 kmem_nth_slab_check, chk));
278 }
279
280 int
281 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
282 {
283 uintptr_t caddr = wsp->walk_addr;
284 kmem_cache_t c;
285
286 if (caddr == 0) {
287 mdb_warn("kmem_slab_partial doesn't support global walks\n");
288 return (WALK_ERR);
289 }
290
291 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
292 mdb_warn("couldn't read kmem_cache at %p", caddr);
293 return (WALK_ERR);
294 }
295
296 combined_walk_init(wsp);
297
298 /*
299 * Some consumers (kmem_walk_step(), in particular) require at
300 * least one callback if there are any buffers in the cache. So
301 * if there are *no* partial slabs, report the first full slab, if
302 * any.
303 *
304 * Yes, this is ugly, but it's cleaner than the other possibilities.
305 */
306 if (c.cache_partial_slabs.avl_numnodes == 0) {
307 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
308 list_walk_step, list_walk_fini);
309 } else {
310 combined_walk_add(wsp, kmem_partial_slab_walk_init,
311 avl_walk_step, avl_walk_fini);
312 }
313
314 return (WALK_NEXT);
315 }
316
317 int
318 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
319 {
320 kmem_cache_t c;
321 const char *filter = NULL;
322
323 if (mdb_getopts(ac, argv,
324 'n', MDB_OPT_STR, &filter,
325 NULL) != ac) {
326 return (DCMD_USAGE);
327 }
328
329 if (!(flags & DCMD_ADDRSPEC)) {
330 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
331 mdb_warn("can't walk kmem_cache");
332 return (DCMD_ERR);
333 }
334 return (DCMD_OK);
335 }
336
337 if (DCMD_HDRSPEC(flags))
338 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
339 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
340
341 if (mdb_vread(&c, sizeof (c), addr) == -1) {
342 mdb_warn("couldn't read kmem_cache at %p", addr);
343 return (DCMD_ERR);
344 }
345
346 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
347 return (DCMD_OK);
348
349 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
350 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
351
352 return (DCMD_OK);
353 }
354
355 void
356 kmem_cache_help(void)
357 {
358 mdb_printf("%s", "Print kernel memory caches.\n\n");
359 mdb_dec_indent(2);
360 mdb_printf("%<b>OPTIONS%</b>\n");
361 mdb_inc_indent(2);
362 mdb_printf("%s",
363 " -n name\n"
364 " name of kmem cache (or matching partial name)\n"
365 "\n"
366 "Column\tDescription\n"
367 "\n"
368 "ADDR\t\taddress of kmem cache\n"
369 "NAME\t\tname of kmem cache\n"
370 "FLAG\t\tvarious cache state flags\n"
371 "CFLAG\t\tcache creation flags\n"
372 "BUFSIZE\tobject size in bytes\n"
373 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
374 }
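/*
 * Example invocations (the cache name and address below are illustrative):
 *
 *	> ::kmem_cache				(all caches)
 *	> ::kmem_cache -n thread		(names containing "thread")
 *	> fffffe23e9a8c008::kmem_cache		(a single cache, by address)
 */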
375
376 #define LABEL_WIDTH 11
377 static void
378 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
379 size_t maxbuckets, size_t minbucketsize)
380 {
381 uint64_t total;
382 int buckets;
383 int i;
384 const int *distarray;
385 int complete[2];
386
387 buckets = buffers_per_slab;
388
389 total = 0;
390 for (i = 0; i <= buffers_per_slab; i++)
391 total += ks_bucket[i];
392
393 if (maxbuckets > 1)
394 buckets = MIN(buckets, maxbuckets);
395
396 if (minbucketsize > 1) {
397 /*
398 * minbucketsize does not apply to the first bucket reserved
399 * for completely allocated slabs
400 */
401 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
402 minbucketsize));
403 if ((buckets < 2) && (buffers_per_slab > 1)) {
404 buckets = 2;
405 minbucketsize = (buffers_per_slab - 1);
406 }
407 }
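	/*
	 * For example (hypothetical values): with buffers_per_slab = 8 and
	 * minbucketsize = 4, buckets = MIN(8, 1 + (8 - 1) / 4) = 2, i.e. one
	 * bucket for completely allocated slabs and one covering the rest.
	 */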
408
409 /*
410 * The first printed bucket is reserved for completely allocated slabs.
411 * Passing (buckets - 1) excludes that bucket from the generated
412 * distribution, since we're handling it as a special case.
413 */
414 complete[0] = buffers_per_slab;
415 complete[1] = buffers_per_slab + 1;
416 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
417
418 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
419 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
420
421 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
422 /*
423 * Print bucket ranges in descending order after the first bucket for
424 * completely allocated slabs, so a person can see immediately whether
425 * or not there is fragmentation without having to scan possibly
426 * multiple screens of output. Starting at (buckets - 2) excludes the
427 * extra terminating bucket.
428 */
429 for (i = buckets - 2; i >= 0; i--) {
430 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
431 }
432 mdb_printf("\n");
433 }
434 #undef LABEL_WIDTH
435
436 /*ARGSUSED*/
437 static int
438 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
439 {
440 *is_slab = B_TRUE;
441 return (WALK_DONE);
442 }
443
444 /*ARGSUSED*/
445 static int
446 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
447 boolean_t *is_slab)
448 {
449 /*
450 * The "kmem_slab_partial" walker reports the first full slab if there
451 * are no partial slabs (for the sake of consumers that require at least
452 * one callback if there are any buffers in the cache).
453 */
454 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
455 return (WALK_DONE);
456 }
457
458 typedef struct kmem_slab_usage {
459 int ksu_refcnt; /* count of allocated buffers on slab */
460 boolean_t ksu_nomove; /* slab marked non-reclaimable */
461 } kmem_slab_usage_t;
462
463 typedef struct kmem_slab_stats {
464 const kmem_cache_t *ks_cp;
465 int ks_slabs; /* slabs in cache */
466 int ks_partial_slabs; /* partially allocated slabs in cache */
467 uint64_t ks_unused_buffers; /* total unused buffers in cache */
468 int ks_max_buffers_per_slab; /* max buffers per slab */
469 int ks_usage_len; /* ks_usage array length */
470 kmem_slab_usage_t *ks_usage; /* partial slab usage */
471 uint_t *ks_bucket; /* slab usage distribution */
472 } kmem_slab_stats_t;
473
474 /*ARGSUSED*/
475 static int
476 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
477 kmem_slab_stats_t *ks)
478 {
479 kmem_slab_usage_t *ksu;
480 long unused;
481
482 ks->ks_slabs++;
483 ks->ks_bucket[sp->slab_refcnt]++;
484
485 unused = (sp->slab_chunks - sp->slab_refcnt);
486 if (unused == 0) {
487 return (WALK_NEXT);
488 }
489
490 ks->ks_partial_slabs++;
491 ks->ks_unused_buffers += unused;
492
493 if (ks->ks_partial_slabs > ks->ks_usage_len) {
494 kmem_slab_usage_t *usage;
495 int len = ks->ks_usage_len;
496
497 len = (len == 0 ? 16 : len * 2);
498 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
499 if (ks->ks_usage != NULL) {
500 bcopy(ks->ks_usage, usage,
501 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
502 mdb_free(ks->ks_usage,
503 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
504 }
505 ks->ks_usage = usage;
506 ks->ks_usage_len = len;
507 }
508
509 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
510 ksu->ksu_refcnt = sp->slab_refcnt;
511 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
512 return (WALK_NEXT);
513 }
514
515 static void
516 kmem_slabs_header()
517 {
518 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
519 "", "", "Partial", "", "Unused", "");
520 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
521 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
522 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
523 "-------------------------", "--------", "--------", "---------",
524 "---------", "------");
525 }
526
527 int
528 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
529 {
530 kmem_cache_t c;
531 kmem_slab_stats_t stats;
532 mdb_walk_cb_t cb;
533 int pct;
534 int tenths_pct;
535 size_t maxbuckets = 1;
536 size_t minbucketsize = 0;
537 const char *filter = NULL;
538 const char *name = NULL;
539 uint_t opt_v = FALSE;
540 boolean_t buckets = B_FALSE;
541 boolean_t skip = B_FALSE;
542
543 if (mdb_getopts(argc, argv,
544 'B', MDB_OPT_UINTPTR, &minbucketsize,
545 'b', MDB_OPT_UINTPTR, &maxbuckets,
546 'n', MDB_OPT_STR, &filter,
547 'N', MDB_OPT_STR, &name,
548 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
549 NULL) != argc) {
550 return (DCMD_USAGE);
551 }
552
553 if ((maxbuckets != 1) || (minbucketsize != 0)) {
554 buckets = B_TRUE;
555 }
556
557 if (!(flags & DCMD_ADDRSPEC)) {
558 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
559 argv) == -1) {
560 mdb_warn("can't walk kmem_cache");
561 return (DCMD_ERR);
562 }
563 return (DCMD_OK);
564 }
565
566 if (mdb_vread(&c, sizeof (c), addr) == -1) {
567 mdb_warn("couldn't read kmem_cache at %p", addr);
568 return (DCMD_ERR);
569 }
570
571 if (name == NULL) {
572 skip = ((filter != NULL) &&
573 (strstr(c.cache_name, filter) == NULL));
574 } else if (filter == NULL) {
575 skip = (strcmp(c.cache_name, name) != 0);
576 } else {
577 /* match either -n or -N */
578 skip = ((strcmp(c.cache_name, name) != 0) &&
579 (strstr(c.cache_name, filter) == NULL));
580 }
581
582 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
583 kmem_slabs_header();
584 } else if ((opt_v || buckets) && !skip) {
585 if (DCMD_HDRSPEC(flags)) {
586 kmem_slabs_header();
587 } else {
588 boolean_t is_slab = B_FALSE;
589 const char *walker_name;
590 if (opt_v) {
591 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
592 walker_name = "kmem_slab_partial";
593 } else {
594 cb = (mdb_walk_cb_t)kmem_first_slab;
595 walker_name = "kmem_slab";
596 }
597 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
598 if (is_slab) {
599 kmem_slabs_header();
600 }
601 }
602 }
603
604 if (skip) {
605 return (DCMD_OK);
606 }
607
608 bzero(&stats, sizeof (kmem_slab_stats_t));
609 stats.ks_cp = &c;
610 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
611 /* +1 to include a zero bucket */
612 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
613 sizeof (*stats.ks_bucket), UM_SLEEP);
614 cb = (mdb_walk_cb_t)kmem_slablist_stat;
615 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
616
617 if (c.cache_buftotal == 0) {
618 pct = 0;
619 tenths_pct = 0;
620 } else {
621 uint64_t n = stats.ks_unused_buffers * 10000;
622 pct = (int)(n / c.cache_buftotal);
623 tenths_pct = pct - ((pct / 100) * 100);
624 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
625 if (tenths_pct == 10) {
626 pct += 100;
627 tenths_pct = 0;
628 }
629 }
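	/*
	 * A worked example of the fixed-point math above (hypothetical
	 * counts): with 25 unused buffers out of 300 total, n = 250000,
	 * pct = 833, tenths_pct = (33 + 5) / 10 = 3; after the division
	 * below, pct = 8 and the Waste column reads "8.3%".
	 */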
630
631 pct /= 100;
632 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
633 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
634 stats.ks_unused_buffers, pct, tenths_pct);
635
636 if (maxbuckets == 0) {
637 maxbuckets = stats.ks_max_buffers_per_slab;
638 }
639
640 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
641 (stats.ks_slabs > 0)) {
642 mdb_printf("\n");
643 kmem_slabs_print_dist(stats.ks_bucket,
644 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
645 }
646
647 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
648 sizeof (*stats.ks_bucket));
649
650 if (!opt_v) {
651 return (DCMD_OK);
652 }
653
654 if (opt_v && (stats.ks_partial_slabs > 0)) {
655 int i;
656 kmem_slab_usage_t *ksu;
657
658 mdb_printf(" %d complete (%d), %d partial:",
659 (stats.ks_slabs - stats.ks_partial_slabs),
660 stats.ks_max_buffers_per_slab,
661 stats.ks_partial_slabs);
662
663 for (i = 0; i < stats.ks_partial_slabs; i++) {
664 ksu = &stats.ks_usage[i];
665 mdb_printf(" %d%s", ksu->ksu_refcnt,
666 (ksu->ksu_nomove ? "*" : ""));
667 }
668 mdb_printf("\n\n");
669 }
670
671 if (stats.ks_usage_len > 0) {
672 mdb_free(stats.ks_usage,
673 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
674 }
675
676 return (DCMD_OK);
677 }
678
679 void
680 kmem_slabs_help(void)
681 {
682 mdb_printf("%s",
683 "Display slab usage per kmem cache.\n\n");
684 mdb_dec_indent(2);
685 mdb_printf("%<b>OPTIONS%</b>\n");
686 mdb_inc_indent(2);
687 mdb_printf("%s",
688 " -n name\n"
689 " name of kmem cache (or matching partial name)\n"
690 " -N name\n"
691 " exact name of kmem cache\n"
692 " -b maxbins\n"
693 " Print a distribution of allocated buffers per slab using at\n"
694 " most maxbins bins. The first bin is reserved for completely\n"
695 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
696 " effect as specifying the maximum allocated buffers per slab\n"
697 " or setting minbinsize to 1 (-B 1).\n"
698 " -B minbinsize\n"
699 " Print a distribution of allocated buffers per slab, making\n"
700 " all bins (except the first, reserved for completely allocated\n"
701 " slabs) at least minbinsize buffers apart.\n"
702 " -v verbose output: List the allocated buffer count of each partial\n"
703 " slab on the free list in order from front to back to show how\n"
704 " closely the slabs are ordered by usage. For example\n"
705 "\n"
706 " 10 complete (8), 3 partial: 7 3 1\n"
707 "\n"
708 " means there are thirteen slabs with eight buffers each, including\n"
709 " three partially allocated slabs with fewer than eight buffers\n"
710 " allocated.\n"
711 "\n"
712 " Buffer allocations are always from the front of the partial slab\n"
713 " list. When a buffer is freed from a completely used slab, that\n"
714 " slab is added to the front of the partial slab list. Assuming\n"
715 " that all buffers are equally likely to be freed soon, the\n"
716 " desired order of partial slabs is most-used at the front of the\n"
717 " list and least-used at the back (as in the example above).\n"
718 " However, if a slab contains an allocated buffer that will not\n"
719 " soon be freed, it would be better for that slab to be at the\n"
720 " front where all of its buffers can be allocated. Taking a slab\n"
721 " off the partial slab list (either with all buffers freed or all\n"
722 " buffers allocated) reduces cache fragmentation.\n"
723 "\n"
724 " A slab's allocated buffer count representing a partial slab (9 in\n"
725 " the example below) may be marked as follows:\n"
726 "\n"
727 " 9* An asterisk indicates that kmem has marked the slab non-\n"
728 " reclaimable because the kmem client refused to move one of the\n"
729 " slab's buffers. Since kmem does not expect to completely free the\n"
730 " slab, it moves it to the front of the list in the hope of\n"
731 " completely allocating it instead. A slab marked with an asterisk\n"
732 " stays marked for as long as it remains on the partial slab list.\n"
733 "\n"
734 "Column\t\tDescription\n"
735 "\n"
736 "Cache Name\t\tname of kmem cache\n"
737 "Slabs\t\t\ttotal slab count\n"
738 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
739 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
740 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
741 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
742 "\t\t\t for accounting structures (debug mode), slab\n"
743 "\t\t\t coloring (incremental small offsets to stagger\n"
744 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
745 }
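/*
 * Example invocations (the cache name is illustrative):
 *
 *	> ::kmem_slabs				(summary line for every cache)
 *	> ::kmem_slabs -v -N kmem_alloc_8	(per-slab detail for one cache)
 *	> ::kmem_slabs -b 10			(distribution, at most 10 bins)
 */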
746
747 static int
748 addrcmp(const void *lhs, const void *rhs)
749 {
750 uintptr_t p1 = *((uintptr_t *)lhs);
751 uintptr_t p2 = *((uintptr_t *)rhs);
752
753 if (p1 < p2)
754 return (-1);
755 if (p1 > p2)
756 return (1);
757 return (0);
758 }
759
760 static int
761 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
762 {
763 const kmem_bufctl_audit_t *bcp1 = *lhs;
764 const kmem_bufctl_audit_t *bcp2 = *rhs;
765
766 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
767 return (-1);
768
769 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
770 return (1);
771
772 return (0);
773 }
774
775 typedef struct kmem_hash_walk {
776 uintptr_t *kmhw_table;
777 size_t kmhw_nelems;
778 size_t kmhw_pos;
779 kmem_bufctl_t kmhw_cur;
780 } kmem_hash_walk_t;
781
782 int
783 kmem_hash_walk_init(mdb_walk_state_t *wsp)
784 {
785 kmem_hash_walk_t *kmhw;
786 uintptr_t *hash;
787 kmem_cache_t c;
788 uintptr_t haddr, addr = wsp->walk_addr;
789 size_t nelems;
790 size_t hsize;
791
792 if (addr == 0) {
793 mdb_warn("kmem_hash doesn't support global walks\n");
794 return (WALK_ERR);
795 }
796
797 if (mdb_vread(&c, sizeof (c), addr) == -1) {
798 mdb_warn("couldn't read cache at addr %p", addr);
799 return (WALK_ERR);
800 }
801
802 if (!(c.cache_flags & KMF_HASH)) {
803 mdb_warn("cache %p doesn't have a hash table\n", addr);
804 return (WALK_DONE); /* nothing to do */
805 }
806
807 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
808 kmhw->kmhw_cur.bc_next = NULL;
809 kmhw->kmhw_pos = 0;
810
811 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
812 hsize = nelems * sizeof (uintptr_t);
813 haddr = (uintptr_t)c.cache_hash_table;
814
815 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
816 if (mdb_vread(hash, hsize, haddr) == -1) {
817 mdb_warn("failed to read hash table at %p", haddr);
818 mdb_free(hash, hsize);
819 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
820 return (WALK_ERR);
821 }
822
823 wsp->walk_data = kmhw;
824
825 return (WALK_NEXT);
826 }
827
828 int
829 kmem_hash_walk_step(mdb_walk_state_t *wsp)
830 {
831 kmem_hash_walk_t *kmhw = wsp->walk_data;
832 uintptr_t addr = 0;
833
834 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == 0) {
835 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
836 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != 0)
837 break;
838 }
839 }
840 if (addr == 0)
841 return (WALK_DONE);
842
843 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
844 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
845 return (WALK_ERR);
846 }
847
848 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
849 }
850
851 void
852 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
853 {
854 kmem_hash_walk_t *kmhw = wsp->walk_data;
855
856 if (kmhw == NULL)
857 return;
858
859 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
860 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
861 }
862
863 /*
864 * Find the address of the bufctl structure for the address 'buf' in cache
865 * 'cp', which is at address caddr, and place it in *out.
866 */
867 static int
868 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
869 {
870 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
871 kmem_bufctl_t *bcp;
872 kmem_bufctl_t bc;
873
874 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
875 mdb_warn("unable to read hash bucket for %p in cache %p",
876 buf, caddr);
877 return (-1);
878 }
879
880 while (bcp != NULL) {
881 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
882 (uintptr_t)bcp) == -1) {
883 mdb_warn("unable to read bufctl at %p", bcp);
884 return (-1);
885 }
886 if (bc.bc_addr == buf) {
887 *out = (uintptr_t)bcp;
888 return (0);
889 }
890 bcp = bc.bc_next;
891 }
892
893 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
894 return (-1);
895 }
896
897 int
898 kmem_get_magsize(const kmem_cache_t *cp)
899 {
900 uintptr_t addr = (uintptr_t)cp->cache_magtype;
901 GElf_Sym mt_sym;
902 kmem_magtype_t mt;
903 int res;
904
905 /*
906 * if cpu 0 has a non-zero magsize, it must be correct. caches
907 * with KMF_NOMAGAZINE have disabled their magazine layers, so
908 * it is okay to return 0 for them.
909 */
910 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
911 (cp->cache_flags & KMF_NOMAGAZINE))
912 return (res);
913
914 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
915 mdb_warn("unable to read 'kmem_magtype'");
916 } else if (addr < mt_sym.st_value ||
917 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
918 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
919 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
920 cp->cache_name, addr);
921 return (0);
922 }
923 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
924 mdb_warn("unable to read magtype at %a", addr);
925 return (0);
926 }
927 return (mt.mt_magsize);
928 }
929
930 /*ARGSUSED*/
931 static int
932 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
933 {
934 *est -= (sp->slab_chunks - sp->slab_refcnt);
935
936 return (WALK_NEXT);
937 }
938
939 /*
940 * Returns an upper bound on the number of allocated buffers in a given
941 * cache.
942 */
943 size_t
944 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
945 {
946 int magsize;
947 size_t cache_est;
948
949 cache_est = cp->cache_buftotal;
950
951 (void) mdb_pwalk("kmem_slab_partial",
952 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
953
954 if ((magsize = kmem_get_magsize(cp)) != 0) {
955 size_t mag_est = cp->cache_full.ml_total * magsize;
956
957 if (cache_est >= mag_est) {
958 cache_est -= mag_est;
959 } else {
960 mdb_warn("cache %p's magazine layer holds more buffers "
961 "than the slab layer.\n", addr);
962 }
963 }
964 return (cache_est);
965 }
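/*
 * A worked example (hypothetical counts): a cache with cache_buftotal = 100,
 * 10 unused buffers across its partial slabs, and 3 full magazines of 15
 * rounds each in the depot yields an estimate of 100 - 10 - 45 = 45.
 * Buffers sitting in loaded per-CPU magazines are not subtracted, which is
 * why the result is only an upper bound.
 */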
966
967 #define READMAG_ROUNDS(rounds) { \
968 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
969 mdb_warn("couldn't read magazine at %p", kmp); \
970 goto fail; \
971 } \
972 for (i = 0; i < rounds; i++) { \
973 maglist[magcnt++] = mp->mag_round[i]; \
974 if (magcnt == magmax) { \
975 mdb_warn("%d magazines exceeds fudge factor\n", \
976 magcnt); \
977 goto fail; \
978 } \
979 } \
980 }
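/*
 * READMAG_ROUNDS() is intended only for use within kmem_read_magazines():
 * it relies on the local variables kmp, mp, magbsize, maglist, magcnt,
 * magmax, and i, and on the fail: label, all of which are declared there.
 */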
981
982 int
983 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
984 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
985 {
986 kmem_magazine_t *kmp, *mp;
987 void **maglist = NULL;
988 int i, cpu;
989 size_t magsize, magmax, magbsize;
990 size_t magcnt = 0;
991
992 /*
993 * Read the magtype out of the cache, after verifying the pointer's
994 * correctness.
995 */
996 magsize = kmem_get_magsize(cp);
997 if (magsize == 0) {
998 *maglistp = NULL;
999 *magcntp = 0;
1000 *magmaxp = 0;
1001 return (WALK_NEXT);
1002 }
1003
1004 /*
1005 * There are several places where we need to go buffer hunting:
1006 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1007 * and the full magazine list in the depot.
1008 *
1009 * For an upper bound on the number of buffers in the magazine
1010 * layer, we have the number of magazines on the cache_full
1011 * list plus at most two magazines per CPU (the loaded and the
1012 * spare). Toss in 100 magazines as a fudge factor in case this
1013 * is live (the number "100" comes from the same fudge factor in
1014 * crash(8)).
1015 */
1016 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1017 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
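	/*
	 * For example (hypothetical values): with 10 full magazines in the
	 * depot, 4 CPUs, and 15 rounds per magazine, magmax is
	 * (10 + 2 * 4 + 100) * 15 = 1770 buffer pointers.
	 */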
1018
1019 if (magbsize >= PAGESIZE / 2) {
1020 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1021 addr, magbsize);
1022 return (WALK_ERR);
1023 }
1024
1025 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1026 mp = mdb_alloc(magbsize, alloc_flags);
1027 if (mp == NULL || maglist == NULL)
1028 goto fail;
1029
1030 /*
1031 * First up: the magazines in the depot (i.e. on the cache_full list).
1032 */
1033 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1034 READMAG_ROUNDS(magsize);
1035 kmp = mp->mag_next;
1036
1037 if (kmp == cp->cache_full.ml_list)
1038 break; /* cache_full list loop detected */
1039 }
1040
1041 dprintf(("cache_full list done\n"));
1042
1043 /*
1044 * Now whip through the CPUs, snagging the loaded magazines
1045 * and full spares.
1046 *
1047 * In order to prevent inconsistent dumps, rounds and prounds
1048 * are copied aside before dumping begins.
1049 */
1050 for (cpu = 0; cpu < ncpus; cpu++) {
1051 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1052 short rounds, prounds;
1053
1054 if (KMEM_DUMPCC(ccp)) {
1055 rounds = ccp->cc_dump_rounds;
1056 prounds = ccp->cc_dump_prounds;
1057 } else {
1058 rounds = ccp->cc_rounds;
1059 prounds = ccp->cc_prounds;
1060 }
1061
1062 dprintf(("reading cpu cache %p\n",
1063 (uintptr_t)ccp - (uintptr_t)cp + addr));
1064
1065 if (rounds > 0 &&
1066 (kmp = ccp->cc_loaded) != NULL) {
1067 dprintf(("reading %d loaded rounds\n", rounds));
1068 READMAG_ROUNDS(rounds);
1069 }
1070
1071 if (prounds > 0 &&
1072 (kmp = ccp->cc_ploaded) != NULL) {
1073 dprintf(("reading %d previously loaded rounds\n",
1074 prounds));
1075 READMAG_ROUNDS(prounds);
1076 }
1077 }
1078
1079 dprintf(("magazine layer: %d buffers\n", magcnt));
1080
1081 if (!(alloc_flags & UM_GC))
1082 mdb_free(mp, magbsize);
1083
1084 *maglistp = maglist;
1085 *magcntp = magcnt;
1086 *magmaxp = magmax;
1087
1088 return (WALK_NEXT);
1089
1090 fail:
1091 if (!(alloc_flags & UM_GC)) {
1092 if (mp)
1093 mdb_free(mp, magbsize);
1094 if (maglist)
1095 mdb_free(maglist, magmax * sizeof (void *));
1096 }
1097 return (WALK_ERR);
1098 }
1099
1100 static int
1101 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1102 {
1103 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1104 }
1105
1106 static int
1107 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1108 {
1109 kmem_bufctl_audit_t b;
1110
1111 /*
1112 * if KMF_AUDIT is not set, we know that we're looking at a
1113 * kmem_bufctl_t.
1114 */
1115 if (!(cp->cache_flags & KMF_AUDIT) ||
1116 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1117 (void) memset(&b, 0, sizeof (b));
1118 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1119 mdb_warn("unable to read bufctl at %p", buf);
1120 return (WALK_ERR);
1121 }
1122 }
1123
1124 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1125 }
1126
1127 typedef struct kmem_walk {
1128 int kmw_type;
1129
1130 uintptr_t kmw_addr; /* cache address */
1131 kmem_cache_t *kmw_cp;
1132 size_t kmw_csize;
1133
1134 /*
1135 * magazine layer
1136 */
1137 void **kmw_maglist;
1138 size_t kmw_max;
1139 size_t kmw_count;
1140 size_t kmw_pos;
1141
1142 /*
1143 * slab layer
1144 */
1145 char *kmw_valid; /* to keep track of freed buffers */
1146 char *kmw_ubase; /* buffer for slab data */
1147 } kmem_walk_t;
1148
1149 static int
1150 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1151 {
1152 kmem_walk_t *kmw;
1153 int ncpus, csize;
1154 kmem_cache_t *cp;
1155 size_t vm_quantum;
1156
1157 size_t magmax, magcnt;
1158 void **maglist = NULL;
1159 uint_t chunksize = 1, slabsize = 1;
1160 int status = WALK_ERR;
1161 uintptr_t addr = wsp->walk_addr;
1162 const char *layered;
1163
1164 type &= ~KM_HASH;
1165
1166 if (addr == 0) {
1167 mdb_warn("kmem walk doesn't support global walks\n");
1168 return (WALK_ERR);
1169 }
1170
1171 dprintf(("walking %p\n", addr));
1172
1173 /*
1174 * First we need to figure out how many CPUs are configured in the
1175 * system to know how much to slurp out.
1176 */
1177 mdb_readvar(&ncpus, "max_ncpus");
1178
1179 csize = KMEM_CACHE_SIZE(ncpus);
1180 cp = mdb_alloc(csize, UM_SLEEP);
1181
1182 if (mdb_vread(cp, csize, addr) == -1) {
1183 mdb_warn("couldn't read cache at addr %p", addr);
1184 goto out2;
1185 }
1186
1187 /*
1188 * It's easy for someone to hand us an invalid cache address.
1189 * Unfortunately, it is hard for this walker to survive an
1190 * invalid cache cleanly. So we make sure that:
1191 *
1192 * 1. the vmem arena for the cache is readable,
1193 * 2. the vmem arena's quantum is a power of 2,
1194 * 3. our slabsize is a multiple of the quantum, and
1195 * 4. our chunksize is >0 and less than our slabsize.
1196 */
1197 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1198 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1199 vm_quantum == 0 ||
1200 (vm_quantum & (vm_quantum - 1)) != 0 ||
1201 cp->cache_slabsize < vm_quantum ||
1202 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1203 cp->cache_chunksize == 0 ||
1204 cp->cache_chunksize > cp->cache_slabsize) {
1205 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1206 goto out2;
1207 }
1208
1209 dprintf(("buf total is %d\n", cp->cache_buftotal));
1210
1211 if (cp->cache_buftotal == 0) {
1212 mdb_free(cp, csize);
1213 return (WALK_DONE);
1214 }
1215
1216 /*
1217 * If they ask for bufctls, but it's a small-slab cache,
1218 * there is nothing to report.
1219 */
1220 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1221 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1222 cp->cache_flags));
1223 mdb_free(cp, csize);
1224 return (WALK_DONE);
1225 }
1226
1227 /*
1228 * If they want constructed buffers, but there's no constructor or
1229 * the cache has DEADBEEF checking enabled, there is nothing to report.
1230 */
1231 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1232 cp->cache_constructor == NULL ||
1233 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1234 mdb_free(cp, csize);
1235 return (WALK_DONE);
1236 }
1237
1238 /*
1239 * Read in the contents of the magazine layer
1240 */
1241 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1242 &magmax, UM_SLEEP) == WALK_ERR)
1243 goto out2;
1244
1245 /*
1246 * We have all of the buffers from the magazines; if we are walking
1247 * allocated buffers, sort them so we can bsearch them later.
1248 */
1249 if (type & KM_ALLOCATED)
1250 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1251
1252 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1253
1254 kmw->kmw_type = type;
1255 kmw->kmw_addr = addr;
1256 kmw->kmw_cp = cp;
1257 kmw->kmw_csize = csize;
1258 kmw->kmw_maglist = maglist;
1259 kmw->kmw_max = magmax;
1260 kmw->kmw_count = magcnt;
1261 kmw->kmw_pos = 0;
1262
1263 /*
1264 * When walking allocated buffers in a KMF_HASH cache, we walk the
1265 * hash table instead of the slab layer.
1266 */
1267 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1268 layered = "kmem_hash";
1269
1270 kmw->kmw_type |= KM_HASH;
1271 } else {
1272 /*
1273 * If we are walking freed buffers, we only need the
1274 * magazine layer plus the partially allocated slabs.
1275 * To walk allocated buffers, we need all of the slabs.
1276 */
1277 if (type & KM_ALLOCATED)
1278 layered = "kmem_slab";
1279 else
1280 layered = "kmem_slab_partial";
1281
1282 /*
1283 * for small-slab caches, we read in the entire slab. For
1284 * freed buffers, we can just walk the freelist. For
1285 * allocated buffers, we use a 'valid' array to track
1286 * the freed buffers.
1287 */
1288 if (!(cp->cache_flags & KMF_HASH)) {
1289 chunksize = cp->cache_chunksize;
1290 slabsize = cp->cache_slabsize;
1291
1292 kmw->kmw_ubase = mdb_alloc(slabsize +
1293 sizeof (kmem_bufctl_t), UM_SLEEP);
1294
1295 if (type & KM_ALLOCATED)
1296 kmw->kmw_valid =
1297 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1298 }
1299 }
1300
1301 status = WALK_NEXT;
1302
1303 if (mdb_layered_walk(layered, wsp) == -1) {
1304 mdb_warn("unable to start layered '%s' walk", layered);
1305 status = WALK_ERR;
1306 }
1307
1308 out1:
1309 if (status == WALK_ERR) {
1310 if (kmw->kmw_valid)
1311 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1312
1313 if (kmw->kmw_ubase)
1314 mdb_free(kmw->kmw_ubase, slabsize +
1315 sizeof (kmem_bufctl_t));
1316
1317 if (kmw->kmw_maglist)
1318 mdb_free(kmw->kmw_maglist,
1319 kmw->kmw_max * sizeof (uintptr_t));
1320
1321 mdb_free(kmw, sizeof (kmem_walk_t));
1322 wsp->walk_data = NULL;
1323 }
1324
1325 out2:
1326 if (status == WALK_ERR)
1327 mdb_free(cp, csize);
1328
1329 return (status);
1330 }
1331
1332 int
1333 kmem_walk_step(mdb_walk_state_t *wsp)
1334 {
1335 kmem_walk_t *kmw = wsp->walk_data;
1336 int type = kmw->kmw_type;
1337 kmem_cache_t *cp = kmw->kmw_cp;
1338
1339 void **maglist = kmw->kmw_maglist;
1340 int magcnt = kmw->kmw_count;
1341
1342 uintptr_t chunksize, slabsize;
1343 uintptr_t addr;
1344 const kmem_slab_t *sp;
1345 const kmem_bufctl_t *bcp;
1346 kmem_bufctl_t bc;
1347
1348 int chunks;
1349 char *kbase;
1350 void *buf;
1351 int i, ret;
1352
1353 char *valid, *ubase;
1354
1355 /*
1356 * first, handle the 'kmem_hash' layered walk case
1357 */
1358 if (type & KM_HASH) {
1359 /*
1360 * We have a buffer which has been allocated out of the
1361 * global layer. We need to make sure that it's not
1362 * actually sitting in a magazine before we report it as
1363 * an allocated buffer.
1364 */
1365 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1366
1367 if (magcnt > 0 &&
1368 bsearch(&buf, maglist, magcnt, sizeof (void *),
1369 addrcmp) != NULL)
1370 return (WALK_NEXT);
1371
1372 if (type & KM_BUFCTL)
1373 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1374
1375 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1376 }
1377
1378 ret = WALK_NEXT;
1379
1380 addr = kmw->kmw_addr;
1381
1382 /*
1383 * If we're walking freed buffers, report everything in the
1384 * magazine layer before processing the first slab.
1385 */
1386 if ((type & KM_FREE) && magcnt != 0) {
1387 kmw->kmw_count = 0; /* only do this once */
1388 for (i = 0; i < magcnt; i++) {
1389 buf = maglist[i];
1390
1391 if (type & KM_BUFCTL) {
1392 uintptr_t out;
1393
1394 if (cp->cache_flags & KMF_BUFTAG) {
1395 kmem_buftag_t *btp;
1396 kmem_buftag_t tag;
1397
1398 /* LINTED - alignment */
1399 btp = KMEM_BUFTAG(cp, buf);
1400 if (mdb_vread(&tag, sizeof (tag),
1401 (uintptr_t)btp) == -1) {
1402 mdb_warn("reading buftag for "
1403 "%p at %p", buf, btp);
1404 continue;
1405 }
1406 out = (uintptr_t)tag.bt_bufctl;
1407 } else {
1408 if (kmem_hash_lookup(cp, addr, buf,
1409 &out) == -1)
1410 continue;
1411 }
1412 ret = bufctl_walk_callback(cp, wsp, out);
1413 } else {
1414 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1415 }
1416
1417 if (ret != WALK_NEXT)
1418 return (ret);
1419 }
1420 }
1421
1422 /*
1423 * If they want constructed buffers, we're finished, since the
1424 * magazine layer holds them all.
1425 */
1426 if (type & KM_CONSTRUCTED)
1427 return (WALK_DONE);
1428
1429 /*
1430 * Handle the buffers in the current slab
1431 */
1432 chunksize = cp->cache_chunksize;
1433 slabsize = cp->cache_slabsize;
1434
1435 sp = wsp->walk_layer;
1436 chunks = sp->slab_chunks;
1437 kbase = sp->slab_base;
1438
1439 dprintf(("kbase is %p\n", kbase));
1440
1441 if (!(cp->cache_flags & KMF_HASH)) {
1442 valid = kmw->kmw_valid;
1443 ubase = kmw->kmw_ubase;
1444
1445 if (mdb_vread(ubase, chunks * chunksize,
1446 (uintptr_t)kbase) == -1) {
1447 mdb_warn("failed to read slab contents at %p", kbase);
1448 return (WALK_ERR);
1449 }
1450
1451 /*
1452 * Set up the valid map as fully allocated -- we'll punch
1453 * out the freelist.
1454 */
1455 if (type & KM_ALLOCATED)
1456 (void) memset(valid, 1, chunks);
1457 } else {
1458 valid = NULL;
1459 ubase = NULL;
1460 }
1461
1462 /*
1463 * walk the slab's freelist
1464 */
1465 bcp = sp->slab_head;
1466
1467 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1468
1469 /*
1470 * since we could be in the middle of allocating a buffer,
1471 * our refcnt could be one higher than it ought to be. So we
1472 * check one further on the freelist than the count allows.
1473 */
1474 for (i = sp->slab_refcnt; i <= chunks; i++) {
1475 uint_t ndx;
1476
1477 dprintf(("bcp is %p\n", bcp));
1478
1479 if (bcp == NULL) {
1480 if (i == chunks)
1481 break;
1482 mdb_warn(
1483 "slab %p in cache %p freelist too short by %d\n",
1484 sp, addr, chunks - i);
1485 break;
1486 }
1487
1488 if (cp->cache_flags & KMF_HASH) {
1489 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1490 mdb_warn("failed to read bufctl ptr at %p",
1491 bcp);
1492 break;
1493 }
1494 buf = bc.bc_addr;
1495 } else {
1496 /*
1497 * Otherwise the buffer is (or should be) in the slab
1498 * that we've read in; determine its offset in the
1499 * slab, validate that it's not corrupt, and add to
1500 * our base address to find the kmem_bufctl_t. (Note
1501 * that we don't need to add the size of the bufctl
1502 * to our offset calculation because of the slop that's
1503 * allocated for the buffer at ubase.)
1504 */
1505 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1506
1507 if (offs > chunks * chunksize) {
1508 mdb_warn("found corrupt bufctl ptr %p"
1509 " in slab %p in cache %p\n", bcp,
1510 wsp->walk_addr, addr);
1511 break;
1512 }
1513
1514 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1515 buf = KMEM_BUF(cp, bcp);
1516 }
1517
1518 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1519
1520 if (ndx > slabsize / cp->cache_bufsize) {
1521 /*
1522 * This is very wrong; we have managed to find
1523 * a buffer in the slab which shouldn't
1524 * actually be here. Emit a warning, and
1525 * try to continue.
1526 */
1527 mdb_warn("buf %p is out of range for "
1528 "slab %p, cache %p\n", buf, sp, addr);
1529 } else if (type & KM_ALLOCATED) {
1530 /*
1531 * we have found a buffer on the slab's freelist;
1532 * clear its entry
1533 */
1534 valid[ndx] = 0;
1535 } else {
1536 /*
1537 * Report this freed buffer
1538 */
1539 if (type & KM_BUFCTL) {
1540 ret = bufctl_walk_callback(cp, wsp,
1541 (uintptr_t)bcp);
1542 } else {
1543 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1544 }
1545 if (ret != WALK_NEXT)
1546 return (ret);
1547 }
1548
1549 bcp = bc.bc_next;
1550 }
1551
1552 if (bcp != NULL) {
1553 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1554 sp, addr, bcp));
1555 }
1556
1557 /*
1558 * If we are walking freed buffers, the loop above handled reporting
1559 * them.
1560 */
1561 if (type & KM_FREE)
1562 return (WALK_NEXT);
1563
1564 if (type & KM_BUFCTL) {
1565 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1566 "cache %p\n", addr);
1567 return (WALK_ERR);
1568 }
1569
1570 /*
1571 * Report allocated buffers, skipping buffers in the magazine layer.
1572 * We only get this far for small-slab caches.
1573 */
1574 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1575 buf = (char *)kbase + i * chunksize;
1576
1577 if (!valid[i])
1578 continue; /* on slab freelist */
1579
1580 if (magcnt > 0 &&
1581 bsearch(&buf, maglist, magcnt, sizeof (void *),
1582 addrcmp) != NULL)
1583 continue; /* in magazine layer */
1584
1585 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1586 }
1587 return (ret);
1588 }
1589
1590 void
1591 kmem_walk_fini(mdb_walk_state_t *wsp)
1592 {
1593 kmem_walk_t *kmw = wsp->walk_data;
1594 uintptr_t chunksize;
1595 uintptr_t slabsize;
1596
1597 if (kmw == NULL)
1598 return;
1599
1600 if (kmw->kmw_maglist != NULL)
1601 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1602
1603 chunksize = kmw->kmw_cp->cache_chunksize;
1604 slabsize = kmw->kmw_cp->cache_slabsize;
1605
1606 if (kmw->kmw_valid != NULL)
1607 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1608 if (kmw->kmw_ubase != NULL)
1609 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1610
1611 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1612 mdb_free(kmw, sizeof (kmem_walk_t));
1613 }
1614
1615 /*ARGSUSED*/
1616 static int
1617 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1618 {
1619 /*
1620 * Buffers allocated from NOTOUCH caches can also show up as freed
1621 * memory in other caches. This can be a little confusing, so we
1622 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1623 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1624 */
1625 if (c->cache_cflags & KMC_NOTOUCH)
1626 return (WALK_NEXT);
1627
1628 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1629 wsp->walk_cbdata, addr) == -1)
1630 return (WALK_DONE);
1631
1632 return (WALK_NEXT);
1633 }
1634
1635 #define KMEM_WALK_ALL(name, wsp) { \
1636 wsp->walk_data = (name); \
1637 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1638 return (WALK_ERR); \
1639 return (WALK_DONE); \
1640 }
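/*
 * Each of the walker init functions below supports both a global and a
 * per-cache form (the address is illustrative):
 *
 *	> ::walk kmem			(allocated buffers across all caches)
 *	> fffffe23e9a8c008::walk kmem	(allocated buffers in one cache)
 *
 * The global form expands via KMEM_WALK_ALL() into a walk of every cache
 * reported by the "kmem_cache" walker (NOTOUCH caches excepted, as noted
 * above).
 */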
1641
1642 int
1643 kmem_walk_init(mdb_walk_state_t *wsp)
1644 {
1645 if (wsp->walk_arg != NULL)
1646 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1647
1648 if (wsp->walk_addr == 0)
1649 KMEM_WALK_ALL("kmem", wsp);
1650 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1651 }
1652
1653 int
1654 bufctl_walk_init(mdb_walk_state_t *wsp)
1655 {
1656 if (wsp->walk_addr == 0)
1657 KMEM_WALK_ALL("bufctl", wsp);
1658 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1659 }
1660
1661 int
1662 freemem_walk_init(mdb_walk_state_t *wsp)
1663 {
1664 if (wsp->walk_addr == 0)
1665 KMEM_WALK_ALL("freemem", wsp);
1666 return (kmem_walk_init_common(wsp, KM_FREE));
1667 }
1668
1669 int
1670 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1671 {
1672 if (wsp->walk_addr == 0)
1673 KMEM_WALK_ALL("freemem_constructed", wsp);
1674 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1675 }
1676
1677 int
1678 freectl_walk_init(mdb_walk_state_t *wsp)
1679 {
1680 if (wsp->walk_addr == 0)
1681 KMEM_WALK_ALL("freectl", wsp);
1682 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1683 }
1684
1685 int
1686 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1687 {
1688 if (wsp->walk_addr == 0)
1689 KMEM_WALK_ALL("freectl_constructed", wsp);
1690 return (kmem_walk_init_common(wsp,
1691 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1692 }
1693
1694 typedef struct bufctl_history_walk {
1695 void *bhw_next;
1696 kmem_cache_t *bhw_cache;
1697 kmem_slab_t *bhw_slab;
1698 hrtime_t bhw_timestamp;
1699 } bufctl_history_walk_t;
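/*
 * The bufctl_history walk starts from a bufctl address and follows its
 * bc_lastlog pointers back through the transaction log, yielding
 * progressively older log entries recorded for the same buffer, cache,
 * and slab; the decreasing-timestamp check below guards against loops.
 */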
1700
1701 int
1702 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1703 {
1704 bufctl_history_walk_t *bhw;
1705 kmem_bufctl_audit_t bc;
1706 kmem_bufctl_audit_t bcn;
1707
1708 if (wsp->walk_addr == 0) {
1709 mdb_warn("bufctl_history walk doesn't support global walks\n");
1710 return (WALK_ERR);
1711 }
1712
1713 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1714 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1715 return (WALK_ERR);
1716 }
1717
1718 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1719 bhw->bhw_timestamp = 0;
1720 bhw->bhw_cache = bc.bc_cache;
1721 bhw->bhw_slab = bc.bc_slab;
1722
1723 /*
1724 * sometimes the first log entry matches the base bufctl; in that
1725 * case, skip the base bufctl.
1726 */
1727 if (bc.bc_lastlog != NULL &&
1728 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1729 bc.bc_addr == bcn.bc_addr &&
1730 bc.bc_cache == bcn.bc_cache &&
1731 bc.bc_slab == bcn.bc_slab &&
1732 bc.bc_timestamp == bcn.bc_timestamp &&
1733 bc.bc_thread == bcn.bc_thread)
1734 bhw->bhw_next = bc.bc_lastlog;
1735 else
1736 bhw->bhw_next = (void *)wsp->walk_addr;
1737
1738 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1739 wsp->walk_data = bhw;
1740
1741 return (WALK_NEXT);
1742 }
1743
1744 int
1745 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1746 {
1747 bufctl_history_walk_t *bhw = wsp->walk_data;
1748 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1749 uintptr_t baseaddr = wsp->walk_addr;
1750 kmem_bufctl_audit_t bc;
1751
1752 if (addr == 0)
1753 return (WALK_DONE);
1754
1755 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1756 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1757 return (WALK_ERR);
1758 }
1759
1760 /*
1761 * The bufctl is only valid if the address, cache, and slab are
1762 * correct. We also check that the timestamp is decreasing, to
1763 * prevent infinite loops.
1764 */
1765 if ((uintptr_t)bc.bc_addr != baseaddr ||
1766 bc.bc_cache != bhw->bhw_cache ||
1767 bc.bc_slab != bhw->bhw_slab ||
1768 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1769 return (WALK_DONE);
1770
1771 bhw->bhw_next = bc.bc_lastlog;
1772 bhw->bhw_timestamp = bc.bc_timestamp;
1773
1774 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1775 }
1776
1777 void
1778 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1779 {
1780 bufctl_history_walk_t *bhw = wsp->walk_data;
1781
1782 mdb_free(bhw, sizeof (*bhw));
1783 }
1784
1785 typedef struct kmem_log_walk {
1786 kmem_bufctl_audit_t *klw_base;
1787 kmem_bufctl_audit_t **klw_sorted;
1788 kmem_log_header_t klw_lh;
1789 size_t klw_size;
1790 size_t klw_maxndx;
1791 size_t klw_ndx;
1792 } kmem_log_walk_t;
1793
1794 int
1795 kmem_log_walk_init(mdb_walk_state_t *wsp)
1796 {
1797 uintptr_t lp = wsp->walk_addr;
1798 kmem_log_walk_t *klw;
1799 kmem_log_header_t *lhp;
1800 int maxndx, i, j, k;
1801
1802 /*
1803 * By default (global walk), walk the kmem_transaction_log. Otherwise
1804 * read the log whose kmem_log_header_t is stored at walk_addr.
1805 */
1806 if (lp == 0 && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1807 mdb_warn("failed to read 'kmem_transaction_log'");
1808 return (WALK_ERR);
1809 }
1810
1811 if (lp == 0) {
1812 mdb_warn("log is disabled\n");
1813 return (WALK_ERR);
1814 }
1815
1816 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1817 lhp = &klw->klw_lh;
1818
1819 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1820 mdb_warn("failed to read log header at %p", lp);
1821 mdb_free(klw, sizeof (kmem_log_walk_t));
1822 return (WALK_ERR);
1823 }
1824
1825 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1826 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1827 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1828
1829 if (mdb_vread(klw->klw_base, klw->klw_size,
1830 (uintptr_t)lhp->lh_base) == -1) {
1831 mdb_warn("failed to read log at base %p", lhp->lh_base);
1832 mdb_free(klw->klw_base, klw->klw_size);
1833 mdb_free(klw, sizeof (kmem_log_walk_t));
1834 return (WALK_ERR);
1835 }
1836
1837 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1838 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1839
1840 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1841 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1842 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1843
1844 for (j = 0; j < maxndx; j++)
1845 klw->klw_sorted[k++] = &chunk[j];
1846 }
1847
1848 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1849 (int(*)(const void *, const void *))bufctlcmp);
1850
1851 klw->klw_maxndx = k;
1852 wsp->walk_data = klw;
1853
1854 return (WALK_NEXT);
1855 }
1856
1857 int
1858 kmem_log_walk_step(mdb_walk_state_t *wsp)
1859 {
1860 kmem_log_walk_t *klw = wsp->walk_data;
1861 kmem_bufctl_audit_t *bcp;
1862
1863 if (klw->klw_ndx == klw->klw_maxndx)
1864 return (WALK_DONE);
1865
1866 bcp = klw->klw_sorted[klw->klw_ndx++];
1867
1868 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1869 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1870 }
1871
1872 void
1873 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1874 {
1875 kmem_log_walk_t *klw = wsp->walk_data;
1876
1877 mdb_free(klw->klw_base, klw->klw_size);
1878 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1879 sizeof (kmem_bufctl_audit_t *));
1880 mdb_free(klw, sizeof (kmem_log_walk_t));
1881 }
1882
1883 typedef struct allocdby_bufctl {
1884 uintptr_t abb_addr;
1885 hrtime_t abb_ts;
1886 } allocdby_bufctl_t;
1887
1888 typedef struct allocdby_walk {
1889 const char *abw_walk;
1890 uintptr_t abw_thread;
1891 size_t abw_nbufs;
1892 size_t abw_size;
1893 allocdby_bufctl_t *abw_buf;
1894 size_t abw_ndx;
1895 } allocdby_walk_t;
1896
1897 int
1898 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1899 allocdby_walk_t *abw)
1900 {
1901 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1902 return (WALK_NEXT);
1903
1904 if (abw->abw_nbufs == abw->abw_size) {
1905 allocdby_bufctl_t *buf;
1906 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1907
1908 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1909
1910 bcopy(abw->abw_buf, buf, oldsize);
1911 mdb_free(abw->abw_buf, oldsize);
1912
1913 abw->abw_size <<= 1;
1914 abw->abw_buf = buf;
1915 }
1916
1917 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1918 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1919 abw->abw_nbufs++;
1920
1921 return (WALK_NEXT);
1922 }
1923
1924 /*ARGSUSED*/
1925 int
1926 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1927 {
1928 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1929 abw, addr) == -1) {
1930 mdb_warn("couldn't walk bufctl for cache %p", addr);
1931 return (WALK_DONE);
1932 }
1933
1934 return (WALK_NEXT);
1935 }
1936
1937 static int
1938 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1939 {
1940 if (lhs->abb_ts < rhs->abb_ts)
1941 return (1);
1942 if (lhs->abb_ts > rhs->abb_ts)
1943 return (-1);
1944 return (0);
1945 }
1946
1947 static int
1948 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1949 {
1950 allocdby_walk_t *abw;
1951
1952 if (wsp->walk_addr == 0) {
1953 mdb_warn("allocdby walk doesn't support global walks\n");
1954 return (WALK_ERR);
1955 }
1956
1957 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1958
1959 abw->abw_thread = wsp->walk_addr;
1960 abw->abw_walk = walk;
1961 abw->abw_size = 128; /* something reasonable */
1962 abw->abw_buf =
1963 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1964
1965 wsp->walk_data = abw;
1966
1967 if (mdb_walk("kmem_cache",
1968 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1969 mdb_warn("couldn't walk kmem_cache");
1970 allocdby_walk_fini(wsp);
1971 return (WALK_ERR);
1972 }
1973
1974 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1975 (int(*)(const void *, const void *))allocdby_cmp);
1976
1977 return (WALK_NEXT);
1978 }
1979
1980 int
1981 allocdby_walk_init(mdb_walk_state_t *wsp)
1982 {
1983 return (allocdby_walk_init_common(wsp, "bufctl"));
1984 }
1985
1986 int
1987 freedby_walk_init(mdb_walk_state_t *wsp)
1988 {
1989 return (allocdby_walk_init_common(wsp, "freectl"));
1990 }
1991
1992 int
1993 allocdby_walk_step(mdb_walk_state_t *wsp)
1994 {
1995 allocdby_walk_t *abw = wsp->walk_data;
1996 kmem_bufctl_audit_t bc;
1997 uintptr_t addr;
1998
1999 if (abw->abw_ndx == abw->abw_nbufs)
2000 return (WALK_DONE);
2001
2002 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2003
2004 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2005 mdb_warn("couldn't read bufctl at %p", addr);
2006 return (WALK_DONE);
2007 }
2008
2009 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2010 }
2011
2012 void
2013 allocdby_walk_fini(mdb_walk_state_t *wsp)
2014 {
2015 allocdby_walk_t *abw = wsp->walk_data;
2016
2017 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2018 mdb_free(abw, sizeof (allocdby_walk_t));
2019 }
2020
2021 /*ARGSUSED*/
2022 int
2023 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2024 {
2025 char c[MDB_SYM_NAMLEN];
2026 GElf_Sym sym;
2027 int i;
2028
2029 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2030 for (i = 0; i < bcp->bc_depth; i++) {
2031 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2032 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2033 continue;
2034 if (strncmp(c, "kmem_", 5) == 0)
2035 continue;
2036 mdb_printf("%s+0x%lx",
2037 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2038 break;
2039 }
2040 mdb_printf("\n");
2041
2042 return (WALK_NEXT);
2043 }
2044
2045 static int
2046 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2047 {
2048 if (!(flags & DCMD_ADDRSPEC))
2049 return (DCMD_USAGE);
2050
2051 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2052
2053 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2054 mdb_warn("can't walk '%s' for %p", w, addr);
2055 return (DCMD_ERR);
2056 }
2057
2058 return (DCMD_OK);
2059 }
2060
2061 /*ARGSUSED*/
2062 int
2063 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2064 {
2065 return (allocdby_common(addr, flags, "allocdby"));
2066 }
2067
2068 /*ARGSUSED*/
2069 int
2070 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2071 {
2072 return (allocdby_common(addr, flags, "freedby"));
2073 }
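/*
 * Illustrative usage sketch (editor's addition; the addresses below are
 * hypothetical): given a thread pointer, ::allocdby and ::freedby list the
 * bufctls most recently allocated or freed by that thread, newest first.
 *
 *	> ffffff01d4bc3c20::allocdby
 *	BUFCTL           TIMESTAMP CALLER
 *	ffffff01ce9a2e68 e2b2d4e99887 allocb+0x5c
 *	...
 */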
2074
2075 /*
2076 * Return a string describing the address in relation to the given thread's
2077 * stack.
2078 *
2079 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2080 *
2081 * - If the address is above the stack pointer, return an empty string
2082 * signifying that the address is active.
2083 *
2084 * - If the address is below the stack pointer, and the thread is not on proc,
2085 * return " (below sp)".
2086 *
2087 * - If the address is below the stack pointer, and the thread is on proc,
2088 * return " (possibly below sp)". Depending on context, we may or may not
2089 * have an accurate t_sp.
2090 */
2091 static const char *
2092 stack_active(const kthread_t *t, uintptr_t addr)
2093 {
2094 uintptr_t panicstk;
2095 GElf_Sym sym;
2096
2097 if (t->t_state == TS_FREE)
2098 return (" (inactive interrupt thread)");
2099
2100 /*
2101 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2102 * no longer relates to the thread's real stack.
2103 */
2104 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2105 panicstk = (uintptr_t)sym.st_value;
2106
2107 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2108 return ("");
2109 }
2110
2111 if (addr >= t->t_sp + STACK_BIAS)
2112 return ("");
2113
2114 if (t->t_state == TS_ONPROC)
2115 return (" (possibly below sp)");
2116
2117 return (" (below sp)");
2118 }
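/*
 * Illustrative sketch (editor's addition; addresses are hypothetical): the
 * suffix returned by stack_active() is appended to ::whatis output for stack
 * addresses, e.g. "... is in thread ffffff01d4bc3c20's stack (below sp)".
 */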
2119
2120 /*
2121 * Additional state for the kmem and vmem ::whatis handlers
2122 */
2123 typedef struct whatis_info {
2124 mdb_whatis_t *wi_w;
2125 const kmem_cache_t *wi_cache;
2126 const vmem_t *wi_vmem;
2127 vmem_t *wi_msb_arena;
2128 size_t wi_slab_size;
2129 uint_t wi_slab_found;
2130 uint_t wi_kmem_lite_count;
2131 uint_t wi_freemem;
2132 } whatis_info_t;
2133
2134 /* call one of our dcmd functions with "-v" and the provided address */
2135 static void
2136 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2137 {
2138 mdb_arg_t a;
2139 a.a_type = MDB_TYPE_STRING;
2140 a.a_un.a_str = "-v";
2141
2142 mdb_printf(":\n");
2143 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2144 }
2145
2146 static void
2147 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2148 {
2149 #define KMEM_LITE_MAX 16
2150 pc_t callers[KMEM_LITE_MAX];
2151 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2152
2153 kmem_buftag_t bt;
2154 intptr_t stat;
2155 const char *plural = "";
2156 int i;
2157
2158 /* validate our arguments and read in the buftag */
2159 if (count == 0 || count > KMEM_LITE_MAX ||
2160 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2161 return;
2162
2163 /* validate the buffer state and read in the callers */
2164 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2165
2166 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2167 return;
2168
2169 if (mdb_vread(callers, count * sizeof (pc_t),
2170 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2171 return;
2172
2173 /* If there aren't any filled in callers, bail */
2174 if (callers[0] == uninit)
2175 return;
2176
2177 plural = (callers[1] == uninit) ? "" : "s";
2178
2179 /* Everything's done and checked; print them out */
2180 mdb_printf(":\n");
2181
2182 mdb_inc_indent(8);
2183 mdb_printf("recent caller%s: %a", plural, callers[0]);
2184 for (i = 1; i < count; i++) {
2185 if (callers[i] == uninit)
2186 break;
2187 mdb_printf(", %a", callers[i]);
2188 }
2189 mdb_dec_indent(8);
2190 }
2191
2192 static void
2193 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2194 uintptr_t baddr)
2195 {
2196 mdb_whatis_t *w = wi->wi_w;
2197
2198 const kmem_cache_t *cp = wi->wi_cache;
2199 /* LINTED pointer cast may result in improper alignment */
2200 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2201 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2202 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2203
2204 mdb_whatis_report_object(w, maddr, addr, "");
2205
2206 if (baddr != 0 && !call_printer)
2207 mdb_printf("bufctl %p ", baddr);
2208
2209 mdb_printf("%s from %s",
2210 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2211
2212 if (baddr != 0 && call_printer) {
2213 whatis_call_printer(bufctl, baddr);
2214 return;
2215 }
2216
2217 /* for KMF_LITE caches, try to print out the previous callers */
2218 if (!quiet && (cp->cache_flags & KMF_LITE))
2219 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2220
2221 mdb_printf("\n");
2222 }
2223
2224 /*ARGSUSED*/
2225 static int
2226 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2227 {
2228 mdb_whatis_t *w = wi->wi_w;
2229
2230 uintptr_t cur;
2231 size_t size = wi->wi_cache->cache_bufsize;
2232
2233 while (mdb_whatis_match(w, addr, size, &cur))
2234 whatis_print_kmem(wi, cur, addr, 0);
2235
2236 return (WHATIS_WALKRET(w));
2237 }
2238
2239 /*ARGSUSED*/
2240 static int
2241 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2242 {
2243 mdb_whatis_t *w = wi->wi_w;
2244
2245 uintptr_t cur;
2246 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2247 size_t size = wi->wi_cache->cache_bufsize;
2248
2249 while (mdb_whatis_match(w, addr, size, &cur))
2250 whatis_print_kmem(wi, cur, addr, baddr);
2251
2252 return (WHATIS_WALKRET(w));
2253 }
2254
2255 static int
2256 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2257 {
2258 mdb_whatis_t *w = wi->wi_w;
2259
2260 size_t size = vs->vs_end - vs->vs_start;
2261 uintptr_t cur;
2262
2263 /* We're not interested in anything but alloc and free segments */
2264 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2265 return (WALK_NEXT);
2266
2267 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2268 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2269
2270 /*
2271 * If we're not printing it separately, provide the vmem_seg
2272 * pointer if it has a stack trace.
2273 */
2274 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2275 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2276 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2277 mdb_printf("vmem_seg %p ", addr);
2278 }
2279
2280 mdb_printf("%s from the %s vmem arena",
2281 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2282 wi->wi_vmem->vm_name);
2283
2284 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2285 whatis_call_printer(vmem_seg, addr);
2286 else
2287 mdb_printf("\n");
2288 }
2289
2290 return (WHATIS_WALKRET(w));
2291 }
2292
2293 static int
2294 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2295 {
2296 mdb_whatis_t *w = wi->wi_w;
2297 const char *nm = vmem->vm_name;
2298
2299 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2300 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2301
2302 if (identifier != idspace)
2303 return (WALK_NEXT);
2304
2305 wi->wi_vmem = vmem;
2306
2307 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2308 mdb_printf("Searching vmem arena %s...\n", nm);
2309
2310 if (mdb_pwalk("vmem_seg",
2311 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2312 mdb_warn("can't walk vmem_seg for %p", addr);
2313 return (WALK_NEXT);
2314 }
2315
2316 return (WHATIS_WALKRET(w));
2317 }
2318
2319 /*ARGSUSED*/
2320 static int
2321 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2322 {
2323 mdb_whatis_t *w = wi->wi_w;
2324
2325 /* It must overlap with the slab data, or it's not interesting */
2326 if (mdb_whatis_overlaps(w,
2327 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2328 wi->wi_slab_found++;
2329 return (WALK_DONE);
2330 }
2331 return (WALK_NEXT);
2332 }
2333
2334 static int
2335 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2336 {
2337 mdb_whatis_t *w = wi->wi_w;
2338
2339 char *walk, *freewalk;
2340 mdb_walk_cb_t func;
2341 int do_bufctl;
2342
2343 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2344 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2345
2346 if (identifier != idspace)
2347 return (WALK_NEXT);
2348
2349 /* Override the '-b' flag as necessary */
2350 if (!(c->cache_flags & KMF_HASH))
2351 do_bufctl = FALSE; /* no bufctls to walk */
2352 else if (c->cache_flags & KMF_AUDIT)
2353 do_bufctl = TRUE; /* we always want debugging info */
2354 else
2355 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2356
2357 if (do_bufctl) {
2358 walk = "bufctl";
2359 freewalk = "freectl";
2360 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2361 } else {
2362 walk = "kmem";
2363 freewalk = "freemem";
2364 func = (mdb_walk_cb_t)whatis_walk_kmem;
2365 }
2366
2367 wi->wi_cache = c;
2368
2369 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2370 mdb_printf("Searching %s...\n", c->cache_name);
2371
2372 /*
2373 * If more than two buffers live on each slab, figure out if we're
2374 * interested in anything in any slab before doing the more expensive
2375 * kmem/freemem (bufctl/freectl) walkers.
2376 */
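	/*
	 * Worked example (editor's addition; the cache geometry is
	 * hypothetical): in a KMF_HASH cache with cache_slabsize 8192,
	 * cache_maxcolor 64 and cache_chunksize 512, wi_slab_size is 8128
	 * and 8128 / 512 = 15 > 2, so the cheap kmem_slab scan below runs
	 * first and lets us skip caches whose slabs can't contain any of
	 * the addresses being searched for.
	 */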
2377 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2378 if (!(c->cache_flags & KMF_HASH))
2379 wi->wi_slab_size -= sizeof (kmem_slab_t);
2380
2381 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2382 wi->wi_slab_found = 0;
2383 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2384 addr) == -1) {
2385 mdb_warn("can't find kmem_slab walker");
2386 return (WALK_DONE);
2387 }
2388 if (wi->wi_slab_found == 0)
2389 return (WALK_NEXT);
2390 }
2391
2392 wi->wi_freemem = FALSE;
2393 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2394 mdb_warn("can't find %s walker", walk);
2395 return (WALK_DONE);
2396 }
2397
2398 if (mdb_whatis_done(w))
2399 return (WALK_DONE);
2400
2401 /*
2402 * We have searched for allocated memory; now search for freed memory.
2403 */
2404 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2405 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2406
2407 wi->wi_freemem = TRUE;
2408 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2409 mdb_warn("can't find %s walker", freewalk);
2410 return (WALK_DONE);
2411 }
2412
2413 return (WHATIS_WALKRET(w));
2414 }
2415
2416 static int
2417 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2418 {
2419 if (c->cache_arena == wi->wi_msb_arena ||
2420 (c->cache_cflags & KMC_NOTOUCH))
2421 return (WALK_NEXT);
2422
2423 return (whatis_walk_cache(addr, c, wi));
2424 }
2425
2426 static int
2427 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2428 {
2429 if (c->cache_arena != wi->wi_msb_arena)
2430 return (WALK_NEXT);
2431
2432 return (whatis_walk_cache(addr, c, wi));
2433 }
2434
2435 static int
2436 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2437 {
2438 if (c->cache_arena == wi->wi_msb_arena ||
2439 !(c->cache_cflags & KMC_NOTOUCH))
2440 return (WALK_NEXT);
2441
2442 return (whatis_walk_cache(addr, c, wi));
2443 }
2444
2445 static int
2446 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2447 {
2448 uintptr_t cur;
2449 uintptr_t saddr;
2450 size_t size;
2451
2452 /*
2453 * Often, one calls ::whatis on an address from a thread structure.
2454 * We use this opportunity to short circuit this case...
2455 */
2456 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2457 mdb_whatis_report_object(w, cur, addr,
2458 "allocated as a thread structure\n");
2459
2460 /*
2461 * Now check the stack
2462 */
2463 if (t->t_stkbase == NULL)
2464 return (WALK_NEXT);
2465
2466 /*
2467 * This assumes that t_stk is the end of the stack, but it's really
2468 * only the initial stack pointer for the thread. Arguments to the
2469 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2470 * that 't->t_stk::whatis' reports "part of t's stack", we include
2471 * t_stk in the range (the "+ 1", below), but the kernel should
2472 * really include the full stack bounds where we can find it.
2473 */
2474 saddr = (uintptr_t)t->t_stkbase;
2475 size = (uintptr_t)t->t_stk - saddr + 1;
2476 while (mdb_whatis_match(w, saddr, size, &cur))
2477 mdb_whatis_report_object(w, cur, cur,
2478 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2479
2480 return (WHATIS_WALKRET(w));
2481 }
2482
2483 static void
2484 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2485 uintptr_t base, size_t size, const char *where)
2486 {
2487 uintptr_t cur;
2488
2489 /*
2490 * Since we're searching for addresses inside a module, we report
2491 * them as symbols.
2492 */
2493 while (mdb_whatis_match(w, base, size, &cur))
2494 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2495 }
2496
2497 struct kmem_ctf_module {
2498 Shdr *symhdr;
2499 char *symtbl;
2500 unsigned int nsyms;
2501 char *symspace;
2502 size_t symsize;
2503 char *text;
2504 char *data;
2505 uintptr_t bss;
2506 size_t text_size;
2507 size_t data_size;
2508 size_t bss_size;
2509 };
2510
2511 static int
2512 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2513 {
2514 char name[MODMAXNAMELEN];
2515 struct kmem_ctf_module mod;
2516 Shdr shdr;
2517
2518 if (m->mod_mp == NULL)
2519 return (WALK_NEXT);
2520
2521 if (mdb_ctf_vread(&mod, "struct module", "struct kmem_ctf_module",
2522 (uintptr_t)m->mod_mp, 0) == -1) {
2523 mdb_warn("couldn't read modctl %p's module", addr);
2524 return (WALK_NEXT);
2525 }
2526
2527 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2528 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2529
2530 whatis_modctl_match(w, name,
2531 (uintptr_t)mod.text, mod.text_size, "text segment");
2532 whatis_modctl_match(w, name,
2533 (uintptr_t)mod.data, mod.data_size, "data segment");
2534 whatis_modctl_match(w, name,
2535 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2536
2537 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2538 mdb_warn("couldn't read symbol header for %p's module", addr);
2539 return (WALK_NEXT);
2540 }
2541
2542 whatis_modctl_match(w, name,
2543 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2544 whatis_modctl_match(w, name,
2545 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2546
2547 return (WHATIS_WALKRET(w));
2548 }
2549
2550 /*ARGSUSED*/
2551 static int
2552 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2553 {
2554 uintptr_t cur;
2555
2556 uintptr_t base = (uintptr_t)seg->pages;
2557 size_t size = (uintptr_t)seg->epages - base;
2558
2559 while (mdb_whatis_match(w, base, size, &cur)) {
2560 /* round our found pointer down to the page_t base. */
2561 size_t offset = (cur - base) % sizeof (page_t);
2562
2563 mdb_whatis_report_object(w, cur, cur - offset,
2564 "allocated as a page structure\n");
2565 }
2566
2567 return (WHATIS_WALKRET(w));
2568 }
2569
2570 /*ARGSUSED*/
2571 static int
2572 whatis_run_modules(mdb_whatis_t *w, void *arg)
2573 {
2574 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2575 mdb_warn("couldn't find modctl walker");
2576 return (1);
2577 }
2578 return (0);
2579 }
2580
2581 /*ARGSUSED*/
2582 static int
2583 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2584 {
2585 /*
2586 * Now search all thread stacks. Yes, this is a little weak; we
2587 * can save a lot of work by first checking to see if the
2588 * address is in segkp vs. segkmem. But hey, computers are
2589 * fast.
2590 */
2591 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2592 mdb_warn("couldn't find thread walker");
2593 return (1);
2594 }
2595 return (0);
2596 }
2597
2598 /*ARGSUSED*/
2599 static int
2600 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2601 {
2602 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2603 mdb_warn("couldn't find memseg walker");
2604 return (1);
2605 }
2606 return (0);
2607 }
2608
2609 /*ARGSUSED*/
2610 static int
2611 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2612 {
2613 whatis_info_t wi;
2614
2615 bzero(&wi, sizeof (wi));
2616 wi.wi_w = w;
2617
2618 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2619 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2620
2621 if (mdb_readvar(&wi.wi_kmem_lite_count,
2622 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2623 wi.wi_kmem_lite_count = 0;
2624
2625 /*
2626 * We process kmem caches in the following order:
2627 *
2628 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2629 * metadata (can be huge with KMF_AUDIT)
2630 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2631 */
2632 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2633 &wi) == -1 ||
2634 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2635 &wi) == -1 ||
2636 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2637 &wi) == -1) {
2638 mdb_warn("couldn't find kmem_cache walker");
2639 return (1);
2640 }
2641 return (0);
2642 }
2643
2644 /*ARGSUSED*/
2645 static int
2646 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2647 {
2648 whatis_info_t wi;
2649
2650 bzero(&wi, sizeof (wi));
2651 wi.wi_w = w;
2652
2653 if (mdb_walk("vmem_postfix",
2654 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2655 mdb_warn("couldn't find vmem_postfix walker");
2656 return (1);
2657 }
2658 return (0);
2659 }
2660
2661 typedef struct kmem_log_cpu {
2662 uintptr_t kmc_low;
2663 uintptr_t kmc_high;
2664 } kmem_log_cpu_t;
2665
2666 typedef struct kmem_log_data {
2667 uintptr_t kmd_addr;
2668 kmem_log_cpu_t *kmd_cpu;
2669 } kmem_log_data_t;
2670
2671 int
2672 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2673 kmem_log_data_t *kmd)
2674 {
2675 int i;
2676 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2677 size_t bufsize;
2678
2679 for (i = 0; i < NCPU; i++) {
2680 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2681 break;
2682 }
2683
2684 if (kmd->kmd_addr) {
2685 if (b->bc_cache == NULL)
2686 return (WALK_NEXT);
2687
2688 if (mdb_vread(&bufsize, sizeof (bufsize),
2689 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2690 mdb_warn(
2691 "failed to read cache_bufsize for cache at %p",
2692 b->bc_cache);
2693 return (WALK_ERR);
2694 }
2695
2696 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2697 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2698 return (WALK_NEXT);
2699 }
2700
2701 if (i == NCPU)
2702 mdb_printf(" ");
2703 else
2704 mdb_printf("%3d", i);
2705
2706 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2707 b->bc_timestamp, b->bc_thread);
2708
2709 return (WALK_NEXT);
2710 }
2711
2712 /*ARGSUSED*/
2713 int
2714 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2715 {
2716 kmem_log_header_t lh;
2717 kmem_cpu_log_header_t clh;
2718 uintptr_t lhp, clhp;
2719 int ncpus;
2720 uintptr_t *cpu;
2721 GElf_Sym sym;
2722 kmem_log_cpu_t *kmc;
2723 int i;
2724 kmem_log_data_t kmd;
2725 uint_t opt_b = FALSE;
2726
2727 if (mdb_getopts(argc, argv,
2728 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2729 return (DCMD_USAGE);
2730
2731 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2732 mdb_warn("failed to read 'kmem_transaction_log'");
2733 return (DCMD_ERR);
2734 }
2735
2736 if (lhp == 0) {
2737 mdb_warn("no kmem transaction log\n");
2738 return (DCMD_ERR);
2739 }
2740
2741 mdb_readvar(&ncpus, "ncpus");
2742
2743 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2744 mdb_warn("failed to read log header at %p", lhp);
2745 return (DCMD_ERR);
2746 }
2747
2748 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2749
2750 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2751
2752 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2753 mdb_warn("couldn't find 'cpu' array");
2754 return (DCMD_ERR);
2755 }
2756
2757 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2758 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2759 NCPU * sizeof (uintptr_t), sym.st_size);
2760 return (DCMD_ERR);
2761 }
2762
2763 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2764 mdb_warn("failed to read cpu array at %p", sym.st_value);
2765 return (DCMD_ERR);
2766 }
2767
2768 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2769 kmd.kmd_addr = 0;
2770 kmd.kmd_cpu = kmc;
2771
2772 for (i = 0; i < NCPU; i++) {
2773
2774 if (cpu[i] == 0)
2775 continue;
2776
2777 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2778 mdb_warn("cannot read cpu %d's log header at %p",
2779 i, clhp);
2780 return (DCMD_ERR);
2781 }
2782
2783 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2784 (uintptr_t)lh.lh_base;
2785 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2786
2787 clhp += sizeof (kmem_cpu_log_header_t);
2788 }
2789
2790 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2791 "TIMESTAMP", "THREAD");
2792
2793 /*
2794 * If we have been passed an address, print out only log entries
2795 * corresponding to that address: by default the address is a bufctl and
2796 * just that entry is printed; with -b, it is treated as a buffer address.
2797 */
2798 if (flags & DCMD_ADDRSPEC) {
2799 kmem_bufctl_audit_t b;
2800
2801 if (opt_b) {
2802 kmd.kmd_addr = addr;
2803 } else {
2804 if (mdb_vread(&b,
2805 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2806 mdb_warn("failed to read bufctl at %p", addr);
2807 return (DCMD_ERR);
2808 }
2809
2810 (void) kmem_log_walk(addr, &b, &kmd);
2811
2812 return (DCMD_OK);
2813 }
2814 }
2815
2816 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2817 mdb_warn("can't find kmem log walker");
2818 return (DCMD_ERR);
2819 }
2820
2821 return (DCMD_OK);
2822 }
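/*
 * Illustrative usage sketch (editor's addition; addresses are hypothetical):
 *
 *	> ::kmem_log				! dump the whole transaction log
 *	> ffffff01ce9a2e68::kmem_log		! print the entry for this bufctl
 *	> ffffff01ce88e000::kmem_log -b		! all entries for this buffer
 */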
2823
2824 typedef struct bufctl_history_cb {
2825 int bhc_flags;
2826 int bhc_argc;
2827 const mdb_arg_t *bhc_argv;
2828 int bhc_ret;
2829 } bufctl_history_cb_t;
2830
2831 /*ARGSUSED*/
2832 static int
2833 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2834 {
2835 bufctl_history_cb_t *bhc = arg;
2836
2837 bhc->bhc_ret =
2838 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2839
2840 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2841
2842 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2843 }
2844
2845 void
2846 bufctl_help(void)
2847 {
2848 mdb_printf("%s",
2849 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2850 mdb_dec_indent(2);
2851 mdb_printf("%<b>OPTIONS%</b>\n");
2852 mdb_inc_indent(2);
2853 mdb_printf("%s",
2854 " -v Display the full content of the bufctl, including its stack trace\n"
2855 " -h retrieve the bufctl's transaction history, if available\n"
2856 " -a addr\n"
2857 " filter out bufctls not involving the buffer at addr\n"
2858 " -c caller\n"
2859 " filter out bufctls without the function/PC in their stack trace\n"
2860 " -e earliest\n"
2861 " filter out bufctls timestamped before earliest\n"
2862 " -l latest\n"
2863 " filter out bufctls timestamped after latest\n"
2864 " -t thread\n"
2865 " filter out bufctls not involving thread\n");
2866 }
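/*
 * Illustrative usage sketch (editor's addition; addresses are hypothetical):
 *
 *	> ffffff01ce9a2e68::bufctl -v
 *	> ffffff01cb0e8008::walk bufctl | ::bufctl -t ffffff01d4bc3c20
 *	> ffffff01ce9a2e68::bufctl -h		! follow its transaction history
 */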
2867
2868 int
2869 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2870 {
2871 kmem_bufctl_audit_t bc;
2872 uint_t verbose = FALSE;
2873 uint_t history = FALSE;
2874 uint_t in_history = FALSE;
2875 uintptr_t caller = 0, thread = 0;
2876 uintptr_t laddr, haddr, baddr = 0;
2877 hrtime_t earliest = 0, latest = 0;
2878 int i, depth;
2879 char c[MDB_SYM_NAMLEN];
2880 GElf_Sym sym;
2881
2882 if (mdb_getopts(argc, argv,
2883 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2884 'h', MDB_OPT_SETBITS, TRUE, &history,
2885 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2886 'c', MDB_OPT_UINTPTR, &caller,
2887 't', MDB_OPT_UINTPTR, &thread,
2888 'e', MDB_OPT_UINT64, &earliest,
2889 'l', MDB_OPT_UINT64, &latest,
2890 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2891 return (DCMD_USAGE);
2892
2893 if (!(flags & DCMD_ADDRSPEC))
2894 return (DCMD_USAGE);
2895
2896 if (in_history && !history)
2897 return (DCMD_USAGE);
2898
2899 if (history && !in_history) {
2900 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2901 UM_SLEEP | UM_GC);
2902 bufctl_history_cb_t bhc;
2903
2904 nargv[0].a_type = MDB_TYPE_STRING;
2905 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2906
2907 for (i = 0; i < argc; i++)
2908 nargv[i + 1] = argv[i];
2909
2910 /*
2911 * When in history mode, we treat each element as if it
2912 * were in a separate loop, so that the headers group
2913 * bufctls with similar histories.
2914 */
2915 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2916 bhc.bhc_argc = argc + 1;
2917 bhc.bhc_argv = nargv;
2918 bhc.bhc_ret = DCMD_OK;
2919
2920 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2921 addr) == -1) {
2922 mdb_warn("unable to walk bufctl_history");
2923 return (DCMD_ERR);
2924 }
2925
2926 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2927 mdb_printf("\n");
2928
2929 return (bhc.bhc_ret);
2930 }
2931
2932 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2933 if (verbose) {
2934 mdb_printf("%16s %16s %16s %16s\n"
2935 "%<u>%16s %16s %16s %16s%</u>\n",
2936 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2937 "", "CACHE", "LASTLOG", "CONTENTS");
2938 } else {
2939 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2940 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2941 }
2942 }
2943
2944 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2945 mdb_warn("couldn't read bufctl at %p", addr);
2946 return (DCMD_ERR);
2947 }
2948
2949 /*
2950 * Guard against bogus bc_depth in case the bufctl is corrupt or
2951 * the address does not really refer to a bufctl.
2952 */
2953 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2954
2955 if (caller != 0) {
2956 laddr = caller;
2957 haddr = caller + sizeof (caller);
2958
2959 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2960 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2961 /*
2962 * We were provided an exact symbol value; any
2963 * address in the function is valid.
2964 */
2965 laddr = (uintptr_t)sym.st_value;
2966 haddr = (uintptr_t)sym.st_value + sym.st_size;
2967 }
2968
2969 for (i = 0; i < depth; i++)
2970 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2971 break;
2972
2973 if (i == depth)
2974 return (DCMD_OK);
2975 }
2976
2977 if (thread != 0 && (uintptr_t)bc.bc_thread != thread)
2978 return (DCMD_OK);
2979
2980 if (earliest != 0 && bc.bc_timestamp < earliest)
2981 return (DCMD_OK);
2982
2983 if (latest != 0 && bc.bc_timestamp > latest)
2984 return (DCMD_OK);
2985
2986 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2987 return (DCMD_OK);
2988
2989 if (flags & DCMD_PIPE_OUT) {
2990 mdb_printf("%#lr\n", addr);
2991 return (DCMD_OK);
2992 }
2993
2994 if (verbose) {
2995 mdb_printf(
2996 "%<b>%16p%</b> %16p %16llx %16p\n"
2997 "%16s %16p %16p %16p\n",
2998 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2999 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
3000
3001 mdb_inc_indent(17);
3002 for (i = 0; i < depth; i++)
3003 mdb_printf("%a\n", bc.bc_stack[i]);
3004 mdb_dec_indent(17);
3005 mdb_printf("\n");
3006 } else {
3007 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
3008 bc.bc_timestamp, bc.bc_thread);
3009
3010 for (i = 0; i < depth; i++) {
3011 if (mdb_lookup_by_addr(bc.bc_stack[i],
3012 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
3013 continue;
3014 if (strncmp(c, "kmem_", 5) == 0)
3015 continue;
3016 mdb_printf(" %a\n", bc.bc_stack[i]);
3017 break;
3018 }
3019
3020 if (i >= depth)
3021 mdb_printf("\n");
3022 }
3023
3024 return (DCMD_OK);
3025 }
3026
3027 typedef struct kmem_verify {
3028 uint64_t *kmv_buf; /* buffer to read cache contents into */
3029 size_t kmv_size; /* number of bytes in kmv_buf */
3030 int kmv_corruption; /* > 0 if corruption found. */
3031 uint_t kmv_flags; /* dcmd flags */
3032 struct kmem_cache kmv_cache; /* the cache we're operating on */
3033 } kmem_verify_t;
3034
3035 /*
3036 * verify_pattern()
3037 * verify that buf is filled with the pattern pat.
3038 */
3039 static int64_t
3040 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3041 {
3042 /*LINTED*/
3043 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3044 uint64_t *buf;
3045
3046 for (buf = buf_arg; buf < bufend; buf++)
3047 if (*buf != pat)
3048 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3049 return (-1);
3050 }
3051
3052 /*
3053 * verify_buftag()
3054 * verify that btp->bt_bxstat == (bcp ^ pat)
3055 */
3056 static int
3057 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3058 {
3059 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3060 }
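/*
 * Illustration (editor's addition; the pointer value is hypothetical): for an
 * intact buffer whose buftag has bt_bufctl == 0xffffff01ce9a2e68,
 * verify_buftag(btp, KMEM_BUFTAG_ALLOC) expects
 * bt_bxstat == 0xffffff01ce9a2e68 ^ KMEM_BUFTAG_ALLOC; anything else is
 * reported as a corrupt buftag.
 */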
3061
3062 /*
3063 * verify_free()
3064 * verify the integrity of a free block of memory by checking
3065 * that it is filled with 0xdeadbeef and that its buftag is sane.
3066 */
3067 /*ARGSUSED1*/
3068 static int
3069 verify_free(uintptr_t addr, const void *data, void *private)
3070 {
3071 kmem_verify_t *kmv = (kmem_verify_t *)private;
3072 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3073 int64_t corrupt; /* corruption offset */
3074 kmem_buftag_t *buftagp; /* ptr to buftag */
3075 kmem_cache_t *cp = &kmv->kmv_cache;
3076 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3077
3078 /*LINTED*/
3079 buftagp = KMEM_BUFTAG(cp, buf);
3080
3081 /*
3082 * Read the buffer to check.
3083 */
3084 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3085 if (!besilent)
3086 mdb_warn("couldn't read %p", addr);
3087 return (WALK_NEXT);
3088 }
3089
3090 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3091 KMEM_FREE_PATTERN)) >= 0) {
3092 if (!besilent)
3093 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3094 addr, (uintptr_t)addr + corrupt);
3095 goto corrupt;
3096 }
3097 /*
3098 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3099 * the first bytes of the buffer, hence we cannot check for red
3100 * zone corruption.
3101 */
3102 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3103 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3104 if (!besilent)
3105 mdb_printf("buffer %p (free) seems to "
3106 "have a corrupt redzone pattern\n", addr);
3107 goto corrupt;
3108 }
3109
3110 /*
3111 * confirm bufctl pointer integrity.
3112 */
3113 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3114 if (!besilent)
3115 mdb_printf("buffer %p (free) has a corrupt "
3116 "buftag\n", addr);
3117 goto corrupt;
3118 }
3119
3120 return (WALK_NEXT);
3121 corrupt:
3122 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3123 mdb_printf("%p\n", addr);
3124 kmv->kmv_corruption++;
3125 return (WALK_NEXT);
3126 }
3127
3128 /*
3129 * verify_alloc()
3130 * Verify that the buftag of an allocated buffer makes sense with respect
3131 * to the buffer.
3132 */
3133 /*ARGSUSED1*/
3134 static int
3135 verify_alloc(uintptr_t addr, const void *data, void *private)
3136 {
3137 kmem_verify_t *kmv = (kmem_verify_t *)private;
3138 kmem_cache_t *cp = &kmv->kmv_cache;
3139 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3140 /*LINTED*/
3141 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3142 uint32_t *ip = (uint32_t *)buftagp;
3143 uint8_t *bp = (uint8_t *)buf;
3144 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3145 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3146
3147 /*
3148 * Read the buffer to check.
3149 */
3150 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3151 if (!besilent)
3152 mdb_warn("couldn't read %p", addr);
3153 return (WALK_NEXT);
3154 }
3155
3156 /*
3157 * There are two cases to handle:
3158 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3159 * 0xfeedfacefeedface at the end of it
3160 * 2. If the buf was alloc'd using kmem_alloc, it will have
3161 * 0xbb just past the end of the region in use. At the buftag,
3162 * it will have 0xfeedface (or, if the whole buffer is in use,
3163 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3164 * endianness), followed by 32 bits containing the offset of the
3165 * 0xbb byte in the buffer.
3166 *
3167 * Finally, the two 32-bit words that comprise the second half of the
3168 * buftag should xor to KMEM_BUFTAG_ALLOC
3169 */
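	/*
	 * Worked example (editor's addition; sizes are hypothetical): for a
	 * kmem_alloc(24) buffer, the redzone byte 0xbb sits at offset 24,
	 * just past the region in use, so KMEM_SIZE_DECODE(ip[1]) should
	 * yield 24 and bp[24] should equal KMEM_REDZONE_BYTE; for a
	 * kmem_cache_alloc() buffer, bt_redzone is simply
	 * KMEM_REDZONE_PATTERN.
	 */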
3170
3171 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3172 looks_ok = 1;
3173 else if (!KMEM_SIZE_VALID(ip[1]))
3174 size_ok = 0;
3175 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3176 looks_ok = 1;
3177 else
3178 size_ok = 0;
3179
3180 if (!size_ok) {
3181 if (!besilent)
3182 mdb_printf("buffer %p (allocated) has a corrupt "
3183 "redzone size encoding\n", addr);
3184 goto corrupt;
3185 }
3186
3187 if (!looks_ok) {
3188 if (!besilent)
3189 mdb_printf("buffer %p (allocated) has a corrupt "
3190 "redzone signature\n", addr);
3191 goto corrupt;
3192 }
3193
3194 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3195 if (!besilent)
3196 mdb_printf("buffer %p (allocated) has a "
3197 "corrupt buftag\n", addr);
3198 goto corrupt;
3199 }
3200
3201 return (WALK_NEXT);
3202 corrupt:
3203 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3204 mdb_printf("%p\n", addr);
3205
3206 kmv->kmv_corruption++;
3207 return (WALK_NEXT);
3208 }
3209
3210 /*ARGSUSED2*/
3211 int
3212 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3213 {
3214 if (flags & DCMD_ADDRSPEC) {
3215 int check_alloc = 0, check_free = 0;
3216 kmem_verify_t kmv;
3217
3218 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3219 addr) == -1) {
3220 mdb_warn("couldn't read kmem_cache %p", addr);
3221 return (DCMD_ERR);
3222 }
3223
3224 if ((kmv.kmv_cache.cache_dump.kd_unsafe ||
3225 kmv.kmv_cache.cache_dump.kd_alloc_fails) &&
3226 !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3227 mdb_warn("WARNING: cache was used during dump: "
3228 "corruption may be incorrectly reported\n");
3229 }
3230
3231 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3232 sizeof (kmem_buftag_t);
3233 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3234 kmv.kmv_corruption = 0;
3235 kmv.kmv_flags = flags;
3236
3237 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3238 check_alloc = 1;
3239 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3240 check_free = 1;
3241 } else {
3242 if (!(flags & DCMD_LOOP)) {
3243 mdb_warn("cache %p (%s) does not have "
3244 "redzone checking enabled\n", addr,
3245 kmv.kmv_cache.cache_name);
3246 }
3247 return (DCMD_ERR);
3248 }
3249
3250 if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3251 mdb_printf("Summary for cache '%s'\n",
3252 kmv.kmv_cache.cache_name);
3253 mdb_inc_indent(2);
3254 }
3255
3256 if (check_alloc)
3257 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3258 if (check_free)
3259 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3260
3261 if (!(flags & DCMD_PIPE_OUT)) {
3262 if (flags & DCMD_LOOP) {
3263 if (kmv.kmv_corruption == 0) {
3264 mdb_printf("%-*s %?p clean\n",
3265 KMEM_CACHE_NAMELEN,
3266 kmv.kmv_cache.cache_name, addr);
3267 } else {
3268 mdb_printf("%-*s %?p %d corrupt "
3269 "buffer%s\n", KMEM_CACHE_NAMELEN,
3270 kmv.kmv_cache.cache_name, addr,
3271 kmv.kmv_corruption,
3272 kmv.kmv_corruption > 1 ? "s" : "");
3273 }
3274 } else {
3275 /*
3276 * This is the more verbose mode, when the user
3277 * typed addr::kmem_verify. If the cache was
3278 * clean, nothing will have yet been printed. So
3279 * say something.
3280 */
3281 if (kmv.kmv_corruption == 0)
3282 mdb_printf("clean\n");
3283
3284 mdb_dec_indent(2);
3285 }
3286 }
3287 } else {
3288 /*
3289 * If the user didn't specify a cache to verify, we'll walk all
3290 * kmem_cache's, specifying ourself as a callback for each...
3291 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3292 */
3293
3294 if (!(flags & DCMD_PIPE_OUT)) {
3295 uintptr_t dump_curr;
3296 uintptr_t dump_end;
3297
3298 if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 &&
3299 mdb_readvar(&dump_end, "kmem_dump_end") != -1 &&
3300 dump_curr == dump_end) {
3301 mdb_warn("WARNING: exceeded kmem_dump_size; "
3302 "corruption may be incorrectly reported\n");
3303 }
3304
3305 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n",
3306 KMEM_CACHE_NAMELEN, "Cache Name", "Addr",
3307 "Cache Integrity");
3308 }
3309
3310 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3311 }
3312
3313 return (DCMD_OK);
3314 }
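/*
 * Illustrative usage sketch (editor's addition; the address is hypothetical):
 *
 *	> ::kmem_verify			! summarize every cache with redzones
 *	> ffffff01cb0e8008::kmem_verify	! verbose check of a single cache
 */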
3315
3316 typedef struct vmem_node {
3317 struct vmem_node *vn_next;
3318 struct vmem_node *vn_parent;
3319 struct vmem_node *vn_sibling;
3320 struct vmem_node *vn_children;
3321 uintptr_t vn_addr;
3322 int vn_marked;
3323 vmem_t vn_vmem;
3324 } vmem_node_t;
3325
3326 typedef struct vmem_walk {
3327 vmem_node_t *vw_root;
3328 vmem_node_t *vw_current;
3329 } vmem_walk_t;
3330
3331 int
3332 vmem_walk_init(mdb_walk_state_t *wsp)
3333 {
3334 uintptr_t vaddr, paddr;
3335 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3336 vmem_walk_t *vw;
3337
3338 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3339 mdb_warn("couldn't read 'vmem_list'");
3340 return (WALK_ERR);
3341 }
3342
3343 while (vaddr != 0) {
3344 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3345 vp->vn_addr = vaddr;
3346 vp->vn_next = head;
3347 head = vp;
3348
3349 if (vaddr == wsp->walk_addr)
3350 current = vp;
3351
3352 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3353 mdb_warn("couldn't read vmem_t at %p", vaddr);
3354 goto err;
3355 }
3356
3357 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3358 }
3359
3360 for (vp = head; vp != NULL; vp = vp->vn_next) {
3361
3362 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
3363 vp->vn_sibling = root;
3364 root = vp;
3365 continue;
3366 }
3367
3368 for (parent = head; parent != NULL; parent = parent->vn_next) {
3369 if (parent->vn_addr != paddr)
3370 continue;
3371 vp->vn_sibling = parent->vn_children;
3372 parent->vn_children = vp;
3373 vp->vn_parent = parent;
3374 break;
3375 }
3376
3377 if (parent == NULL) {
3378 mdb_warn("couldn't find %p's parent (%p)\n",
3379 vp->vn_addr, paddr);
3380 goto err;
3381 }
3382 }
3383
3384 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3385 vw->vw_root = root;
3386
3387 if (current != NULL)
3388 vw->vw_current = current;
3389 else
3390 vw->vw_current = root;
3391
3392 wsp->walk_data = vw;
3393 return (WALK_NEXT);
3394 err:
3395 for (vp = head; head != NULL; vp = head) {
3396 head = vp->vn_next;
3397 mdb_free(vp, sizeof (vmem_node_t));
3398 }
3399
3400 return (WALK_ERR);
3401 }
3402
3403 int
3404 vmem_walk_step(mdb_walk_state_t *wsp)
3405 {
3406 vmem_walk_t *vw = wsp->walk_data;
3407 vmem_node_t *vp;
3408 int rval;
3409
3410 if ((vp = vw->vw_current) == NULL)
3411 return (WALK_DONE);
3412
3413 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3414
3415 if (vp->vn_children != NULL) {
3416 vw->vw_current = vp->vn_children;
3417 return (rval);
3418 }
3419
3420 do {
3421 vw->vw_current = vp->vn_sibling;
3422 vp = vp->vn_parent;
3423 } while (vw->vw_current == NULL && vp != NULL);
3424
3425 return (rval);
3426 }
3427
3428 /*
3429 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3430 * children are visited before their parent. We perform the postfix walk
3431 * iteratively (rather than recursively) to allow mdb to regain control
3432 * after each callback.
3433 */
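/*
 * For example (editor's addition; the arena names are only illustrative):
 * given the hierarchy
 *
 *	heap
 *	    kmem_metadata
 *	        kmem_cache
 *	    kmem_oversize
 *
 * the postfix walk reports kmem_cache before kmem_metadata, and both
 * kmem_metadata and kmem_oversize before heap.
 */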
3434 int
3435 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3436 {
3437 vmem_walk_t *vw = wsp->walk_data;
3438 vmem_node_t *vp = vw->vw_current;
3439 int rval;
3440
3441 /*
3442 * If this node is marked, then we know that we have already visited
3443 * all of its children. If the node has any siblings, they need to
3444 * be visited next; otherwise, we need to visit the parent. Note
3445 * that vp->vn_marked will only be zero on the first invocation of
3446 * the step function.
3447 */
3448 if (vp->vn_marked) {
3449 if (vp->vn_sibling != NULL)
3450 vp = vp->vn_sibling;
3451 else if (vp->vn_parent != NULL)
3452 vp = vp->vn_parent;
3453 else {
3454 /*
3455 * We have neither a parent, nor a sibling, and we
3456 * have already been visited; we're done.
3457 */
3458 return (WALK_DONE);
3459 }
3460 }
3461
3462 /*
3463 * Before we visit this node, visit its children.
3464 */
3465 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3466 vp = vp->vn_children;
3467
3468 vp->vn_marked = 1;
3469 vw->vw_current = vp;
3470 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3471
3472 return (rval);
3473 }
3474
3475 void
3476 vmem_walk_fini(mdb_walk_state_t *wsp)
3477 {
3478 vmem_walk_t *vw = wsp->walk_data;
3479 vmem_node_t *root = vw->vw_root;
3480 int done;
3481
3482 if (root == NULL)
3483 return;
3484
3485 if ((vw->vw_root = root->vn_children) != NULL)
3486 vmem_walk_fini(wsp);
3487
3488 vw->vw_root = root->vn_sibling;
3489 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3490 mdb_free(root, sizeof (vmem_node_t));
3491
3492 if (done) {
3493 mdb_free(vw, sizeof (vmem_walk_t));
3494 } else {
3495 vmem_walk_fini(wsp);
3496 }
3497 }
3498
3499 typedef struct vmem_seg_walk {
3500 uint8_t vsw_type;
3501 uintptr_t vsw_start;
3502 uintptr_t vsw_current;
3503 } vmem_seg_walk_t;
3504
3505 /*ARGSUSED*/
3506 int
3507 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3508 {
3509 vmem_seg_walk_t *vsw;
3510
3511 if (wsp->walk_addr == 0) {
3512 mdb_warn("vmem_%s does not support global walks\n", name);
3513 return (WALK_ERR);
3514 }
3515
3516 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3517
3518 vsw->vsw_type = type;
3519 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3520 vsw->vsw_current = vsw->vsw_start;
3521
3522 return (WALK_NEXT);
3523 }
3524
3525 /*
3526 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3527 */
3528 #define VMEM_NONE 0
3529
3530 int
3531 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3532 {
3533 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3534 }
3535
3536 int
3537 vmem_free_walk_init(mdb_walk_state_t *wsp)
3538 {
3539 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3540 }
3541
3542 int
3543 vmem_span_walk_init(mdb_walk_state_t *wsp)
3544 {
3545 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3546 }
3547
3548 int
3549 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3550 {
3551 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3552 }
3553
3554 int
3555 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3556 {
3557 vmem_seg_t seg;
3558 vmem_seg_walk_t *vsw = wsp->walk_data;
3559 uintptr_t addr = vsw->vsw_current;
3560 static size_t seg_size = 0;
3561 int rval;
3562
3563 if (!seg_size) {
3564 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3565 mdb_warn("failed to read 'vmem_seg_size'");
3566 seg_size = sizeof (vmem_seg_t);
3567 }
3568 }
3569
3570 if (seg_size < sizeof (seg))
3571 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3572
3573 if (mdb_vread(&seg, seg_size, addr) == -1) {
3574 mdb_warn("couldn't read vmem_seg at %p", addr);
3575 return (WALK_ERR);
3576 }
3577
3578 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3579 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3580 rval = WALK_NEXT;
3581 } else {
3582 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3583 }
3584
3585 if (vsw->vsw_current == vsw->vsw_start)
3586 return (WALK_DONE);
3587
3588 return (rval);
3589 }
3590
3591 void
3592 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3593 {
3594 vmem_seg_walk_t *vsw = wsp->walk_data;
3595
3596 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3597 }
3598
3599 #define VMEM_NAMEWIDTH 22
3600
3601 int
3602 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3603 {
3604 vmem_t v, parent;
3605 vmem_kstat_t *vkp = &v.vm_kstat;
3606 uintptr_t paddr;
3607 int ident = 0;
3608 char c[VMEM_NAMEWIDTH];
3609
3610 if (!(flags & DCMD_ADDRSPEC)) {
3611 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3612 mdb_warn("can't walk vmem");
3613 return (DCMD_ERR);
3614 }
3615 return (DCMD_OK);
3616 }
3617
3618 if (DCMD_HDRSPEC(flags))
3619 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3620 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3621 "TOTAL", "SUCCEED", "FAIL");
3622
3623 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3624 mdb_warn("couldn't read vmem at %p", addr);
3625 return (DCMD_ERR);
3626 }
3627
3628 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3629 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3630 mdb_warn("couldn't trace %p's ancestry", addr);
3631 ident = 0;
3632 break;
3633 }
3634 paddr = (uintptr_t)parent.vm_source;
3635 }
3636
3637 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3638
3639 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3640 addr, VMEM_NAMEWIDTH, c,
3641 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3642 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3643
3644 return (DCMD_OK);
3645 }
3646
3647 void
3648 vmem_seg_help(void)
3649 {
3650 mdb_printf("%s",
3651 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3652 "\n"
3653 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3654 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3655 "information.\n");
3656 mdb_dec_indent(2);
3657 mdb_printf("%<b>OPTIONS%</b>\n");
3658 mdb_inc_indent(2);
3659 mdb_printf("%s",
3660 " -v Display the full content of the vmem_seg, including its stack trace\n"
3661 " -s report the size of the segment, instead of the end address\n"
3662 " -c caller\n"
3663 " filter out segments without the function/PC in their stack trace\n"
3664 " -e earliest\n"
3665 " filter out segments timestamped before earliest\n"
3666 " -l latest\n"
3667 " filter out segments timestamped after latest\n"
3668 " -m minsize\n"
3669 " filter out segments smaller than minsize\n"
3670 " -M maxsize\n"
3671 " filter out segments larger than maxsize\n"
3672 " -t thread\n"
3673 " filter out segments not involving thread\n"
3674 " -T type\n"
3675 " filter out segments not of type 'type'\n"
3676 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3677 }
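/*
 * Illustrative usage sketch (editor's addition; the arena address is
 * hypothetical):
 *
 *	> ffffff01cb222000::walk vmem_alloc | ::vmem_seg -v
 *	> ffffff01cb222000::walk vmem_seg | ::vmem_seg -T SPAN -s
 */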
3678
3679 /*ARGSUSED*/
3680 int
3681 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3682 {
3683 vmem_seg_t vs;
3684 pc_t *stk = vs.vs_stack;
3685 uintptr_t sz;
3686 uint8_t t;
3687 const char *type = NULL;
3688 GElf_Sym sym;
3689 char c[MDB_SYM_NAMLEN];
3690 int no_debug;
3691 int i;
3692 int depth;
3693 uintptr_t laddr, haddr;
3694
3695 uintptr_t caller = 0, thread = 0;
3696 uintptr_t minsize = 0, maxsize = 0;
3697
3698 hrtime_t earliest = 0, latest = 0;
3699
3700 uint_t size = 0;
3701 uint_t verbose = 0;
3702
3703 if (!(flags & DCMD_ADDRSPEC))
3704 return (DCMD_USAGE);
3705
3706 if (mdb_getopts(argc, argv,
3707 'c', MDB_OPT_UINTPTR, &caller,
3708 'e', MDB_OPT_UINT64, &earliest,
3709 'l', MDB_OPT_UINT64, &latest,
3710 's', MDB_OPT_SETBITS, TRUE, &size,
3711 'm', MDB_OPT_UINTPTR, &minsize,
3712 'M', MDB_OPT_UINTPTR, &maxsize,
3713 't', MDB_OPT_UINTPTR, &thread,
3714 'T', MDB_OPT_STR, &type,
3715 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3716 NULL) != argc)
3717 return (DCMD_USAGE);
3718
3719 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3720 if (verbose) {
3721 mdb_printf("%16s %4s %16s %16s %16s\n"
3722 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3723 "ADDR", "TYPE", "START", "END", "SIZE",
3724 "", "", "THREAD", "TIMESTAMP", "");
3725 } else {
3726 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3727 "START", size? "SIZE" : "END", "WHO");
3728 }
3729 }
3730
3731 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3732 mdb_warn("couldn't read vmem_seg at %p", addr);
3733 return (DCMD_ERR);
3734 }
3735
3736 if (type != NULL) {
3737 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3738 t = VMEM_ALLOC;
3739 else if (strcmp(type, "FREE") == 0)
3740 t = VMEM_FREE;
3741 else if (strcmp(type, "SPAN") == 0)
3742 t = VMEM_SPAN;
3743 else if (strcmp(type, "ROTR") == 0 ||
3744 strcmp(type, "ROTOR") == 0)
3745 t = VMEM_ROTOR;
3746 else if (strcmp(type, "WLKR") == 0 ||
3747 strcmp(type, "WALKER") == 0)
3748 t = VMEM_WALKER;
3749 else {
3750 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3751 type);
3752 return (DCMD_ERR);
3753 }
3754
3755 if (vs.vs_type != t)
3756 return (DCMD_OK);
3757 }
3758
3759 sz = vs.vs_end - vs.vs_start;
3760
3761 if (minsize != 0 && sz < minsize)
3762 return (DCMD_OK);
3763
3764 if (maxsize != 0 && sz > maxsize)
3765 return (DCMD_OK);
3766
3767 t = vs.vs_type;
3768 depth = vs.vs_depth;
3769
3770 /*
3771 * debug info, when present, is only accurate for VMEM_ALLOC segments
3772 */
3773 no_debug = (t != VMEM_ALLOC) ||
3774 (depth == 0 || depth > VMEM_STACK_DEPTH);
3775
3776 if (no_debug) {
3777 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3778 return (DCMD_OK); /* not enough info */
3779 } else {
3780 if (caller != 0) {
3781 laddr = caller;
3782 haddr = caller + sizeof (caller);
3783
3784 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3785 sizeof (c), &sym) != -1 &&
3786 caller == (uintptr_t)sym.st_value) {
3787 /*
3788 * We were provided an exact symbol value; any
3789 * address in the function is valid.
3790 */
3791 laddr = (uintptr_t)sym.st_value;
3792 haddr = (uintptr_t)sym.st_value + sym.st_size;
3793 }
3794
3795 for (i = 0; i < depth; i++)
3796 if (vs.vs_stack[i] >= laddr &&
3797 vs.vs_stack[i] < haddr)
3798 break;
3799
3800 if (i == depth)
3801 return (DCMD_OK);
3802 }
3803
3804 if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3805 return (DCMD_OK);
3806
3807 if (earliest != 0 && vs.vs_timestamp < earliest)
3808 return (DCMD_OK);
3809
3810 if (latest != 0 && vs.vs_timestamp > latest)
3811 return (DCMD_OK);
3812 }
3813
3814 type = (t == VMEM_ALLOC ? "ALLC" :
3815 t == VMEM_FREE ? "FREE" :
3816 t == VMEM_SPAN ? "SPAN" :
3817 t == VMEM_ROTOR ? "ROTR" :
3818 t == VMEM_WALKER ? "WLKR" :
3819 "????");
3820
3821 if (flags & DCMD_PIPE_OUT) {
3822 mdb_printf("%#lr\n", addr);
3823 return (DCMD_OK);
3824 }
3825
3826 if (verbose) {
3827 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16ld\n",
3828 addr, type, vs.vs_start, vs.vs_end, sz);
3829
3830 if (no_debug)
3831 return (DCMD_OK);
3832
3833 mdb_printf("%16s %4s %16p %16llx\n",
3834 "", "", vs.vs_thread, vs.vs_timestamp);
3835
3836 mdb_inc_indent(17);
3837 for (i = 0; i < depth; i++) {
3838 mdb_printf("%a\n", stk[i]);
3839 }
3840 mdb_dec_indent(17);
3841 mdb_printf("\n");
3842 } else {
3843 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3844 vs.vs_start, size? sz : vs.vs_end);
3845
3846 if (no_debug) {
3847 mdb_printf("\n");
3848 return (DCMD_OK);
3849 }
3850
3851 for (i = 0; i < depth; i++) {
3852 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3853 c, sizeof (c), &sym) == -1)
3854 continue;
3855 if (strncmp(c, "vmem_", 5) == 0)
3856 continue;
3857 break;
3858 }
3859 mdb_printf(" %a\n", stk[i]);
3860 }
3861 return (DCMD_OK);
3862 }
3863
3864 typedef struct kmalog_data {
3865 uintptr_t kma_addr;
3866 hrtime_t kma_newest;
3867 } kmalog_data_t;
3868
3869 /*ARGSUSED*/
3870 static int
3871 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3872 {
3873 char name[KMEM_CACHE_NAMELEN + 1];
3874 hrtime_t delta;
3875 int i, depth;
3876 size_t bufsize;
3877
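	/* A zero timestamp marks an unused log record; stop the walk. */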
3878 if (bcp->bc_timestamp == 0)
3879 return (WALK_DONE);
3880
3881 if (kma->kma_newest == 0)
3882 kma->kma_newest = bcp->bc_timestamp;
3883
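	/*
	 * If an address was specified, only show transactions whose
	 * buffer contains that address.
	 */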
3884 if (kma->kma_addr) {
3885 if (mdb_vread(&bufsize, sizeof (bufsize),
3886 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3887 mdb_warn(
3888 "failed to read cache_bufsize for cache at %p",
3889 bcp->bc_cache);
3890 return (WALK_ERR);
3891 }
3892
3893 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3894 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3895 return (WALK_NEXT);
3896 }
3897
3898 delta = kma->kma_newest - bcp->bc_timestamp;
3899 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3900
3901 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3902 &bcp->bc_cache->cache_name) <= 0)
3903 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3904
3905 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3906 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3907
3908 for (i = 0; i < depth; i++)
3909 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3910
3911 return (WALK_NEXT);
3912 }
3913
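/*
 * ::kmalog displays events from a kmem log, newest first, optionally
 * restricted to buffers containing the specified address. Illustrative
 * usage (the log name is optional): ::kmalog fail
 */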
3914 int
3915 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3916 {
3917 const char *logname = "kmem_transaction_log";
3918 kmalog_data_t kma;
3919
3920 if (argc > 1)
3921 return (DCMD_USAGE);
3922
3923 kma.kma_newest = 0;
3924 if (flags & DCMD_ADDRSPEC)
3925 kma.kma_addr = addr;
3926 else
3927 kma.kma_addr = 0;
3928
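	/*
	 * An optional string argument selects one of the other kmem logs;
	 * the default is the transaction log.
	 */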
3929 if (argc > 0) {
3930 if (argv->a_type != MDB_TYPE_STRING)
3931 return (DCMD_USAGE);
3932 if (strcmp(argv->a_un.a_str, "fail") == 0)
3933 logname = "kmem_failure_log";
3934 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3935 logname = "kmem_slab_log";
3936 else if (strcmp(argv->a_un.a_str, "zerosized") == 0)
3937 logname = "kmem_zerosized_log";
3938 else
3939 return (DCMD_USAGE);
3940 }
3941
3942 if (mdb_readvar(&addr, logname) == -1) {
3943 		mdb_warn("failed to read %s log header pointer", logname);
3944 return (DCMD_ERR);
3945 }
3946
3947 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3948 mdb_warn("failed to walk kmem log");
3949 return (DCMD_ERR);
3950 }
3951
3952 return (DCMD_OK);
3953 }
3954
3955 /*
3956 * As the final lure for die-hard crash(8) users, we provide ::kmausers here.
3957 * The first piece is a structure which we use to accumulate kmem_cache_t
3958 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3959 * walker; we either add all caches, or ones named explicitly as arguments.
3960 */
3961
3962 typedef struct kmclist {
3963 const char *kmc_name; /* Name to match (or NULL) */
3964 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3965 int kmc_nelems; /* Num entries in kmc_caches */
3966 int kmc_size; /* Size of kmc_caches array */
3967 } kmclist_t;
3968
3969 static int
3970 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3971 {
3972 void *p;
3973 int s;
3974
3975 if (kmc->kmc_name == NULL ||
3976 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3977 /*
3978 * If we have a match, grow our array (if necessary), and then
3979 * add the virtual address of the matching cache to our list.
3980 */
3981 if (kmc->kmc_nelems >= kmc->kmc_size) {
3982 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3983 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3984
3985 bcopy(kmc->kmc_caches, p,
3986 sizeof (uintptr_t) * kmc->kmc_size);
3987
3988 kmc->kmc_caches = p;
3989 kmc->kmc_size = s;
3990 }
3991
3992 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3993 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3994 }
3995
3996 return (WALK_NEXT);
3997 }
3998
3999 /*
4000 * The second piece of ::kmausers is a hash table of allocations. Each
4001 * allocation owner is identified by its stack trace and data_size. We then
4002 * track the total bytes of all such allocations, and the number of allocations
4003 * to report at the end. Once we have a list of caches, we walk through the
4004 * allocated bufctls of each, and update our hash table accordingly.
4005 */
4006
4007 typedef struct kmowner {
4008 struct kmowner *kmo_head; /* First hash elt in bucket */
4009 struct kmowner *kmo_next; /* Next hash elt in chain */
4010 size_t kmo_signature; /* Hash table signature */
4011 uint_t kmo_num; /* Number of allocations */
4012 size_t kmo_data_size; /* Size of each allocation */
4013 size_t kmo_total_size; /* Total bytes of allocation */
4014 int kmo_depth; /* Depth of stack trace */
4015 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
4016 } kmowner_t;
4017
4018 typedef struct kmusers {
4019 uintptr_t kmu_addr; /* address of interest */
4020 const kmem_cache_t *kmu_cache; /* Current kmem cache */
4021 kmowner_t *kmu_hash; /* Hash table of owners */
4022 int kmu_nelems; /* Number of entries in use */
4023 int kmu_size; /* Total number of entries */
4024 } kmusers_t;
4025
4026 static void
4027 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4028 size_t size, size_t data_size)
4029 {
4030 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4031 size_t bucket, signature = data_size;
4032 kmowner_t *kmo, *kmoend;
4033
4034 /*
4035 * If the hash table is full, double its size and rehash everything.
4036 */
4037 if (kmu->kmu_nelems >= kmu->kmu_size) {
4038 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4039
4040 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4041 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4042 kmu->kmu_hash = kmo;
4043 kmu->kmu_size = s;
4044
4045 kmoend = kmu->kmu_hash + kmu->kmu_size;
4046 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4047 kmo->kmo_head = NULL;
4048
4049 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4050 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4051 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4052 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4053 kmu->kmu_hash[bucket].kmo_head = kmo;
4054 }
4055 }
4056
4057 /*
4058 * Finish computing the hash signature from the stack trace, and then
4059 * see if the owner is in the hash table. If so, update our stats.
4060 */
4061 for (i = 0; i < depth; i++)
4062 signature += bcp->bc_stack[i];
4063
4064 bucket = signature & (kmu->kmu_size - 1);
4065
4066 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4067 if (kmo->kmo_signature == signature) {
4068 size_t difference = 0;
4069
4070 difference |= kmo->kmo_data_size - data_size;
4071 difference |= kmo->kmo_depth - depth;
4072
4073 for (i = 0; i < depth; i++) {
4074 difference |= kmo->kmo_stack[i] -
4075 bcp->bc_stack[i];
4076 }
4077
4078 if (difference == 0) {
4079 kmo->kmo_total_size += size;
4080 kmo->kmo_num++;
4081 return;
4082 }
4083 }
4084 }
4085
4086 /*
4087 * If the owner is not yet hashed, grab the next element and fill it
4088 * in based on the allocation information.
4089 */
4090 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4091 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4092 kmu->kmu_hash[bucket].kmo_head = kmo;
4093
4094 kmo->kmo_signature = signature;
4095 kmo->kmo_num = 1;
4096 kmo->kmo_data_size = data_size;
4097 kmo->kmo_total_size = size;
4098 kmo->kmo_depth = depth;
4099
4100 for (i = 0; i < depth; i++)
4101 kmo->kmo_stack[i] = bcp->bc_stack[i];
4102 }
4103
4104 /*
4105 * When ::kmausers is invoked without the -f flag, we simply update our hash
4106 * table with the information from each allocated bufctl.
4107 */
4108 /*ARGSUSED*/
4109 static int
4110 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4111 {
4112 const kmem_cache_t *cp = kmu->kmu_cache;
4113
4114 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4115 return (WALK_NEXT);
4116 }
4117
4118 /*
4119 * When ::kmausers is invoked with the -f flag, we print out the information
4120 * for each bufctl as well as updating the hash table.
4121 */
4122 static int
4123 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4124 {
4125 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4126 const kmem_cache_t *cp = kmu->kmu_cache;
4127 kmem_bufctl_t bufctl;
4128
4129 if (kmu->kmu_addr) {
4130 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4131 mdb_warn("couldn't read bufctl at %p", addr);
4132 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4133 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4134 cp->cache_bufsize)
4135 return (WALK_NEXT);
4136 }
4137
4138 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4139 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4140
4141 for (i = 0; i < depth; i++)
4142 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4143
4144 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4145 return (WALK_NEXT);
4146 }
4147
4148 /*
4149  * We sort our results by descending total allocation size before printing.
4150 */
4151 static int
4152 kmownercmp(const void *lp, const void *rp)
4153 {
4154 const kmowner_t *lhs = lp;
4155 const kmowner_t *rhs = rp;
4156
4157 return (rhs->kmo_total_size - lhs->kmo_total_size);
4158 }
4159
4160 /*
4161 * The main engine of ::kmausers is relatively straightforward: First we
4162 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4163 * iterate over the allocated bufctls of each cache in the list. Finally,
4164 * we sort and print our results.
4165 */
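/*
 * Illustrative invocations (the cache name and address are examples only):
 *
 *	> ::kmausers			summarize all audited caches
 *	> ::kmausers -ef kmem_alloc_256	show each allocation in one cache
 *	> addr::kmausers		allocations containing "addr"
 */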
4166 /*ARGSUSED*/
4167 int
4168 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4169 {
4170 int mem_threshold = 8192; /* Minimum # bytes for printing */
4171 int cnt_threshold = 100; /* Minimum # blocks for printing */
4172 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4173 int do_all_caches = 1; /* Do all caches (no arguments) */
4174 int opt_e = FALSE; /* Include "small" users */
4175 int opt_f = FALSE; /* Print stack traces */
4176
4177 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4178 kmowner_t *kmo, *kmoend;
4179 int i, oelems;
4180
4181 kmclist_t kmc;
4182 kmusers_t kmu;
4183
4184 bzero(&kmc, sizeof (kmc));
4185 bzero(&kmu, sizeof (kmu));
4186
4187 while ((i = mdb_getopts(argc, argv,
4188 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4189 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4190
4191 argv += i; /* skip past options we just processed */
4192 argc -= i; /* adjust argc */
4193
4194 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4195 return (DCMD_USAGE);
4196
4197 oelems = kmc.kmc_nelems;
4198 kmc.kmc_name = argv->a_un.a_str;
4199 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4200
4201 if (kmc.kmc_nelems == oelems) {
4202 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4203 return (DCMD_ERR);
4204 }
4205
4206 do_all_caches = 0;
4207 argv++;
4208 argc--;
4209 }
4210
4211 if (flags & DCMD_ADDRSPEC) {
4212 opt_f = TRUE;
4213 kmu.kmu_addr = addr;
4214 } else {
4215 kmu.kmu_addr = 0;
4216 }
4217
4218 if (opt_e)
4219 mem_threshold = cnt_threshold = 0;
4220
4221 if (opt_f)
4222 callback = (mdb_walk_cb_t)kmause2;
4223
4224 if (do_all_caches) {
4225 kmc.kmc_name = NULL; /* match all cache names */
4226 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4227 }
4228
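	/*
	 * Walk the allocated bufctls of each audited cache, accumulating
	 * (and, with -f, printing) the owners.
	 */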
4229 for (i = 0; i < kmc.kmc_nelems; i++) {
4230 uintptr_t cp = kmc.kmc_caches[i];
4231 kmem_cache_t c;
4232
4233 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4234 mdb_warn("failed to read cache at %p", cp);
4235 continue;
4236 }
4237
4238 if (!(c.cache_flags & KMF_AUDIT)) {
4239 if (!do_all_caches) {
4240 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4241 c.cache_name);
4242 }
4243 continue;
4244 }
4245
4246 kmu.kmu_cache = &c;
4247 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4248 audited_caches++;
4249 }
4250
4251 if (audited_caches == 0 && do_all_caches) {
4252 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4253 return (DCMD_ERR);
4254 }
4255
4256 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4257 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4258
4259 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4260 if (kmo->kmo_total_size < mem_threshold &&
4261 kmo->kmo_num < cnt_threshold)
4262 continue;
4263 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4264 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4265 for (i = 0; i < kmo->kmo_depth; i++)
4266 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4267 }
4268
4269 return (DCMD_OK);
4270 }
4271
4272 void
4273 kmausers_help(void)
4274 {
4275 mdb_printf(
4276 	    "Displays the largest users of the kmem allocator, grouped by\n"
4277 	    "stack trace and sorted by total allocation size. If one or more\n"
4278 	    "caches is specified, only those caches will be searched. By\n"
4279 	    "default, all caches are searched. If an address is specified,\n"
4280 	    "then only those allocations which include the given address are\n"
4281 	    "displayed. Specifying an address implies -f.\n"
4282 "\n"
4283 "\t-e\tInclude all users, not just the largest\n"
4284 "\t-f\tDisplay individual allocations. By default, users are\n"
4285 "\t\tgrouped by stack\n");
4286 }
4287
4288 static int
4289 kmem_ready_check(void)
4290 {
4291 int ready;
4292
4293 if (mdb_readvar(&ready, "kmem_ready") < 0)
4294 return (-1); /* errno is set for us */
4295
4296 return (ready);
4297 }
4298
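/*
 * Once kmem reports itself ready, register the per-cache walkers; this only
 * needs to happen once.
 */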
4299 void
4300 kmem_statechange(void)
4301 {
4302 static int been_ready = 0;
4303
4304 if (been_ready)
4305 return;
4306
4307 if (kmem_ready_check() <= 0)
4308 return;
4309
4310 been_ready = 1;
4311 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4312 }
4313
4314 void
4315 kmem_init(void)
4316 {
4317 mdb_walker_t w = {
4318 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4319 list_walk_step, list_walk_fini
4320 };
4321
4322 /*
4323 * If kmem is ready, we'll need to invoke the kmem_cache walker
4324 * immediately. Walkers in the linkage structure won't be ready until
4325 * _mdb_init returns, so we'll need to add this one manually. If kmem
4326 * is ready, we'll use the walker to initialize the caches. If kmem
4327 * isn't ready, we'll register a callback that will allow us to defer
4328 * cache walking until it is.
4329 */
4330 if (mdb_add_walker(&w) != 0) {
4331 mdb_warn("failed to add kmem_cache walker");
4332 return;
4333 }
4334
4335 kmem_statechange();
4336
4337 /* register our ::whatis handlers */
4338 mdb_whatis_register("modules", whatis_run_modules, NULL,
4339 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4340 mdb_whatis_register("threads", whatis_run_threads, NULL,
4341 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4342 mdb_whatis_register("pages", whatis_run_pages, NULL,
4343 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4344 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4345 WHATIS_PRIO_ALLOCATOR, 0);
4346 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4347 WHATIS_PRIO_ALLOCATOR, 0);
4348 }
4349
4350 typedef struct whatthread {
4351 uintptr_t wt_target;
4352 int wt_verbose;
4353 } whatthread_t;
4354
4355 static int
4356 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4357 {
4358 uintptr_t current, data;
4359
4360 if (t->t_stkbase == NULL)
4361 return (WALK_NEXT);
4362
4363 /*
4364 * Warn about swapped out threads, but drive on anyway
4365 */
4366 if (!(t->t_schedflag & TS_LOAD)) {
4367 mdb_warn("thread %p's stack swapped out\n", addr);
4368 return (WALK_NEXT);
4369 }
4370
4371 /*
4372 * Search the thread's stack for the given pointer. Note that it would
4373 * be more efficient to follow ::kgrep's lead and read in page-sized
4374 * chunks, but this routine is already fast and simple.
4375 */
4376 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4377 current += sizeof (uintptr_t)) {
4378 if (mdb_vread(&data, sizeof (data), current) == -1) {
4379 mdb_warn("couldn't read thread %p's stack at %p",
4380 addr, current);
4381 return (WALK_ERR);
4382 }
4383
4384 if (data == w->wt_target) {
4385 if (w->wt_verbose) {
4386 mdb_printf("%p in thread %p's stack%s\n",
4387 current, addr, stack_active(t, current));
4388 } else {
4389 mdb_printf("%#lr\n", addr);
4390 return (WALK_NEXT);
4391 }
4392 }
4393 }
4394
4395 return (WALK_NEXT);
4396 }
4397
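/*
 * ::whatthread scans each kthread's stack for words that match the given
 * address. Illustrative usage: addr::whatthread -v
 */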
4398 int
4399 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4400 {
4401 whatthread_t w;
4402
4403 if (!(flags & DCMD_ADDRSPEC))
4404 return (DCMD_USAGE);
4405
4406 w.wt_verbose = FALSE;
4407 w.wt_target = addr;
4408
4409 if (mdb_getopts(argc, argv,
4410 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4411 return (DCMD_USAGE);
4412
4413 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4414 == -1) {
4415 mdb_warn("couldn't walk threads");
4416 return (DCMD_ERR);
4417 }
4418
4419 return (DCMD_OK);
4420 }
4421